/*
 * JIT compiler: RV64I basic-block -> x86-64 native code.
 *
 * Strategy:
 *   - Guest registers live in a u64[32] array pointed to by the first arg
 *     (rdi on entry).  Memory base is in rsi.  The third arg (rdx) is a
 *     pointer where we write the next guest PC on exit.
 *   - During block execution we pin:
 *       r12 = &regs[0]   (guest register file)
 *       r13 = memory base
 *       r14 = pc_out pointer
 *   - Guest registers are loaded/stored on demand from/to the array.
 *   - At block boundaries we store the next guest PC into *pc_out and
 *     return an exit-reason code in eax.
 *
 *  We emit raw x86-64 bytes into the code cache.
 */
18
19
#include <string.h>
20
#include <sys/mman.h>
21
22
#include "jit.h"
23
#include "riscv.h"
24
#include "types.h"
25
26
/* ---------- x86-64 code emitter helpers --------------------------------- */
27
28
/* A small code buffer that we fill up, then copy into the cache.
 * On overflow the emitter stops writing and sets `overflow`; the caller
 * checks the flag once at the end instead of checking every emit. */
struct emitter {
    u8  *buf;      /* Start of buffer. */
    u32  pos;      /* Current write position (bytes emitted so far). */
    u32  capacity; /* Maximum bytes. */
    bool overflow; /* Set if we ran out of space. */
};
35
36
/* Append one byte to the code buffer; on exhaustion, record the
 * overflow instead of writing past the end. */
static inline void emit_u8(struct emitter *e, u8 b) {
    if (e->pos >= e->capacity) {
        e->overflow = true;
        return;
    }
    e->buf[e->pos++] = b;
}
42
43
/* Append a 32-bit value in little-endian byte order. */
static inline void emit_u32(struct emitter *e, u32 v) {
    for (int shift = 0; shift < 32; shift += 8)
        emit_u8(e, (u8)(v >> shift));
}
49
50
/* Append a 64-bit value in little-endian byte order (low dword first). */
static inline void emit_u64(struct emitter *e, u64 v) {
    emit_u32(e, (u32)(v & 0xFFFFFFFFu));
    emit_u32(e, (u32)(v >> 32));
}
54
55
/* x86-64 register encoding (for ModR/M, SIB, REX).
 * Values 8-15 need the corresponding REX extension bit (R/X/B) set;
 * helpers below test `reg >= 8` for exactly that. */
enum x86reg {
    X_RAX = 0,
    X_RCX = 1,
    X_RDX = 2,
    X_RBX = 3,
    X_RSP = 4,
    X_RBP = 5,
    X_RSI = 6,
    X_RDI = 7,
    X_R8  = 8,
    X_R9  = 9,
    X_R10 = 10,
    X_R11 = 11,
    X_R12 = 12,
    X_R13 = 13,
    X_R14 = 14,
    X_R15 = 15,
};
74
75
/* REX prefix byte. W=64-bit, R=reg extension, X=SIB index, B=rm/base. */
76
static inline u8 rex(bool w, bool r, bool x, bool b) {
77
    return (u8)(0x40 | (w ? 8 : 0) | (r ? 4 : 0) | (x ? 2 : 0) | (b ? 1 : 0));
78
}
79
80
/* ModR/M byte. */
81
static inline u8 modrm(u8 mod, u8 reg, u8 rm) {
82
    return (u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7));
83
}
84
85
/* SIB byte. */
86
static inline u8 sib(u8 scale, u8 index, u8 base) {
87
    return (u8)((scale << 6) | ((index & 7) << 3) | (base & 7));
88
}
89
90
/* Pinned host registers. */
91
#define HREGS  X_R12 /* &regs[0]  */
92
#define HMEM   X_R13 /* memory    */
93
#define HPCOUT X_R14 /* pc_out    */
94
95
/* Scratch registers for codegen (caller-saved, not pinned). */
96
#define HTMP1 X_RAX
97
#define HTMP2 X_RCX
98
99
/* ---------- Common encoding helpers ------------------------------------ */
100
101
/* Emit REX.W prefix (64-bit operand size, no extended regs). */
102
static inline void emit_rexw(struct emitter *e) {
103
    emit_u8(e, 0x48);
104
}
105
106
/* Emit a 2-register ALU op: <op> rax, rcx (64-bit).
107
 * `opcode` is the x86 opcode byte (e.g. 0x01=add, 0x29=sub, etc). */
108
static void emit_alu_rax_rcx(struct emitter *e, u8 opcode) {
109
    emit_rexw(e);
110
    emit_u8(e, opcode);
111
    emit_u8(e, modrm(3, X_RCX, X_RAX));
112
}
113
114
/* Emit a shift: <shift> rax, cl (64-bit).
115
 * `ext` is the ModR/M extension (4=SHL, 5=SHR, 7=SAR). */
116
static void emit_shift_rax_cl(struct emitter *e, u8 ext) {
117
    emit_rexw(e);
118
    emit_u8(e, 0xD3);
119
    emit_u8(e, modrm(3, ext, X_RAX));
120
}
121
122
/* ---------- Load/store guest register from register file --------------- */
123
124
/* Emit mov with [r12 + disp] addressing (r12 needs SIB). */
125
static void emit_r12_disp(
126
    struct emitter *e, u8 opcode, enum x86reg reg, u32 off
127
) {
128
    emit_u8(e, rex(true, reg >= 8, false, true)); /* B=1 for r12 */
129
    emit_u8(e, opcode);
130
    if (off < 128) {
131
        emit_u8(e, modrm(1, reg, X_R12 & 7));
132
        emit_u8(e, sib(0, 4, X_R12 & 7));
133
        emit_u8(e, (u8)off);
134
    } else {
135
        emit_u8(e, modrm(2, reg, X_R12 & 7));
136
        emit_u8(e, sib(0, 4, X_R12 & 7));
137
        emit_u32(e, off);
138
    }
139
}
140
141
/* Load guest register `guest` into host `dst` from [r12 + guest*8]. */
142
static void emit_load_guest(struct emitter *e, enum x86reg dst, u32 guest) {
143
    if (guest == 0) {
144
        /* x0 is always zero -- xor dst, dst. */
145
        emit_u8(e, rex(true, dst >= 8, false, dst >= 8));
146
        emit_u8(e, 0x31);
147
        emit_u8(e, modrm(3, dst, dst));
148
        return;
149
    }
150
    emit_r12_disp(e, 0x8B, dst, guest * 8);
151
}
152
153
/* Store host `src` to guest register [r12 + guest*8]. */
154
static void emit_store_guest(struct emitter *e, u32 guest, enum x86reg src) {
155
    if (guest == 0)
156
        return;
157
    emit_r12_disp(e, 0x89, src, guest * 8);
158
}
159
160
/* mov reg, imm64 (REX.W + B8+rd) */
161
static void emit_mov_imm64(struct emitter *e, enum x86reg dst, u64 imm) {
162
    emit_u8(e, rex(true, false, false, dst >= 8));
163
    emit_u8(e, (u8)(0xB8 + (dst & 7)));
164
    emit_u64(e, imm);
165
}
166
167
/* mov reg, imm32 (sign-extended to 64 via mov r/m64, imm32) */
168
static void emit_mov_imm32_sx(struct emitter *e, enum x86reg dst, i32 imm) {
169
    emit_u8(e, rex(true, false, false, dst >= 8));
170
    emit_u8(e, 0xC7);
171
    emit_u8(e, modrm(3, 0, dst));
172
    emit_u32(e, (u32)imm);
173
}
174
175
/* Careful prologue using known encodings. */
176
static void emit_prologue(struct emitter *e) {
177
    emit_u8(e, 0x55); /* push rbp */
178
    emit_u8(e, 0x41);
179
    emit_u8(e, 0x54); /* push r12 */
180
    emit_u8(e, 0x41);
181
    emit_u8(e, 0x55); /* push r13 */
182
    emit_u8(e, 0x41);
183
    emit_u8(e, 0x56); /* push r14 */
184
    emit_u8(e, 0x49);
185
    emit_u8(e, 0x89);
186
    emit_u8(e, 0xFC); /* mov r12, rdi */
187
    emit_u8(e, 0x49);
188
    emit_u8(e, 0x89);
189
    emit_u8(e, 0xF5); /* mov r13, rsi */
190
    emit_u8(e, 0x49);
191
    emit_u8(e, 0x89);
192
    emit_u8(e, 0xD6); /* mov r14, rdx */
193
}
194
195
/* Emit function epilogue + ret with exit reason. */
196
static void emit_epilogue_with_exit(struct emitter *e, u32 exit_reason) {
197
    emit_u8(e, 0xB8);
198
    emit_u32(e, exit_reason); /* mov eax, reason */
199
    emit_u8(e, 0x41);
200
    emit_u8(e, 0x5E); /* pop r14 */
201
    emit_u8(e, 0x41);
202
    emit_u8(e, 0x5D); /* pop r13 */
203
    emit_u8(e, 0x41);
204
    emit_u8(e, 0x5C); /* pop r12 */
205
    emit_u8(e, 0x5D); /* pop rbp */
206
    emit_u8(e, 0xC3); /* ret */
207
}
208
209
/* Write next-PC to *r14, then epilogue+ret.
 * The PC store is `mov dword [r14], imm32` (REX.B C7 /0 id); only the
 * low 32 bits of the guest PC are published, which matches the u32 PCs
 * used throughout the compiler. */
static void emit_block_exit(
    struct emitter *e, enum jit_exit reason, u32 next_pc
) {
    /* mov dword [r14], next_pc */
    emit_u8(e, 0x41);
    emit_u8(e, 0xC7);
    emit_u8(e, modrm(0, 0, X_R14 & 7));
    emit_u32(e, next_pc);
    emit_epilogue_with_exit(e, (u32)reason);
}
220
221
/* ---------- Jump patching ---------------------------------------------- */
222
223
/* Emit jnz rel32 (0F 85), return position for patching. */
224
static u32 emit_jnz_placeholder(struct emitter *e) {
225
    u32 pos = e->pos;
226
    emit_u8(e, 0x0F);
227
    emit_u8(e, 0x85);
228
    emit_u32(e, 0);
229
    return pos;
230
}
231
232
/* Emit jne rel32 (0F 85), return position for patching. */
233
#define emit_jne_placeholder emit_jnz_placeholder
234
235
/* Emit jmp rel32 (E9), return position for patching. */
236
static u32 emit_jmp_placeholder(struct emitter *e) {
237
    u32 pos = e->pos;
238
    emit_u8(e, 0xE9);
239
    emit_u32(e, 0);
240
    return pos;
241
}
242
243
/* Patch a jcc rel32 (6-byte: 0F xx rel32) to jump to `target`. */
244
static void patch_jcc(struct emitter *e, u32 jcc_pos, u32 target) {
245
    u32 rel             = target - (jcc_pos + 6);
246
    e->buf[jcc_pos + 2] = (u8)rel;
247
    e->buf[jcc_pos + 3] = (u8)(rel >> 8);
248
    e->buf[jcc_pos + 4] = (u8)(rel >> 16);
249
    e->buf[jcc_pos + 5] = (u8)(rel >> 24);
250
}
251
252
/* Patch a jmp rel32 (5-byte: E9 rel32) to jump to `target`. */
253
static void patch_jmp(struct emitter *e, u32 jmp_pos, u32 target) {
254
    u32 rel             = target - (jmp_pos + 5);
255
    e->buf[jmp_pos + 1] = (u8)rel;
256
    e->buf[jmp_pos + 2] = (u8)(rel >> 8);
257
    e->buf[jmp_pos + 3] = (u8)(rel >> 16);
258
    e->buf[jmp_pos + 4] = (u8)(rel >> 24);
259
}
260
261
/* ---------- Per-instruction translation helpers ------------------------ */
262
263
/* Load guest rs1 into rax (the primary scratch register, HTMP1). */
static void emit_load_rs1(struct emitter *e, u32 rs1) {
    emit_load_guest(e, HTMP1, rs1);
}

/* Load guest rs2 into rcx (the secondary scratch register, HTMP2). */
static void emit_load_rs2(struct emitter *e, u32 rs2) {
    emit_load_guest(e, HTMP2, rs2);
}

/* Store rax into guest rd (a no-op when rd is x0). */
static void emit_store_rd(struct emitter *e, u32 rd) {
    emit_store_guest(e, rd, HTMP1);
}
277
278
/* add rax, imm32 (sign-extended) */
279
static void emit_add_rax_imm32(struct emitter *e, i32 imm) {
280
    if (imm == 0)
281
        return;
282
    emit_rexw(e);
283
    if (imm >= -128 && imm <= 127) {
284
        emit_u8(e, 0x83);
285
        emit_u8(e, modrm(3, 0, X_RAX));
286
        emit_u8(e, (u8)(i8)imm);
287
    } else {
288
        emit_u8(e, 0x05); /* add rax, imm32 (short form) */
289
        emit_u32(e, (u32)imm);
290
    }
291
}
292
293
/* imul rax, rcx (REX.W 0F AF /r, two-operand signed multiply;
 * low 64 bits of the product land in rax). */
static void emit_imul_rax_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xAF);
    emit_u8(e, modrm(3, X_RAX, X_RCX));
}

/* cqo (sign-extend rax into rdx:rax; required before idiv). */
static void emit_cqo(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x99);
}

/* idiv rcx (signed divide rdx:rax by rcx, 64-bit; quotient -> rax,
 * remainder -> rdx).  F7 /7. */
static void emit_idiv_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0xF7);
    emit_u8(e, modrm(3, 7, X_RCX));
}

/* div rcx (unsigned divide rdx:rax by rcx, 64-bit).  F7 /6. */
static void emit_div_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0xF7);
    emit_u8(e, modrm(3, 6, X_RCX));
}

/* xor rdx, rdx (zero the high half of the dividend for unsigned div). */
static void emit_xor_rdx_rdx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x31);
    emit_u8(e, modrm(3, X_RDX, X_RDX));
}

/* movsxd rax, eax (sign-extend a 32-bit result to 64 bits, as every
 * RV64 W-instruction requires).  REX.W 63 /r. */
static void emit_movsxd_rax_eax(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x63);
    emit_u8(e, modrm(3, X_RAX, X_RAX));
}

/* test rcx, rcx (64-bit; sets ZF when the divisor is zero). */
static void emit_test_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x85);
    emit_u8(e, modrm(3, X_RCX, X_RCX));
}

/* test ecx, ecx (32-bit variant, no REX prefix). */
static void emit_test_ecx(struct emitter *e) {
    emit_u8(e, 0x85);
    emit_u8(e, modrm(3, X_RCX, X_RCX));
}

/* cmp rcx, -1 (64-bit; 83 /7 ib with sign-extended 0xFF). */
static void emit_cmp_rcx_neg1_64(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x83);
    emit_u8(e, modrm(3, 7, X_RCX));
    emit_u8(e, 0xFF);
}

/* cmp ecx, -1 (32-bit variant, no REX prefix). */
static void emit_cmp_ecx_neg1_32(struct emitter *e) {
    emit_u8(e, 0x83);
    emit_u8(e, modrm(3, 7, X_RCX));
    emit_u8(e, 0xFF);
}
362
363
/* mov rax, r11 (REX.W+R 89 /r: 4C 89 D8; used to restore a dividend
 * stashed in r11 by the rem path). */
static void emit_mov_rax_r11(struct emitter *e) {
    emit_u8(e, 0x4C);
    emit_u8(e, 0x89);
    emit_u8(e, modrm(3, X_R11 & 7, X_RAX));
}

/* mov rax, rdx (64-bit; fetches the remainder after idiv/div). */
static void emit_mov_rax_rdx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x89);
    emit_u8(e, modrm(3, X_RDX, X_RAX));
}

/* cmp rax, rcx (64-bit; 39 /r, reg=rcx, rm=rax). */
static void emit_cmp_rax_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x39);
    emit_u8(e, modrm(3, X_RCX, X_RAX));
}

/* setCC al + movzx rax, al: turn the flags from a prior cmp into a
 * 0/1 value in rax.  `cc` is the setcc secondary opcode
 * (e.g. 0x9C=setl, 0x92=setb). */
static void emit_setcc_rax(struct emitter *e, u8 cc) {
    emit_u8(e, 0x0F);
    emit_u8(e, cc);
    emit_u8(e, modrm(3, 0, X_RAX)); /* setcc writes al only */
    emit_rexw(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xB6);
    emit_u8(e, modrm(3, X_RAX, X_RAX)); /* movzx rax, al */
}

/* and ecx, imm8 (83 /4 ib; masks shift amounts to 0x3F or 0x1F). */
static void emit_and_ecx_imm8(struct emitter *e, u8 mask) {
    emit_u8(e, 0x83);
    emit_u8(e, modrm(3, 4, X_RCX));
    emit_u8(e, mask);
}
402
403
/* ---------- Division helpers ------------------------------------------- */
404
405
/*
406
 * Shared skeleton for 64-bit div/rem with RISC-V corner cases:
407
 *   - divisor == 0: emit_zero_case (custom per variant)
408
 *   - signed overflow (INT_MIN / -1): emit_overflow_case
409
 *   - otherwise: normal division
410
 *
411
 * `is_signed`: whether to check for INT_MIN/-1 overflow.
412
 * `is_rem`:    whether to move rdx->rax after division (remainder result).
413
 * `save_dividend`: whether to save rax to r11 before testing (needed for
414
 *                  rem(x,0)=x semantics).
415
 */
416
static void emit_div64(struct emitter *e, bool is_signed, bool is_rem) {
417
    /* For rem: save dividend to r11 (needed if divisor==0). */
418
    if (is_rem) {
419
        /* mov r11, rax */
420
        emit_u8(e, rex(true, false, false, true));
421
        emit_u8(e, 0x89);
422
        emit_u8(e, modrm(3, X_RAX, X_R11 & 7));
423
    }
424
425
    emit_test_rcx(e);
426
    u32 jnz = emit_jnz_placeholder(e);
427
428
    /* Divisor == 0. */
429
    if (is_rem) {
430
        emit_mov_rax_r11(e); /* result = dividend */
431
    } else {
432
        emit_mov_imm32_sx(e, HTMP1, -1); /* result = -1 (all ones) */
433
    }
434
    u32 jmp_end1 = emit_jmp_placeholder(e);
435
436
    /* .nonzero: */
437
    u32 nonzero  = e->pos;
438
    u32 jne_safe = 0, jmp_end2 = 0;
439
440
    if (is_signed) {
441
        emit_cmp_rcx_neg1_64(e);
442
        jne_safe = emit_jne_placeholder(e);
443
        /* rcx == -1: overflow case. */
444
        if (is_rem) {
445
            /* result = 0 */
446
            emit_rexw(e);
447
            emit_u8(e, 0x31);
448
            emit_u8(e, modrm(3, X_RAX, X_RAX));
449
        }
450
        /* else: result = rax (already INT_MIN, which is correct) */
451
        jmp_end2 = emit_jmp_placeholder(e);
452
    }
453
454
    /* .safe: perform the actual division. */
455
    u32 safe = e->pos;
456
    if (is_signed) {
457
        emit_cqo(e);
458
        emit_idiv_rcx(e);
459
    } else {
460
        emit_xor_rdx_rdx(e);
461
        emit_div_rcx(e);
462
    }
463
    if (is_rem) {
464
        emit_mov_rax_rdx(e); /* remainder is in rdx */
465
    }
466
467
    /* .end: */
468
    u32 end = e->pos;
469
    patch_jcc(e, jnz, nonzero);
470
    patch_jmp(e, jmp_end1, end);
471
    if (is_signed) {
472
        patch_jcc(e, jne_safe, safe);
473
        patch_jmp(e, jmp_end2, end);
474
    }
475
}
476
477
/* Same pattern for 32-bit division (W-suffix instructions). */
478
static void emit_div32(struct emitter *e, bool is_signed, bool is_rem) {
479
    /* For rem: save dividend in edx. */
480
    if (is_rem) {
481
        /* mov edx, eax */
482
        emit_u8(e, 0x89);
483
        emit_u8(e, modrm(3, X_RAX, X_RDX));
484
    }
485
486
    emit_test_ecx(e);
487
    u32 jnz = emit_jnz_placeholder(e);
488
489
    /* Divisor == 0. */
490
    if (is_rem) {
491
        /* mov eax, edx (result = dividend) */
492
        emit_u8(e, 0x89);
493
        emit_u8(e, modrm(3, X_RDX, X_RAX));
494
    } else {
495
        /* mov eax, -1 */
496
        emit_u8(e, 0xB8);
497
        emit_u32(e, 0xFFFFFFFF);
498
    }
499
    u32 jmp_end1 = emit_jmp_placeholder(e);
500
501
    /* .nonzero: */
502
    u32 nonzero  = e->pos;
503
    u32 jne_safe = 0, jmp_end2 = 0;
504
505
    if (is_signed) {
506
        emit_cmp_ecx_neg1_32(e);
507
        jne_safe = emit_jne_placeholder(e);
508
        if (is_rem) {
509
            /* result = 0 */
510
            emit_u8(e, 0x31);
511
            emit_u8(e, modrm(3, X_RAX, X_RAX));
512
        }
513
        /* else: result = eax (INT_MIN stays INT_MIN) */
514
        jmp_end2 = emit_jmp_placeholder(e);
515
    }
516
517
    /* .safe: */
518
    u32 safe = e->pos;
519
    if (is_rem) {
520
        /* Restore dividend to eax from edx. */
521
        emit_u8(e, 0x89);
522
        emit_u8(e, modrm(3, X_RDX, X_RAX));
523
    }
524
    if (is_signed) {
525
        emit_u8(e, 0x99); /* cdq */
526
        emit_u8(e, 0xF7);
527
        emit_u8(e, modrm(3, 7, X_RCX)); /* idiv ecx */
528
    } else {
529
        emit_u8(e, 0x31);
530
        emit_u8(e, modrm(3, X_RDX, X_RDX)); /* xor edx,edx */
531
        emit_u8(e, 0xF7);
532
        emit_u8(e, modrm(3, 6, X_RCX)); /* div ecx */
533
    }
534
    if (is_rem) {
535
        emit_u8(e, 0x89);
536
        emit_u8(e, modrm(3, X_RDX, X_RAX)); /* mov eax,edx */
537
    }
538
539
    /* .end: */
540
    u32 end = e->pos;
541
    patch_jcc(e, jnz, nonzero);
542
    patch_jmp(e, jmp_end1, end);
543
    if (is_signed) {
544
        patch_jcc(e, jne_safe, safe);
545
        patch_jmp(e, jmp_end2, end);
546
    }
547
}
548
549
/* ---------- Memory access helpers -------------------------------------- */
550
551
/* add rax, r13 -- rebase a guest address in rax onto the host memory
 * base.  Fixed encoding: REX.W+R 01 /r with reg=r13, rm=rax. */
static void emit_add_rax_r13(struct emitter *e) {
    emit_u8(e, 0x4C);
    emit_u8(e, 0x01);
    emit_u8(e, 0xE8);
}

/* Load from [r13+rax] into rax, with sign/zero extension.  Each helper
 * first rebases the guest address, then uses a plain [rax] memory
 * operand (mod=0, rm=rax). */

/* lb: movsx rax, byte [rax] */
static void emit_load_mem_i8(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xBE);
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}

/* lbu: movzx eax, byte [rax] (32-bit op zero-extends the full rax) */
static void emit_load_mem_u8(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xB6);
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}

/* lh: movsx rax, word [rax] */
static void emit_load_mem_i16(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xBF);
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}

/* lhu: movzx eax, word [rax] */
static void emit_load_mem_u16(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xB7);
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}

/* lw: movsxd rax, dword [rax] */
static void emit_load_mem_i32(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x63);
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}

/* lwu: mov eax, dword [rax] (32-bit mov zero-extends) */
static void emit_load_mem_u32(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x8B);
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}

/* ld: mov rax, qword [rax] */
static void emit_load_mem_u64(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x8B);
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}

/* Store from rcx to [r13+rax] (address in rax, value in rcx). */

/* sb: mov byte [rax], cl */
static void emit_store_mem_u8(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x88);
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}

/* sh: mov word [rax], cx (0x66 operand-size prefix) */
static void emit_store_mem_u16(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x66);
    emit_u8(e, 0x89);
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}

/* sw: mov dword [rax], ecx */
static void emit_store_mem_u32(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x89);
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}

/* sd: mov qword [rax], rcx */
static void emit_store_mem_u64(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x89);
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}
635
636
/* ---------- Translate one RV64I instruction ----------------------------- */
637
638
/*
639
 * Returns: true if the instruction ends the basic block (branch/jump/ecall),
640
 *          false if execution should continue to the next instruction.
641
 */
642
static bool translate_insn(struct emitter *e, instr_t ins, u32 pc) {
643
    u32 opcode  = ins.r.opcode;
644
    u32 pc_next = pc + INSTR_SIZE;
645
646
    switch (opcode) {
647
648
    case OP_LUI:
649
        if (ins.u.rd != 0) {
650
            emit_mov_imm32_sx(e, HTMP1, (i32)(ins.u.imm_31_12 << 12));
651
            emit_store_rd(e, ins.u.rd);
652
        }
653
        return false;
654
655
    case OP_AUIPC:
656
        if (ins.u.rd != 0) {
657
            i64 result = (i64)pc + (i64)(i32)(ins.u.imm_31_12 << 12);
658
            emit_mov_imm64(e, HTMP1, (u64)result);
659
            emit_store_rd(e, ins.u.rd);
660
        }
661
        return false;
662
663
    case OP_JAL: {
664
        u32 target = pc + (u32)get_j_imm(ins);
665
        if (ins.j.rd != 0) {
666
            emit_mov_imm64(e, HTMP1, (u64)pc_next);
667
            emit_store_guest(e, ins.j.rd, HTMP1);
668
        }
669
        emit_block_exit(e, JIT_EXIT_BRANCH, target);
670
        return true;
671
    }
672
673
    case OP_JALR: {
674
        i32 imm = get_i_imm(ins);
675
676
        /* Special-case RET: jalr x0, ra, 0. */
677
        if (ins.i.rd == 0 && ins.i.rs1 == RA && imm == 0) {
678
            emit_load_guest(e, HTMP1, RA);
679
680
            /* test rax, rax */
681
            emit_rexw(e);
682
            emit_u8(e, 0x85);
683
            emit_u8(e, modrm(3, X_RAX, X_RAX));
684
685
            u32 jnz_pos = emit_jnz_placeholder(e);
686
687
            /* RA == 0: program exit. */
688
            emit_block_exit(e, JIT_EXIT_RET, 0);
689
690
            /* RA != 0: compute target = RA & ~1. */
691
            patch_jcc(e, jnz_pos, e->pos);
692
            emit_rexw(e);
693
            emit_u8(e, 0x83);
694
            emit_u8(e, modrm(3, 4, X_RAX));
695
            emit_u8(e, 0xFE); /* and rax, ~1 */
696
697
            /* mov dword [r14], eax -- write PC */
698
            emit_u8(e, 0x41);
699
            emit_u8(e, 0x89);
700
            emit_u8(e, modrm(0, X_RAX, X_R14 & 7));
701
702
            emit_epilogue_with_exit(e, (u32)JIT_EXIT_BRANCH);
703
            return true;
704
        }
705
706
        /* General JALR: target = (rs1 + imm) & ~1. */
707
        emit_load_rs1(e, ins.i.rs1);
708
        emit_add_rax_imm32(e, imm);
709
        emit_rexw(e);
710
        emit_u8(e, 0x83);
711
        emit_u8(e, modrm(3, 4, X_RAX));
712
        emit_u8(e, 0xFE); /* and rax, ~1 */
713
714
        /* mov rcx, rax (save target) */
715
        emit_rexw(e);
716
        emit_u8(e, 0x89);
717
        emit_u8(e, modrm(3, X_RAX, X_RCX));
718
719
        if (ins.i.rd != 0) {
720
            emit_mov_imm64(e, HTMP1, (u64)pc_next);
721
            emit_store_guest(e, ins.i.rd, HTMP1);
722
        }
723
724
        /* mov dword [r14], ecx */
725
        emit_u8(e, 0x41);
726
        emit_u8(e, 0x89);
727
        emit_u8(e, modrm(0, X_RCX, X_R14 & 7));
728
729
        emit_epilogue_with_exit(e, (u32)JIT_EXIT_BRANCH);
730
        return true;
731
    }
732
733
    case OP_BRANCH: {
734
        u32 target = pc + (u32)get_b_imm(ins);
735
736
        emit_load_rs1(e, ins.b.rs1);
737
        emit_load_rs2(e, ins.b.rs2);
738
        emit_cmp_rax_rcx(e);
739
740
        /* jCC to .taken */
741
        u8 cc;
742
        switch (ins.b.funct3) {
743
        case 0x0:
744
            cc = 0x84;
745
            break; /* beq -> je */
746
        case 0x1:
747
            cc = 0x85;
748
            break; /* bne -> jne */
749
        case 0x4:
750
            cc = 0x8C;
751
            break; /* blt -> jl */
752
        case 0x5:
753
            cc = 0x8D;
754
            break; /* bge -> jge */
755
        case 0x6:
756
            cc = 0x82;
757
            break; /* bltu -> jb */
758
        case 0x7:
759
            cc = 0x83;
760
            break; /* bgeu -> jae */
761
        default:
762
            cc = 0x84;
763
            break;
764
        }
765
766
        u32 jcc_pos = e->pos;
767
        emit_u8(e, 0x0F);
768
        emit_u8(e, cc);
769
        emit_u32(e, 0);
770
771
        /* Not taken: fall through. */
772
        emit_block_exit(e, JIT_EXIT_BRANCH, pc_next);
773
774
        /* .taken: */
775
        patch_jcc(e, jcc_pos, e->pos);
776
        emit_block_exit(e, JIT_EXIT_BRANCH, target);
777
        return true;
778
    }
779
780
    case OP_LOAD: {
781
        i32 imm = get_i_imm(ins);
782
        if (ins.i.rd == 0)
783
            return false;
784
785
        emit_load_rs1(e, ins.i.rs1);
786
        emit_add_rax_imm32(e, imm);
787
788
        switch (ins.i.funct3) {
789
        case 0x0:
790
            emit_load_mem_i8(e);
791
            break; /* lb */
792
        case 0x1:
793
            emit_load_mem_i16(e);
794
            break; /* lh */
795
        case 0x2:
796
            emit_load_mem_i32(e);
797
            break; /* lw */
798
        case 0x3:
799
            emit_load_mem_u64(e);
800
            break; /* ld */
801
        case 0x4:
802
            emit_load_mem_u8(e);
803
            break; /* lbu */
804
        case 0x5:
805
            emit_load_mem_u16(e);
806
            break; /* lhu */
807
        case 0x6:
808
            emit_load_mem_u32(e);
809
            break; /* lwu */
810
        default:
811
            emit_block_exit(e, JIT_EXIT_FAULT, pc);
812
            return true;
813
        }
814
        emit_store_rd(e, ins.i.rd);
815
        return false;
816
    }
817
818
    case OP_STORE: {
819
        i32 imm = get_s_imm(ins);
820
        emit_load_guest(e, HTMP1, ins.s.rs1);
821
        emit_add_rax_imm32(e, imm);
822
        emit_load_guest(e, HTMP2, ins.s.rs2);
823
824
        switch (ins.s.funct3) {
825
        case 0x0:
826
            emit_store_mem_u8(e);
827
            break; /* sb */
828
        case 0x1:
829
            emit_store_mem_u16(e);
830
            break; /* sh */
831
        case 0x2:
832
            emit_store_mem_u32(e);
833
            break; /* sw */
834
        case 0x3:
835
            emit_store_mem_u64(e);
836
            break; /* sd */
837
        default:
838
            emit_block_exit(e, JIT_EXIT_FAULT, pc);
839
            return true;
840
        }
841
        return false;
842
    }
843
844
    case OP_IMM: {
845
        i32 imm = get_i_imm(ins);
846
        if (ins.i.rd == 0)
847
            return false;
848
849
        emit_load_rs1(e, ins.i.rs1);
850
851
        switch (ins.i.funct3) {
852
        case 0x0: /* addi */
853
            emit_add_rax_imm32(e, imm);
854
            break;
855
        case 0x1: /* slli */
856
            emit_rexw(e);
857
            emit_u8(e, 0xC1);
858
            emit_u8(e, modrm(3, 4, X_RAX));
859
            emit_u8(e, (u8)(imm & 0x3F));
860
            break;
861
        case 0x2: /* slti */
862
            emit_mov_imm32_sx(e, HTMP2, imm);
863
            emit_cmp_rax_rcx(e);
864
            emit_setcc_rax(e, 0x9C); /* setl */
865
            break;
866
        case 0x3: /* sltiu */
867
            emit_mov_imm32_sx(e, HTMP2, imm);
868
            emit_cmp_rax_rcx(e);
869
            emit_setcc_rax(e, 0x92); /* setb */
870
            break;
871
        case 0x4: /* xori */
872
            emit_mov_imm32_sx(e, HTMP2, imm);
873
            emit_alu_rax_rcx(e, 0x31); /* xor */
874
            break;
875
        case 0x5: /* srli/srai */
876
            emit_rexw(e);
877
            emit_u8(e, 0xC1);
878
            emit_u8(e, modrm(3, (imm & 0x400) ? 7 : 5, X_RAX));
879
            emit_u8(e, (u8)(imm & 0x3F));
880
            break;
881
        case 0x6: /* ori */
882
            emit_mov_imm32_sx(e, HTMP2, imm);
883
            emit_alu_rax_rcx(e, 0x09); /* or */
884
            break;
885
        case 0x7: /* andi */
886
            emit_mov_imm32_sx(e, HTMP2, imm);
887
            emit_alu_rax_rcx(e, 0x21); /* and */
888
            break;
889
        }
890
        emit_store_rd(e, ins.i.rd);
891
        return false;
892
    }
893
894
    case OP_IMM_32: {
895
        i32 imm = get_i_imm(ins);
896
        if (ins.i.rd == 0)
897
            return false;
898
899
        emit_load_rs1(e, ins.i.rs1);
900
901
        switch (ins.i.funct3) {
902
        case 0x0: /* addiw */
903
            emit_add_rax_imm32(e, imm);
904
            break;
905
        case 0x1: /* slliw */
906
            emit_u8(e, 0xC1);
907
            emit_u8(e, modrm(3, 4, X_RAX));
908
            emit_u8(e, (u8)(imm & 0x1F));
909
            break;
910
        case 0x5: /* srliw/sraiw */
911
            emit_u8(e, 0xC1);
912
            emit_u8(e, modrm(3, (imm & 0x400) ? 7 : 5, X_RAX));
913
            emit_u8(e, (u8)(imm & 0x1F));
914
            break;
915
        }
916
        emit_movsxd_rax_eax(e);
917
        emit_store_rd(e, ins.i.rd);
918
        return false;
919
    }
920
921
    case OP_OP: {
922
        if (ins.r.rd == 0)
923
            return false;
924
        emit_load_rs1(e, ins.r.rs1);
925
        emit_load_rs2(e, ins.r.rs2);
926
927
        switch (ins.r.funct7) {
928
        case FUNCT7_NORMAL:
929
            switch (ins.r.funct3) {
930
            case 0x0:
931
                emit_alu_rax_rcx(e, 0x01);
932
                break; /* add */
933
            case 0x1:  /* sll */
934
                emit_and_ecx_imm8(e, 0x3F);
935
                emit_shift_rax_cl(e, 4);
936
                break;
937
            case 0x2: /* slt */
938
                emit_cmp_rax_rcx(e);
939
                emit_setcc_rax(e, 0x9C); /* setl */
940
                break;
941
            case 0x3: /* sltu */
942
                emit_cmp_rax_rcx(e);
943
                emit_setcc_rax(e, 0x92); /* setb */
944
                break;
945
            case 0x4:
946
                emit_alu_rax_rcx(e, 0x31);
947
                break; /* xor */
948
            case 0x5:  /* srl */
949
                emit_and_ecx_imm8(e, 0x3F);
950
                emit_shift_rax_cl(e, 5);
951
                break;
952
            case 0x6:
953
                emit_alu_rax_rcx(e, 0x09);
954
                break; /* or */
955
            case 0x7:
956
                emit_alu_rax_rcx(e, 0x21);
957
                break; /* and */
958
            }
959
            break;
960
961
        case FUNCT7_SUB:
962
            switch (ins.r.funct3) {
963
            case 0x0:
964
                emit_alu_rax_rcx(e, 0x29);
965
                break; /* sub */
966
            case 0x5:  /* sra */
967
                emit_and_ecx_imm8(e, 0x3F);
968
                emit_shift_rax_cl(e, 7);
969
                break;
970
            }
971
            break;
972
973
        case FUNCT7_MUL:
974
            switch (ins.r.funct3) {
975
            case 0x0:
976
                emit_imul_rax_rcx(e);
977
                break; /* mul */
978
            case 0x4:
979
                emit_div64(e, true, false);
980
                break; /* div */
981
            case 0x5:
982
                emit_div64(e, false, false);
983
                break; /* divu */
984
            case 0x6:
985
                emit_div64(e, true, true);
986
                break; /* rem */
987
            case 0x7:
988
                emit_div64(e, false, true);
989
                break; /* remu */
990
            }
991
            break;
992
        }
993
        emit_store_rd(e, ins.r.rd);
994
        return false;
995
    }
996
997
    case OP_OP_32: {
998
        if (ins.r.rd == 0)
999
            return false;
1000
        emit_load_rs1(e, ins.r.rs1);
1001
        emit_load_rs2(e, ins.r.rs2);
1002
1003
        switch (ins.r.funct7) {
1004
        case FUNCT7_NORMAL:
1005
            switch (ins.r.funct3) {
1006
            case 0x0: /* addw */
1007
                emit_u8(e, 0x01);
1008
                emit_u8(e, modrm(3, X_RCX, X_RAX));
1009
                break;
1010
            case 0x1: /* sllw */
1011
                emit_and_ecx_imm8(e, 0x1F);
1012
                emit_u8(e, 0xD3);
1013
                emit_u8(e, modrm(3, 4, X_RAX));
1014
                break;
1015
            case 0x5: /* srlw */
1016
                emit_and_ecx_imm8(e, 0x1F);
1017
                emit_u8(e, 0xD3);
1018
                emit_u8(e, modrm(3, 5, X_RAX));
1019
                break;
1020
            }
1021
            break;
1022
1023
        case FUNCT7_SUB:
1024
            switch (ins.r.funct3) {
1025
            case 0x0: /* subw */
1026
                emit_u8(e, 0x29);
1027
                emit_u8(e, modrm(3, X_RCX, X_RAX));
1028
                break;
1029
            case 0x5: /* sraw */
1030
                emit_and_ecx_imm8(e, 0x1F);
1031
                emit_u8(e, 0xD3);
1032
                emit_u8(e, modrm(3, 7, X_RAX));
1033
                break;
1034
            }
1035
            break;
1036
1037
        case FUNCT7_MUL:
1038
            switch (ins.r.funct3) {
1039
            case 0x0: /* mulw */
1040
                emit_u8(e, 0x0F);
1041
                emit_u8(e, 0xAF);
1042
                emit_u8(e, modrm(3, X_RAX, X_RCX));
1043
                break;
1044
            case 0x4:
1045
                emit_div32(e, true, false);
1046
                break; /* divw */
1047
            case 0x5:
1048
                emit_div32(e, false, false);
1049
                break; /* divuw */
1050
            case 0x6:
1051
                emit_div32(e, true, true);
1052
                break; /* remw */
1053
            case 0x7:
1054
                emit_div32(e, false, true);
1055
                break; /* remuw */
1056
            }
1057
            break;
1058
        }
1059
        emit_movsxd_rax_eax(e);
1060
        emit_store_rd(e, ins.r.rd);
1061
        return false;
1062
    }
1063
1064
    case OP_SYSTEM: {
1065
        u32 funct12 = ins.i.imm_11_0;
1066
        if (funct12 == 0) {
1067
            emit_block_exit(e, JIT_EXIT_ECALL, pc);
1068
        } else if (funct12 == 1) {
1069
            emit_block_exit(e, JIT_EXIT_EBREAK, pc);
1070
        } else {
1071
            emit_block_exit(e, JIT_EXIT_FAULT, pc);
1072
        }
1073
        return true;
1074
    }
1075
1076
    case OP_FENCE:
1077
        return false;
1078
1079
    default:
1080
        emit_block_exit(e, JIT_EXIT_FAULT, pc);
1081
        return true;
1082
    }
1083
}
1084
1085
/* ---------- Block compiler --------------------------------------------- */
1086
1087
static struct jit_block *jit_alloc_block(struct jit_state *jit) {
1088
    if (jit->block_count >= JIT_MAX_BLOCKS)
1089
        return NULL;
1090
    return &jit->blocks[jit->block_count++];
1091
}
1092
1093
static void jit_insert_block(struct jit_state *jit, struct jit_block *block) {
1094
    u32 h              = (block->guest_pc >> 2) & (JIT_BLOCK_HASH_SIZE - 1);
1095
    block->hash_next   = jit->block_hash[h];
1096
    jit->block_hash[h] = block;
1097
}
1098
1099
/*
 * Compile one guest basic block starting at guest_pc into native code.
 *
 * Translates instructions one at a time until a block-ending instruction
 * (branch/jump/system/fault), the per-block instruction limit, or emitter
 * capacity is reached.  On success the code is committed to the cache, a
 * block descriptor is filled in and hash-inserted, and the descriptor is
 * returned.  Returns NULL if the emit buffer overflowed, the code cache is
 * full, or the block table is full -- callers fall back to interpretation.
 *
 * Fixes over the previous version:
 *   - no longer emits a second, unreachable exit stub when the final
 *     instruction terminates the block exactly at JIT_MAX_BLOCK_INSNS;
 *   - both the block-table and code-cache capacities are checked before
 *     anything is committed, so a full block table no longer leaks
 *     already-copied code-cache bytes;
 *   - the fetch guard rejects a pc whose instruction would straddle
 *     prog_end_pc, preventing a partial out-of-bounds read.
 */
struct jit_block *jit_compile_block(
    struct jit_state *jit, u32 guest_pc, u8 *mem, u32 prog_base, u32 prog_bytes
) {

#define JIT_EMIT_BUF_SIZE (JIT_MAX_BLOCK_INSNS * JIT_MAX_INSN_BYTES + 256)
    /* NOTE: static scratch buffer -> this function is not reentrant and
     * not thread-safe; fine for a single-threaded emulator loop. */
    static u8 emit_buf[JIT_EMIT_BUF_SIZE];

    struct emitter em = {
        .buf      = emit_buf,
        .pos      = 0,
        .capacity = JIT_EMIT_BUF_SIZE,
        .overflow = false,
    };

    emit_prologue(&em);

    u32  pc          = guest_pc;
    u32  insn_count  = 0;
    u32  prog_end_pc = prog_base + prog_bytes;
    bool terminated  = false; /* set once a block-exit stub has been emitted */

    while (insn_count < JIT_MAX_BLOCK_INSNS) {
        /* Refuse to fetch outside [prog_base, prog_end_pc).  The
         * subtraction form also rejects an instruction that would straddle
         * the end of the program without risking u32 overflow. */
        if (pc < prog_base || pc >= prog_end_pc ||
            prog_end_pc - pc < (u32)sizeof(instr_t)) {
            emit_block_exit(&em, JIT_EXIT_FAULT, pc);
            terminated = true;
            break;
        }

        instr_t ins;
        memcpy(&ins, &mem[pc], sizeof(instr_t));
        insn_count++;

        bool ends_block  = translate_insn(&em, ins, pc);
        pc              += INSTR_SIZE;

        if (ends_block) {
            terminated = true;
            break;
        }

        /* Keep headroom for one more translated insn plus an exit stub;
         * end the block early (exit to dispatcher) if we are running low. */
        if (em.pos + JIT_MAX_INSN_BYTES + 64 > em.capacity) {
            emit_block_exit(&em, JIT_EXIT_BRANCH, pc);
            terminated = true;
            break;
        }
    }

    /* Instruction limit reached without a terminal exit: hand the next pc
     * back to the dispatcher. */
    if (!terminated)
        emit_block_exit(&em, JIT_EXIT_BRANCH, pc);

    if (em.overflow)
        return NULL;

    u32 code_size = em.pos;

    /* Check BOTH capacities before committing anything, so a failure here
     * leaves the JIT state completely untouched. */
    if (jit->block_count >= JIT_MAX_BLOCKS)
        return NULL;
    if (jit->code_cache_used + code_size > JIT_CODE_CACHE_SIZE)
        return NULL;

    u8 *dest = jit->code_cache + jit->code_cache_used;
    memcpy(dest, emit_buf, code_size);
    jit->code_cache_used += code_size;

    struct jit_block *block = jit_alloc_block(jit); /* cannot fail: checked above */

    block->guest_pc     = guest_pc;
    block->guest_end_pc = pc;
    block->insn_count   = insn_count;
    block->code         = dest;
    block->code_size    = code_size;
    block->hash_next    = NULL;

    jit_insert_block(jit, block);
    jit->blocks_compiled++;

    return block;
}
/* ---------- Init / Destroy / Flush ------------------------------------ */

bool jit_init(struct jit_state *jit) {
1175
    memset(jit, 0, sizeof(*jit));
1176
1177
#if defined(__x86_64__) || defined(_M_X64)
1178
    jit->code_cache = mmap(
1179
        NULL,
1180
        JIT_CODE_CACHE_SIZE,
1181
        PROT_READ | PROT_WRITE | PROT_EXEC,
1182
        MAP_PRIVATE | MAP_ANONYMOUS,
1183
        -1,
1184
        0
1185
    );
1186
    if (jit->code_cache == MAP_FAILED) {
1187
        jit->code_cache = NULL;
1188
        jit->available  = false;
1189
        return false;
1190
    }
1191
    jit->available = true;
1192
    return true;
1193
#else
1194
    jit->available = false;
1195
    return false;
1196
#endif
1197
}
1198
1199
void jit_destroy(struct jit_state *jit) {
1200
    if (jit->code_cache) {
1201
        munmap(jit->code_cache, JIT_CODE_CACHE_SIZE);
1202
        jit->code_cache = NULL;
1203
    }
1204
    jit->block_count = 0;
1205
}
1206
1207
void jit_flush(struct jit_state *jit) {
1208
    memset(jit->block_hash, 0, sizeof(jit->block_hash));
1209
    jit->block_count     = 0;
1210
    jit->code_cache_used = 0;
1211
}