Initial commit

f640ed27cbfb83b1ecd79b91d213e5be4d420ba92f7c03cf52d49ce3760bac10
Check-in of the Radiant RISC-V emulator.
MIT License.
Alexis Sellier committed
.clang-format added +19 -0
1 +
IndentWidth: 4
2 +
ColumnLimit: 80
3 +
UseTab: Never
4 +
AllowShortBlocksOnASingleLine: Always  # or use 'Empty' for only empty blocks
5 +
AllowShortFunctionsOnASingleLine: Empty
6 +
AlignConsecutiveMacros: AcrossComments
7 +
AlignAfterOpenBracket: BlockIndent
8 +
AlignConsecutiveBitFields: AcrossEmptyLinesAndComments
9 +
AlignConsecutiveDeclarations:
10 +
  Enabled: true
11 +
AlignConsecutiveAssignments:
12 +
  Enabled: true
13 +
  AlignCompound: true
14 +
  PadOperators: true
15 +
BinPackParameters: false
16 +
BinPackArguments: false
17 +
BreakAfterReturnType: Automatic
18 +
PenaltyReturnTypeOnItsOwnLine: 999
19 +
Cpp11BracedListStyle: false
.gitignore added +1 -0
1 +
/bin
.gitsigners added +1 -0
1 +
alexis ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICpDRmIwBm4ajzW+METm9tBdK4CG2/v0qmO4bPfi+s+c alexis@radiant.computer
LICENSE added +19 -0
1 +
Copyright (c) 2025-2026 Radiant Computer (https://radiant.computer)
2 +
3 +
Permission is hereby granted, free of charge, to any person obtaining a copy of
4 +
this software and associated documentation files (the "Software"), to deal in
5 +
the Software without restriction, including without limitation the rights to
6 +
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7 +
of the Software, and to permit persons to whom the Software is furnished to do
8 +
so, subject to the following conditions:
9 +
10 +
The above copyright notice and this permission notice shall be included in all
11 +
copies or substantial portions of the Software.
12 +
13 +
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 +
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 +
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 +
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 +
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 +
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 +
SOFTWARE.
Makefile added +40 -0
1 +
# Builds the RISC-V emulator.

PREFIX  ?= $(HOME)/bin
CC      := clang
CFLAGS  := -fvisibility=hidden -std=c99 -O3 -g \
           -Wall -Wextra -Wpedantic \
           -Wformat=2 -Wformat-security \
           -Wnull-dereference \
           -Wno-format-nonliteral \
           -Wcast-align \
           -Wunused -Wuninitialized \
           -Wmissing-field-initializers \
           -fno-common -fstack-protector-all \
           -mcmodel=medium \
           -march=native -flto
LDFLAGS := -fuse-ld=lld -Wl,-z,stack-size=33554432
EMULATOR_SRCS  := emulator.c jit.c riscv/debug.c io.c riscv.c

default: bin/emulator

bin/emulator: $(EMULATOR_SRCS) jit.h color.h types.h io.h riscv.h riscv/debug.h
	@echo "cc    emulator => $@"
	@mkdir -p bin
	@$(CC) $(CFLAGS) $(LDFLAGS) $(EMULATOR_SRCS) -o $@

install: bin/emulator
	@echo "copy  bin/emulator => $(PREFIX)/emulator"
	@mkdir -p $(PREFIX)
	@cp bin/emulator $(PREFIX)/emulator

fmt:
	git ls-files "*.c" "*.h" | xargs clang-format -i

clean:
	@rm -f bin/emulator

# `fmt` must be listed here too: without it, a file named `fmt` in the
# repository root would make `make fmt` a silent no-op.
.PHONY: default clean install fmt
.SUFFIXES:
.DELETE_ON_ERROR:
.SILENT:
README added +92 -0
1 +
2 +
RADIANT RISC-V EMULATOR
3 +
4 +
A RISC-V RV64I emulator with an x86-64 JIT compiler, interactive TUI
5 +
debugger, and headless execution mode. Part of the Radiant project.
6 +
7 +
Supports the RV64I base integer instruction set, M (multiply/divide)
8 +
extension, and F (single-precision floating-point) extension.
9 +
10 +
BUILDING
11 +
12 +
    $ make
13 +
14 +
  Requires `clang` and `lld`. The binary is written to `bin/emulator`.
15 +
16 +
INSTALLATION
17 +
18 +
    $ make install
19 +
20 +
  Installed as `~/bin/emulator` by default. Can be overridden via the `PREFIX`
21 +
  variable.
22 +
23 +
USAGE
24 +
25 +
  Interactive (TUI):
26 +
27 +
    $ bin/emulator <program.bin>
28 +
29 +
  Headless:
30 +
31 +
    $ bin/emulator -run <program.bin> [<option>..]
32 +
33 +
  The emulator loads a flat binary along with optional data sections:
34 +
35 +
    <program.bin>           Program text (machine instructions)
36 +
    <program.bin>.ro.data   Read-only data section
37 +
    <program.bin>.rw.data   Read-write data section
38 +
    <program.bin>.debug     Debug info (optional, for source locations)
39 +
40 +
41 +
OPTIONS
42 +
43 +
    -run                    Run headless (no TUI).
44 +
    -debug                  Load debug info for source-level diagnostics.
45 +
    -no-jit                 Disable the JIT compiler; interpret only.
46 +
    -memory-size=KB         Physical memory size in KB (default 128 MB).
47 +
    -data-size=KB           Data memory size in KB.
48 +
    -stack-size=KB          Stack size in KB (default 256 KB).
49 +
    -no-guard-stack         Disable stack guard zones (enabled by default, 16 bytes).
50 +
    -no-validate            Disable memory bounds checking.
51 +
    -trace                  Enable instruction tracing.
52 +
    -trace-headless         Enable instruction tracing in headless mode.
53 +
    -trace-instructions     Print each instruction during headless tracing.
54 +
    -trace-depth=N          Number of trace entries to display on fault.
55 +
    -max-steps=N            Maximum steps before timeout in headless mode.
56 +
    -count-instructions     Print instruction count on exit.
57 +
    -watch=ADDR             Set a memory watchpoint at ADDR.
58 +
    -watch-size=BYTES       Size of watched region (default 4).
59 +
    -watch-arm-pc=ADDR      Only trigger watchpoint after reaching ADDR.
60 +
    -watch-zero-only        Only trigger on zero-value stores.
61 +
    -watch-skip=N           Skip the first N watchpoint hits.
62 +
    -watch-backtrace        Print backtrace on watchpoint hit.
63 +
    -watch-bt-depth=N       Backtrace depth (default 8).
64 +
65 +
MEMORY LAYOUT
66 +
67 +
    0x00010000                 Read-only data (.ro.data)
68 +
    Program base               Program text (instructions)
69 +
    0x00FFFFF0                 Read-write data (.rw.data)
70 +
    Memory top - stack size    Stack
71 +
    Memory top                 Top of memory
72 +
73 +
ARCHITECTURE
74 +
75 +
  The emulator operates in two execution modes:
76 +
77 +
    Interpreter    Steps through instructions one at a time. Used for the
78 +
                   TUI debugger, ecalls, ebreak, and as a fallback.
79 +
80 +
    JIT            Translates basic blocks of RV64I instructions to native
81 +
                   x86-64 machine code. Blocks are compiled on first
82 +
                   encounter and cached (16 MB code cache, up to 256K
83 +
                   blocks). Falls back to the interpreter for system
84 +
                   calls and faults.
85 +
86 +
  The TUI debugger supports single-stepping, reverse execution (via
87 +
  snapshots), register and stack inspection, and memory watchpoints.
88 +
89 +
LICENSE
90 +
91 +
  Licensed under the MIT License,
92 +
  Copyright (c) 2025-2026 Radiant Computer (https://radiant.computer)
color.h added +18 -0
1 +
/*
 * ANSI color definitions.
 *
 * SGR (Select Graphic Rendition) escape sequences for styling terminal
 * output.  COLOR_NORMAL is an empty string so it can be interpolated where
 * no styling is wanted; COLOR_RESET restores the terminal's defaults.
 */
#define COLOR_NORMAL     ""
#define COLOR_RESET      "\033[m"
/* Text attributes. */
#define COLOR_BOLD       "\033[1m"
#define COLOR_ITALIC     "\033[3m"
#define COLOR_INVERSE    "\033[7m"
/* Foreground colors (30-37 range). */
#define COLOR_RED        "\033[31m"
#define COLOR_GREEN      "\033[32m"
#define COLOR_YELLOW     "\033[33m"
#define COLOR_BLUE       "\033[34m"
#define COLOR_MAGENTA    "\033[35m"
#define COLOR_CYAN       "\033[36m"
#define COLOR_GREY       "\033[37m"
/* Bold + color combinations. */
#define COLOR_BOLD_RED   "\033[1;31m"
#define COLOR_BOLD_GREEN "\033[1;32m"
#define COLOR_BOLD_BLUE  "\033[1;34m"
emulator.c added +2589 -0
1 +
#include <errno.h>
2 +
#include <fcntl.h>
3 +
#include <limits.h>
4 +
#include <stdint.h>
5 +
#include <stdio.h>
6 +
#include <stdlib.h>
7 +
#include <string.h>
8 +
#include <sys/ioctl.h>
9 +
#include <termios.h>
10 +
#include <unistd.h>
11 +
12 +
#include "color.h"
13 +
#include "io.h"
14 +
#include "jit.h"
15 +
#include "riscv.h"
16 +
#include "riscv/debug.h"
17 +
#include "types.h"
18 +
19 +
#ifndef PATH_MAX
20 +
#define PATH_MAX 4096
21 +
#endif
22 +
23 +
/* Define BINARY for `bail` and `assert` functions. */
24 +
#undef BINARY
25 +
#define BINARY "emulator"
26 +
27 +
/*
 * Project-local `assert` replacement.  Unlike <assert.h>, this version is
 * always enabled (it is not compiled out under NDEBUG) and routes through
 * _assert_failed so the failing expression and source location are printed
 * in a consistent format before aborting.
 */
#undef assert
#define assert(condition)                                                      \
    ((condition) ? (void)0 : _assert_failed(#condition, __FILE__, __LINE__))

/* Report a failed assertion to stderr and abort; never returns. */
static inline __attribute__((noreturn)) void _assert_failed(
    const char *condition, const char *file, int line
) {
    fprintf(stderr, "%s:%d: assertion `%s` failed\n", file, line, condition);
    abort();
}
37 +
38 +
/* Maximum physical memory size (384MB). The runtime can choose any size up to
39 +
 * this value with `-memory-size=...`. */
40 +
#define MEMORY_SIZE              (384 * 1024 * 1024)
41 +
/* Default physical memory size (128MB). */
42 +
#define DEFAULT_MEMORY_SIZE      (128 * 1024 * 1024)
43 +
/* Program memory size (4MB), reserved at start of memory for program code. */
44 +
#define PROGRAM_SIZE             (4 * 1024 * 1024)
45 +
/* Writable data region base. */
46 +
#define DATA_RW_OFFSET           0xFFFFF0
47 +
/* Data memory starts at writable data region. */
48 +
#define DATA_MEMORY_START        DATA_RW_OFFSET
49 +
/* Default data memory size. */
50 +
#define DEFAULT_DATA_MEMORY_SIZE (DEFAULT_MEMORY_SIZE - DATA_MEMORY_START)
51 +
/* Default stack size (256KB), allocated at the end of memory. */
52 +
#define DEFAULT_STACK_SIZE       (256 * 1024)
53 +
/* Maximum instructions to show in the TUI. */
54 +
#define MAX_INSTR_DISPLAY        40
55 +
/* Stack words to display in the TUI. */
56 +
#define STACK_DISPLAY_WORDS      32
57 +
/* Maximum number of CPU state snapshots to store for undo. */
58 +
#define MAX_SNAPSHOTS            64
59 +
/* Maximum open files in guest runtime. */
60 +
#define MAX_OPEN_FILES           32
61 +
/* Number of history entries to print when reporting faults. */
62 +
#define FAULT_TRACE_DEPTH        8
63 +
/* Instruction trace depth for headless tracing. */
64 +
#define TRACE_HISTORY            64
65 +
/* Maximum number of steps executed in headless mode before timing out. */
66 +
#define HEADLESS_MAX_STEPS       ((u64)1000000000000)
67 +
/* Height of header and footer in rows. */
68 +
#define HEADER_HEIGHT            2
69 +
#define FOOTER_HEIGHT            3
70 +
/* Read-only data offset. */
71 +
#define DATA_RO_OFFSET           0x10000
72 +
/* TTY escape codes. */
73 +
#define TTY_CLEAR                "\033[2J\033[H"
74 +
#define TTY_GOTO_RC              "\033[%d;%dH"
75 +
/* Exit code returned on EBREAK. */
76 +
#define EBREAK_EXIT_CODE         133
77 +
78 +
/* Registers displayed in the TUI, in order. */
79 +
static const reg_t registers_displayed[] = {
80 +
    SP, FP, RA, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T4, T5, T6
81 +
};
82 +
83 +
/* Display mode for immediates and values. */
84 +
enum display { DISPLAY_HEX, DISPLAY_DEC };
85 +
86 +
/* Debug info entry mapping PC to source location. */
87 +
struct debug_entry {
88 +
    u32  pc;
89 +
    u32  offset;
90 +
    char file[PATH_MAX];
91 +
};
92 +
93 +
/* Debug info table. */
94 +
struct debug_info {
95 +
    struct debug_entry *entries;
96 +
    size_t              count;
97 +
    size_t              capacity;
98 +
};
99 +
100 +
/* Global debug info. */
101 +
static struct debug_info g_debug = { 0 };
102 +
103 +
/* CPU state. */
104 +
struct cpu {
105 +
    u64      regs[REGISTERS];
106 +
    u32      pc;          /* Program counter. */
107 +
    u32      programsize; /* Size of loaded program. */
108 +
    instr_t *program;     /* Program instructions. */
109 +
    bool     running;     /* Execution status. */
110 +
    bool     faulted;     /* There was a fault in execution. */
111 +
    bool     ebreak;      /* Program terminated via EBREAK. */
112 +
    reg_t    modified;    /* Index of the last modified register. */
113 +
};
114 +
115 +
/* Snapshot of CPU and memory state for reversing execution. */
116 +
struct snapshot {
117 +
    struct cpu cpu;                 /* Copy of CPU state. */
118 +
    u8         memory[MEMORY_SIZE]; /* Copy of memory. */
119 +
};
120 +
121 +
/* Circular buffer for snapshots. */
122 +
struct snapshot_buffer {
123 +
    struct snapshot snapshots[MAX_SNAPSHOTS];
124 +
    int             head; /* Index of most recent snapshot. */
125 +
    int             count;
126 +
};
127 +
128 +
/* CPU memory. */
129 +
static u8 memory[MEMORY_SIZE];
130 +
131 +
/* Loaded section sizes, used for bounds checking and diagnostics. */
132 +
static u32 program_base  = 0;
133 +
static u32 program_bytes = 0;
134 +
static u32 rodata_bytes  = 0;
135 +
static u32 data_bytes    = 0;
136 +
137 +
/* Snapshot buffer. */
138 +
static struct snapshot_buffer snapshots;
139 +
140 +
/* File descriptor table for guest file operations. */
141 +
static int guest_fds[MAX_OPEN_FILES];
142 +
143 +
/* Initialize the guest file descriptor table. */
144 +
static void guest_fd_table_init(void) {
145 +
    for (int i = 0; i < MAX_OPEN_FILES; i++) {
146 +
        guest_fds[i] = -1;
147 +
    }
148 +
}
149 +
150 +
/* Add a host file descriptor to the guest table. */
151 +
static int guest_fd_table_add(int host_fd) {
152 +
    /* Start at 3 to skip stdin/stdout/stderr. */
153 +
    for (int i = 3; i < MAX_OPEN_FILES; i++) {
154 +
        if (guest_fds[i] == -1) {
155 +
            guest_fds[i] = host_fd;
156 +
            return i;
157 +
        }
158 +
    }
159 +
    return -1;
160 +
}
161 +
162 +
/* Get the host fd for a guest file descriptor. */
163 +
static int guest_fd_table_get(int guest_fd) {
164 +
    if (guest_fd < 0 || guest_fd >= MAX_OPEN_FILES) {
165 +
        return -1;
166 +
    }
167 +
    /* Standard streams map directly. */
168 +
    if (guest_fd < 3) {
169 +
        return guest_fd;
170 +
    }
171 +
    return guest_fds[guest_fd];
172 +
}
173 +
174 +
/* Remove a file descriptor from the guest table. */
175 +
static void guest_fd_table_remove(int guest_fd) {
176 +
    if (guest_fd >= 3 && guest_fd < MAX_OPEN_FILES) {
177 +
        guest_fds[guest_fd] = -1;
178 +
    }
179 +
}
180 +
181 +
/* Single entry in the instruction trace ring buffer. */
182 +
struct trace_entry {
183 +
    u32     pc;
184 +
    instr_t instr;
185 +
    u64     regs[REGISTERS];
186 +
};
187 +
188 +
/* Circular buffer of recent instruction traces. */
189 +
struct trace_ring {
190 +
    struct trace_entry entries[TRACE_HISTORY];
191 +
    int                head;
192 +
    int                count;
193 +
};
194 +
195 +
/* Headless-mode instruction trace buffer. */
196 +
static struct trace_ring headless_trace = { .head = -1, .count = 0 };
197 +
198 +
/* Terminal dimensions in rows and columns. */
199 +
struct termsize {
200 +
    int rows;
201 +
    int cols;
202 +
};
203 +
204 +
/* Forward declarations. */
205 +
static void ui_render_instructions(
206 +
    struct cpu *, int col, int width, int height
207 +
);
208 +
static void ui_render_registers(
209 +
    struct cpu *, enum display, int col, int height
210 +
);
211 +
static void ui_render_stack(struct cpu *, enum display, int col, int height);
212 +
static void ui_render(struct cpu *, enum display);
213 +
static void cpu_execute(struct cpu *, enum display, bool headless);
214 +
static void emit_fault_diagnostics(struct cpu *, u32 pc);
215 +
216 +
/* Emulator runtime options, populated from CLI flags. */
217 +
struct emulator_options {
218 +
    bool stack_guard;
219 +
    u32  stack_size;
220 +
    bool debug_enabled;
221 +
    bool trace_headless;
222 +
    bool trace_enabled;
223 +
    bool trace_print_instructions;
224 +
    u32  trace_depth;
225 +
    u64  headless_max_steps;
226 +
    u32  memory_size;
227 +
    u32  data_memory_size;
228 +
    bool watch_enabled;
229 +
    u32  watch_addr;
230 +
    u32  watch_size;
231 +
    u32  watch_arm_pc;
232 +
    bool watch_zero_only;
233 +
    u32  watch_skip;
234 +
    bool watch_backtrace;
235 +
    u32  watch_backtrace_depth;
236 +
    bool validate_memory;
237 +
    bool count_instructions;
238 +
    bool jit_disabled;
239 +
};
240 +
241 +
/* Global emulator options. */
242 +
static struct emulator_options g_opts = {
243 +
    .stack_guard              = true,
244 +
    .stack_size               = DEFAULT_STACK_SIZE,
245 +
    .debug_enabled            = false,
246 +
    .trace_headless           = false,
247 +
    .trace_enabled            = false,
248 +
    .trace_print_instructions = false,
249 +
    .trace_depth              = 32,
250 +
    .headless_max_steps       = HEADLESS_MAX_STEPS,
251 +
    .memory_size              = DEFAULT_MEMORY_SIZE,
252 +
    .data_memory_size         = DEFAULT_DATA_MEMORY_SIZE,
253 +
    .watch_enabled            = false,
254 +
    .watch_addr               = 0,
255 +
    .watch_size               = 0,
256 +
    .watch_arm_pc             = 0,
257 +
    .watch_zero_only          = false,
258 +
    .watch_skip               = 0,
259 +
    .watch_backtrace          = false,
260 +
    .watch_backtrace_depth    = 8,
261 +
    .validate_memory          = true,
262 +
    .count_instructions       = false,
263 +
    .jit_disabled             = false,
264 +
};
265 +
266 +
static void dump_watch_context(struct cpu *, u32 addr, u32 size, u32 value);
267 +
268 +
/* Return true if the given address range overlaps the watched region. */
269 +
static inline bool watch_hit(u32 addr, u32 size) {
270 +
    if (!g_opts.watch_enabled)
271 +
        return false;
272 +
    u32 start = g_opts.watch_addr;
273 +
    u32 end   = start + (g_opts.watch_size ? g_opts.watch_size : 1);
274 +
    return addr < end && (addr + size) > start;
275 +
}
276 +
277 +
/* Check a store against the memory watchpoint and halt on a hit.
 *
 * Filters are applied in order: the store must overlap the watched range,
 * the watchpoint must be armed (pc has reached -watch-arm-pc, if set),
 * the value must be zero when -watch-zero-only is set, and the first
 * -watch-skip hits are consumed silently (mutates g_opts.watch_skip).
 * On a surviving hit: prints the store, dumps optional context, and stops
 * the CPU with faulted set so diagnostics fire. */
static inline void watch_store(struct cpu *cpu, u32 addr, u32 size, u32 value) {
    if (!watch_hit(addr, size))
        return;
    /* Not yet armed: only trigger once execution reaches watch_arm_pc. */
    if (g_opts.watch_arm_pc && cpu && cpu->pc < g_opts.watch_arm_pc)
        return;
    if (g_opts.watch_zero_only && value != 0)
        return;
    /* Consume one skip credit per matching hit before reporting. */
    if (g_opts.watch_skip > 0) {
        g_opts.watch_skip--;
        return;
    }
    fprintf(
        stderr,
        "[WATCH] pc=%08x addr=%08x size=%u value=%08x\n",
        cpu ? cpu->pc : 0,
        addr,
        size,
        value
    );
    dump_watch_context(cpu, addr, size, value);
    if (cpu) {
        /* Halt and mark the stop as a fault.  NOTE(review): ebreak is also
         * set here — presumably so the run loop treats this like a
         * program-initiated stop; confirm against cpu_execute. */
        cpu->running = false;
        cpu->faulted = true;
        cpu->ebreak  = true;
    }
}
304 +
305 +
/* Fixed stack guard zone size. */
306 +
#define STACK_GUARD_BYTES 16
307 +
308 +
/* Clamp and align stack size to a valid range.
 *
 * bytes - requested stack size in bytes.
 * Returns a word-aligned size in [WORD_SIZE, memory_size - WORD_SIZE].
 * NOTE(review): assumes align() rounds up to the given multiple — confirm
 * against its definition. */
static inline u32 sanitize_stack_bytes(u32 bytes) {
    /* Enforce a minimum of one word, then word-align. */
    if (bytes < WORD_SIZE)
        bytes = WORD_SIZE;
    bytes = (u32)align((i32)bytes, WORD_SIZE);

    /* Keep at least one word for guard computations. */
    if (bytes >= g_opts.memory_size)
        bytes = g_opts.memory_size - WORD_SIZE;

    return bytes;
}
320 +
321 +
/* Return the active stack guard size, or 0 if guards are disabled. */
static inline u32 stack_guard_bytes(void) {
    if (!g_opts.stack_guard)
        return 0;
    return STACK_GUARD_BYTES;
}

/* Return the configured stack size in bytes. */
static inline u32 stack_size(void) {
    return g_opts.stack_size;
}

/* Return the highest addressable word-aligned memory address. */
static inline u32 memory_top(void) {
    return g_opts.memory_size - WORD_SIZE;
}

/* Return the lowest address in the stack region.
 * The stack occupies the top stack_size() bytes of physical memory:
 * the inclusive range [stack_bottom(), memory_top()]. */
static inline u32 stack_bottom(void) {
    return memory_top() - stack_size() + WORD_SIZE;
}

/* Return the highest usable stack address (inside the guard zone).
 * If the guard would swallow the whole stack, it is shrunk so at least
 * one word remains usable. */
static inline u32 stack_usable_top(void) {
    u32 guard = stack_guard_bytes();
    u32 size  = stack_size();
    if (guard >= size)
        guard = size - WORD_SIZE;
    return memory_top() - guard;
}

/* Return the lowest usable stack address (inside the guard zone).
 * Mirrors stack_usable_top(): a guard zone sits at each end of the stack. */
static inline u32 stack_usable_bottom(void) {
    u32 guard = stack_guard_bytes();
    u32 size  = stack_size();
    if (guard >= size)
        guard = size - WORD_SIZE;
    return stack_bottom() + guard;
}

/* Return true if addr falls within the stack region (inclusive bounds). */
static inline bool stack_contains(u32 addr) {
    return addr >= stack_bottom() && addr <= memory_top();
}

/* Return true if the range [start, end] (inclusive) overlaps a stack
 * guard zone.  There are two zones of `guard` bytes: one starting at
 * stack_bottom() and one ending at memory_top(). */
static inline bool stack_guard_overlaps(u32 guard, u32 start, u32 end) {
    if (guard == 0)
        return false;

    u32 low_guard_end    = stack_bottom() + guard - 1; /* inclusive */
    u32 high_guard_start = memory_top() - guard + 1;   /* inclusive */

    return (start <= low_guard_end && end >= stack_bottom()) ||
           (end >= high_guard_start && start <= memory_top());
}

/* Return true if addr falls inside a stack guard zone. */
static inline bool stack_guard_contains(u32 guard, u32 addr) {
    return stack_guard_overlaps(guard, addr, addr);
}
382 +
383 +
/* Load a 16-bit value from memory in little-endian byte order.
 * No bounds check: callers are expected to validate addresses. */
static inline u16 memory_load_u16(u32 addr) {
    return (u16)(memory[addr] | (memory[addr + 1] << 8));
}

/* Load a 32-bit value from memory in little-endian byte order.
 * NOTE(review): memory[addr + 3] << 24 is evaluated in (signed) int after
 * integer promotion; a top byte >= 0x80 shifts into the sign bit, which is
 * undefined in C99.  Benign on the x86-64/clang target this builds for,
 * but a ((u32)memory[addr + 3] << 24) cast would make it well-defined. */
static inline u32 memory_load_u32(u32 addr) {
    return memory[addr] | (memory[addr + 1] << 8) | (memory[addr + 2] << 16) |
           (memory[addr + 3] << 24);
}

/* Load a 64-bit value from memory in little-endian byte order. */
static inline u64 memory_load_u64(u32 addr) {
    return (u64)memory[addr] | ((u64)memory[addr + 1] << 8) |
           ((u64)memory[addr + 2] << 16) | ((u64)memory[addr + 3] << 24) |
           ((u64)memory[addr + 4] << 32) | ((u64)memory[addr + 5] << 40) |
           ((u64)memory[addr + 6] << 48) | ((u64)memory[addr + 7] << 56);
}

/* Store a byte to memory.  No bounds check. */
static inline void memory_store_u8(u32 addr, u8 value) {
    memory[addr] = value;
}

/* Store a 16-bit value to memory in little-endian byte order. */
static inline void memory_store_u16(u32 addr, u16 value) {
    memory[addr]     = (u8)(value & 0xFF);
    memory[addr + 1] = (u8)((value >> 8) & 0xFF);
}

/* Store a 32-bit value to memory in little-endian byte order.
 * Unlike the narrower stores, asserts the whole word is in bounds. */
static inline void memory_store_u32(u32 addr, u32 value) {
    assert(addr + 3 < g_opts.memory_size);
    memory[addr]     = (u8)(value & 0xFF);
    memory[addr + 1] = (u8)((value >> 8) & 0xFF);
    memory[addr + 2] = (u8)((value >> 16) & 0xFF);
    memory[addr + 3] = (u8)((value >> 24) & 0xFF);
}

/* Store a 64-bit value to memory in little-endian byte order. */
static inline void memory_store_u64(u32 addr, u64 value) {
    assert(addr + 7 < g_opts.memory_size);
    memory_store_u32(addr, (u32)(value & 0xFFFFFFFF));
    memory_store_u32(addr + 4, (u32)(value >> 32));
}

/* Load a 32-bit word from memory, returning false if out of bounds.
 * The `addr > memory_size - WORD_SIZE` form also rejects addresses whose
 * addr + WORD_SIZE would wrap around u32. */
static inline bool load_word_safe(u32 addr, u32 *out) {
    if (addr > g_opts.memory_size - WORD_SIZE)
        return false;
    *out = memory_load_u32(addr);
    return true;
}
436 +
437 +
/* Dump register state and backtrace when a watchpoint fires.
 *
 * Only active when -watch-backtrace was given.  Prints the argument and
 * frame registers, then walks the frame-pointer chain up to
 * watch_backtrace_depth frames: saved return address at fp - WORD_SIZE,
 * caller's frame pointer at fp - 2*WORD_SIZE.  Frames that fail the
 * sanity checks are printed with "(?)" and end the walk.  The addr/size/
 * value of the triggering store are already printed by the caller. */
static void dump_watch_context(struct cpu *cpu, u32 addr, u32 size, u32 value) {
    if (!g_opts.watch_backtrace || !cpu)
        return;

    (void)addr;
    (void)size;
    (void)value;

    fprintf(
        stderr,
        "         regs: SP=%08x FP=%08x RA=%08x A0=%08x A1=%08x A2=%08x "
        "A3=%08x\n",
        (u32)cpu->regs[SP],
        (u32)cpu->regs[FP],
        (u32)cpu->regs[RA],
        (u32)cpu->regs[A0],
        (u32)cpu->regs[A1],
        (u32)cpu->regs[A2],
        (u32)cpu->regs[A3]
    );

    /* Registers are 64-bit, guest addresses 32-bit: a frame pointer that
     * does not fit in 32 bits is treated as absent (0). */
    u64 fp64 = cpu->regs[FP];
    u32 fp   = (fp64 <= (u64)UINT32_MAX) ? (u32)fp64 : 0;
    u32 pc   = cpu->pc;

    fprintf(
        stderr, "         backtrace (depth %u):\n", g_opts.watch_backtrace_depth
    );

    for (u32 depth = 0; depth < g_opts.watch_backtrace_depth; depth++) {
        /* A frame is credible only if fp lies inside the stack and leaves
         * room for the two saved words below it. */
        bool has_frame = stack_contains(fp) && fp >= (2 * WORD_SIZE);
        u32  saved_ra  = 0;
        u32  prev_fp   = 0;

        if (has_frame) {
            /* Both loads are bounds-checked; the caller's fp must also
             * land back inside the stack region. */
            has_frame = load_word_safe(fp - WORD_SIZE, &saved_ra) &&
                        load_word_safe(fp - 2 * WORD_SIZE, &prev_fp) &&
                        stack_contains(prev_fp);
        }

        fprintf(
            stderr,
            "           #%u pc=%08x fp=%08x ra=%08x%s\n",
            depth,
            pc,
            fp,
            has_frame ? saved_ra : 0,
            has_frame ? "" : " (?)"
        );

        /* Stop on a broken chain, a self-referential frame, or a null fp. */
        if (!has_frame || prev_fp == fp || prev_fp == 0)
            break;

        pc = saved_ra;
        fp = prev_fp;
    }
}
495 +
496 +
/* Print usage information to stderr and return 1 (suitable for returning
 * directly from main).  Keep this flag list in sync with parse_cli_args
 * and the OPTIONS section of the README. */
static int usage(const char *prog) {
    fprintf(
        stderr,
        "usage: %s [-run] [-no-guard-stack]"
        " [-stack-size=KB] [-no-validate] [-debug]"
        " [-trace|-trace-headless] [-trace-depth=n] [-trace-instructions]"
        " [-max-steps=n] [-memory-size=KB] [-data-size=KB]"
        " [-watch=addr] [-watch-size=bytes] [-watch-arm-pc=addr]"
        " [-watch-zero-only] [-watch-skip=n]"
        " [-watch-backtrace] [-watch-bt-depth=n]"
        " [-count-instructions] [-no-jit]"
        " <file.bin> [program args...]\n",
        prog
    );
    return 1;
}
513 +
514 +
/* Configuration parsed from CLI flags prior to launching the emulator. */
515 +
struct cli_config {
516 +
    bool        headless;
517 +
    const char *program_path;
518 +
    int         arg_index;
519 +
};
520 +
521 +
/* Parse a string as an unsigned 32-bit integer.  Returns false on error. */
522 +
static bool parse_u32(const char *str, const char *label, int base, u32 *out) {
523 +
    char *end         = NULL;
524 +
    errno             = 0;
525 +
    unsigned long val = strtoul(str, &end, base);
526 +
    if (errno != 0 || end == str || *end != '\0') {
527 +
        fprintf(stderr, "invalid %s '%s'; expected integer\n", label, str);
528 +
        return false;
529 +
    }
530 +
    if (val > UINT32_MAX)
531 +
        val = UINT32_MAX;
532 +
    *out = (u32)val;
533 +
    return true;
534 +
}
535 +
536 +
/* Parse a string as an unsigned 64-bit integer.  Returns false on error. */
537 +
static bool parse_u64(const char *str, const char *label, u64 *out) {
538 +
    char *end              = NULL;
539 +
    errno                  = 0;
540 +
    unsigned long long val = strtoull(str, &end, 10);
541 +
    if (errno != 0 || end == str || *end != '\0') {
542 +
        fprintf(stderr, "invalid %s '%s'; expected integer\n", label, str);
543 +
        return false;
544 +
    }
545 +
    *out = (u64)val;
546 +
    return true;
547 +
}
548 +
549 +
/* Parse and validate the physical memory size passed to -memory-size=. */
550 +
static bool parse_memory_size_value(const char *value) {
551 +
    u64 parsed;
552 +
    if (!parse_u64(value, "memory size", &parsed))
553 +
        return false;
554 +
    u64 bytes = parsed * 1024;
555 +
    if (bytes <= (u64)(DATA_MEMORY_START + WORD_SIZE)) {
556 +
        fprintf(
557 +
            stderr,
558 +
            "memory size too small; minimum is %u KB\n",
559 +
            (DATA_MEMORY_START + WORD_SIZE + 1024) / 1024
560 +
        );
561 +
        return false;
562 +
    }
563 +
    if (bytes > (u64)MEMORY_SIZE) {
564 +
        fprintf(
565 +
            stderr,
566 +
            "memory size too large; maximum is %u KB (recompile emulator "
567 +
            "to increase)\n",
568 +
            MEMORY_SIZE / 1024
569 +
        );
570 +
        return false;
571 +
    }
572 +
    g_opts.memory_size = (u32)bytes;
573 +
    return true;
574 +
}
575 +
576 +
/* Parse and validate the depth passed to -trace-depth=. */
577 +
static bool parse_trace_depth_value(const char *value) {
578 +
    u32 parsed;
579 +
    if (!parse_u32(value, "trace depth", 10, &parsed))
580 +
        return false;
581 +
    if (parsed == 0) {
582 +
        fprintf(stderr, "trace depth must be greater than zero\n");
583 +
        return false;
584 +
    }
585 +
    if (parsed > TRACE_HISTORY)
586 +
        parsed = TRACE_HISTORY;
587 +
    g_opts.trace_depth = parsed;
588 +
    return true;
589 +
}
590 +
591 +
/* Parse and validate the step limit passed to -max-steps=. */
592 +
static bool parse_max_steps_value(const char *value) {
593 +
    u64 parsed;
594 +
    if (!parse_u64(value, "max steps", &parsed))
595 +
        return false;
596 +
    if (parsed == 0) {
597 +
        fprintf(stderr, "max steps must be greater than zero\n");
598 +
        return false;
599 +
    }
600 +
    g_opts.headless_max_steps = parsed;
601 +
    return true;
602 +
}
603 +
604 +
/* Parse and validate the stack size passed to -stack-size=. */
605 +
static bool parse_stack_size_value(const char *value) {
606 +
    u64 parsed;
607 +
    if (!parse_u64(value, "stack size", &parsed))
608 +
        return false;
609 +
    if (parsed == 0) {
610 +
        fprintf(stderr, "stack size must be greater than zero\n");
611 +
        return false;
612 +
    }
613 +
    u64 bytes = parsed * 1024;
614 +
    if (bytes >= MEMORY_SIZE) {
615 +
        fprintf(
616 +
            stderr,
617 +
            "stack size too large; maximum is %u KB\n",
618 +
            MEMORY_SIZE / 1024
619 +
        );
620 +
        return false;
621 +
    }
622 +
    g_opts.stack_size = sanitize_stack_bytes((u32)bytes);
623 +
    return true;
624 +
}
625 +
626 +
/* Parse and validate the data size passed to -data-size=. */
627 +
static bool parse_data_size_value(const char *value) {
628 +
    u64 parsed;
629 +
    if (!parse_u64(value, "data size", &parsed))
630 +
        return false;
631 +
    if (parsed == 0) {
632 +
        fprintf(stderr, "data size must be greater than zero\n");
633 +
        return false;
634 +
    }
635 +
    u64 bytes = parsed * 1024;
636 +
    if (bytes > (u64)MEMORY_SIZE) {
637 +
        fprintf(
638 +
            stderr,
639 +
            "data size too large; maximum is %u KB\n",
640 +
            MEMORY_SIZE / 1024
641 +
        );
642 +
        return false;
643 +
    }
644 +
    g_opts.data_memory_size = (u32)bytes;
645 +
    return true;
646 +
}
647 +
648 +
/* Validate that the stack fits within available memory. */
649 +
static bool validate_memory_layout(void) {
650 +
    if (g_opts.stack_size >= g_opts.memory_size) {
651 +
        fprintf(
652 +
            stderr,
653 +
            "stack size (%u) must be smaller than memory size (%u)\n",
654 +
            g_opts.stack_size,
655 +
            g_opts.memory_size
656 +
        );
657 +
        return false;
658 +
    }
659 +
    return true;
660 +
}
661 +
662 +
/* Parse emulator CLI arguments, returning the selected mode and file path.
 *
 * Scans flags left to right until "--", the first non-dash argument, or an
 * unknown flag (which prints usage and fails).  The first positional
 * argument is the program path; everything after it is forwarded to the
 * guest via cfg->arg_index.  Option values are written directly into the
 * global g_opts; mode selection goes into *cfg.
 *
 * Returns true on success; false (after printing to stderr/usage) on any
 * invalid or missing argument. */
static bool parse_cli_args(int argc, char *argv[], struct cli_config *cfg) {
    bool headless = false;
    int  argi     = 1;

    while (argi < argc) {
        const char *arg = argv[argi];

        /* "--" explicitly ends option parsing; the next argument is the
         * program path. */
        if (strcmp(arg, "--") == 0) {
            argi++;
            break;
        }
        /* First non-dash argument is the program path. */
        if (arg[0] != '-')
            break;

        /* Mode selection: run without the interactive TUI. */
        if (strcmp(arg, "-run") == 0) {
            headless = true;
            argi++;
            continue;
        }

        /* Memory layout options.  The "=N" flags use strncmp with the
         * literal prefix length and pass the value substring to a
         * dedicated parser that validates and stores it. */
        if (strncmp(arg, "-stack-size=", 12) == 0) {
            if (!parse_stack_size_value(arg + 12))
                return false;
            argi++;
            continue;
        }
        if (strcmp(arg, "-no-guard-stack") == 0) {
            g_opts.stack_guard = false;
            argi++;
            continue;
        }
        if (strcmp(arg, "-no-validate") == 0) {
            g_opts.validate_memory = false;
            argi++;
            continue;
        }
        if (strcmp(arg, "-debug") == 0) {
            g_opts.debug_enabled = true;
            argi++;
            continue;
        }
        /* NOTE(review): both -trace and -trace-headless set only
         * trace_enabled here; g_opts.trace_headless is not set in this
         * function — presumably it is set where headless mode starts.
         * Confirm, since trace_record() requires both flags. */
        if (strcmp(arg, "-trace") == 0 || strcmp(arg, "-trace-headless") == 0) {
            g_opts.trace_enabled = true;
            argi++;
            continue;
        }
        if (strcmp(arg, "-trace-instructions") == 0) {
            g_opts.trace_print_instructions = true;
            argi++;
            continue;
        }
        if (strncmp(arg, "-trace-depth=", 13) == 0) {
            if (!parse_trace_depth_value(arg + 13))
                return false;
            argi++;
            continue;
        }
        if (strncmp(arg, "-max-steps=", 11) == 0) {
            if (!parse_max_steps_value(arg + 11))
                return false;
            argi++;
            continue;
        }
        if (strncmp(arg, "-memory-size=", 13) == 0) {
            if (!parse_memory_size_value(arg + 13))
                return false;
            argi++;
            continue;
        }
        if (strncmp(arg, "-data-size=", 11) == 0) {
            if (!parse_data_size_value(arg + 11))
                return false;
            argi++;
            continue;
        }
        /* Memory watchpoint options. */
        if (strncmp(arg, "-watch=", 7) == 0) {
            if (!parse_u32(arg + 7, "watch address", 0, &g_opts.watch_addr))
                return false;
            g_opts.watch_enabled = true;
            argi++;
            continue;
        }
        if (strncmp(arg, "-watch-size=", 12) == 0) {
            if (!parse_u32(arg + 12, "watch size", 0, &g_opts.watch_size))
                return false;
            if (g_opts.watch_size == 0) {
                fprintf(stderr, "watch size must be greater than zero\n");
                return false;
            }
            argi++;
            continue;
        }
        if (strcmp(arg, "-watch-zero-only") == 0) {
            g_opts.watch_zero_only = true;
            argi++;
            continue;
        }
        if (strncmp(arg, "-watch-skip=", 12) == 0) {
            if (!parse_u32(arg + 12, "watch skip", 0, &g_opts.watch_skip))
                return false;
            argi++;
            continue;
        }
        if (strcmp(arg, "-watch-disable") == 0) {
            g_opts.watch_enabled = false;
            argi++;
            continue;
        }
        if (strncmp(arg, "-watch-arm-pc=", 14) == 0) {
            if (!parse_u32(arg + 14, "watch arm pc", 0, &g_opts.watch_arm_pc))
                return false;
            argi++;
            continue;
        }
        if (strcmp(arg, "-watch-backtrace") == 0) {
            g_opts.watch_backtrace = true;
            argi++;
            continue;
        }
        /* -watch-bt-depth=N implies -watch-backtrace. */
        if (strncmp(arg, "-watch-bt-depth=", 16) == 0) {
            u32 depth;
            if (!parse_u32(arg + 16, "watch backtrace depth", 0, &depth))
                return false;
            if (depth == 0) {
                fprintf(
                    stderr, "watch backtrace depth must be greater than zero\n"
                );
                return false;
            }
            g_opts.watch_backtrace       = true;
            g_opts.watch_backtrace_depth = depth;
            argi++;
            continue;
        }
        if (strcmp(arg, "-count-instructions") == 0) {
            g_opts.count_instructions = true;
            argi++;
            continue;
        }
        if (strcmp(arg, "-no-jit") == 0) {
            g_opts.jit_disabled = true;
            argi++;
            continue;
        }
        /* Unknown flag: print usage and fail. */
        usage(argv[0]);

        return false;
    }
    /* A program path is mandatory. */
    if (argi >= argc) {
        usage(argv[0]);
        return false;
    }
    cfg->program_path = argv[argi++];
    cfg->arg_index    = argi;
    cfg->headless     = headless;
    /* Default the watch width to one word when a watchpoint is armed but
     * no explicit size was given. */
    if (g_opts.watch_enabled && g_opts.watch_size == 0)
        g_opts.watch_size = 4;
    if (!validate_memory_layout())
        return false;

    g_opts.stack_size = sanitize_stack_bytes(g_opts.stack_size);

    return true;
}
827 +
828 +
/* Validate a load or store against memory bounds and stack guards.
 *
 * cpu      - CPU whose state is checked and, on a violation, halted.
 * addr     - effective address of the access (64-bit, pre-truncation).
 * size     - access width in bytes (0 is treated as 1).
 * base_reg - register the address was computed from, for diagnostics and
 *            for deciding whether this looks like a stack access.
 * op       - instruction mnemonic for the diagnostic message.
 * is_store - true for stores; loads from the low region are permitted,
 *            stores are not.
 *
 * Returns true if the access is allowed.  On a violation, prints a
 * diagnostic, stops the CPU, emits fault diagnostics, and returns false. */
static bool validate_memory_access(
    struct cpu *cpu,
    u64         addr,
    u32         size,
    reg_t       base_reg,
    const char *op,
    bool        is_store
) {
    /* Skip validation for performance if disabled. */
    if (!g_opts.validate_memory)
        return true;

    /* Normalize zero-width accesses so span arithmetic below is sane. */
    if (size == 0)
        size = 1;

    const char *kind = is_store ? "store" : "load";

    /* Whole-span bounds check against physical memory, done in 64 bits
     * so addr + size cannot wrap. */
    u64 span_end = addr + (u64)size;
    if (addr > (u64)g_opts.memory_size || span_end > (u64)g_opts.memory_size) {
        printf(
            "Memory %s out of bounds at PC=%08x: addr=%016llx size=%u (%s)\n",
            kind,
            cpu->pc,
            (unsigned long long)addr,
            size,
            op
        );
        cpu->running = false;
        emit_fault_diagnostics(cpu, cpu->pc);
        return false;
    }

    /* Safe to truncate now: the span is within the 32-bit memory. */
    u32 addr32 = (u32)addr;
    u32 end    = (u32)(span_end - 1); /* Inclusive last byte. */

    /* Below DATA_MEMORY_START lies the read-only region (ro.data and
     * program text): loads are fine, stores are faults. */
    if (addr32 < DATA_MEMORY_START) {
        if (is_store) {
            printf(
                "Read-only memory store at PC=%08x: addr=%08x size=%u (%s)\n",
                cpu->pc,
                addr32,
                size,
                op
            );
            cpu->running = false;
            emit_fault_diagnostics(cpu, cpu->pc);
            return false;
        }
        return true;
    }

    /* Stack checks.  An access is considered stack-related if either the
     * base register's value or any end of the accessed span lies within
     * the stack region. */
    u32  guard    = stack_guard_bytes();
    u64  base_val = cpu->regs[base_reg];
    bool base_in_stack =
        base_val <= (u64)UINT32_MAX && stack_contains((u32)base_val);
    bool start_in_stack = stack_contains(addr32);
    bool end_in_stack   = stack_contains(end);

    if (base_in_stack || start_in_stack || end_in_stack) {
        /* The span must fall entirely inside [stack_bottom, memory_top]. */
        u32 bottom = stack_bottom();
        if (addr32 < bottom || end > memory_top()) {
            printf(
                "Stack %s out of bounds at PC=%08x: base=%s (0x%08x) addr=%08x "
                "size=%u (%s)\n",
                kind,
                cpu->pc,
                reg_names[base_reg],
                (u32)cpu->regs[base_reg],
                addr32,
                size,
                op
            );
            cpu->running = false;
            emit_fault_diagnostics(cpu, cpu->pc);
            return false;
        }
        /* Guard band at the bottom of the stack catches overflows before
         * they clobber adjacent data. */
        if (stack_guard_overlaps(guard, addr32, end)) {
            printf(
                "Stack guard %s violation at PC=%08x: base=%s (0x%08x) "
                "addr=%08x "
                "size=%u guard=%u (%s)\n",
                kind,
                cpu->pc,
                reg_names[base_reg],
                (u32)cpu->regs[base_reg],
                addr32,
                size,
                guard,
                op
            );
            cpu->running = false;
            emit_fault_diagnostics(cpu, cpu->pc);
            return false;
        }
    }
    return true;
}
926 +
927 +
/* Validate that a register holds a valid stack address.
 *
 * cpu      - CPU whose state is checked and, on a violation, halted.
 * reg      - register to validate (e.g. SP or FP).
 * label    - human-readable name for diagnostics (e.g. "SP").
 * pc       - PC to report in diagnostics.
 * optional - if true, a zero value is accepted (an unset FP is legal).
 *
 * Returns true if the value is a valid stack address.  On a violation,
 * prints a diagnostic, stops the CPU, emits fault diagnostics, and
 * returns false. */
static bool validate_stack_register(
    struct cpu *cpu, reg_t reg, const char *label, u32 pc, bool optional
) {
    /* Skip validation for performance if disabled. */
    if (!g_opts.validate_memory)
        return true;

    u64 value = cpu->regs[reg];

    /* A zero value is allowed for optional registers (e.g. FP before it
     * is established). */
    if (optional && value == 0)
        return true;

    /* Detect addresses with upper bits set -- these can never be valid
     * stack addresses in the emulator's physical memory. */
    if (value > (u64)UINT32_MAX || (u32)value < stack_bottom() ||
        (u32)value > memory_top()) {
        printf(
            "%s (%s) out of stack bounds at PC=%08x: value=%016llx\n",
            label,
            reg_names[reg],
            pc,
            (unsigned long long)value
        );
        cpu->running = false;
        emit_fault_diagnostics(cpu, pc);
        return false;
    }

    /* In bounds, but still reject addresses inside the guard band at the
     * bottom of the stack. */
    u32 guard = stack_guard_bytes();

    if (stack_guard_contains(guard, (u32)value)) {
        printf(
            "Stack guard triggered by %s (%s) at PC=%08x: value=%08x "
            "guard=%u\n",
            label,
            reg_names[reg],
            pc,
            (u32)value,
            guard
        );
        cpu->running = false;
        emit_fault_diagnostics(cpu, pc);
        return false;
    }
    return true;
}
974 +
975 +
/* Toggle stack guarding in the TUI and re-validate live stack registers. */
976 +
static void toggle_stack_guard(struct cpu *cpu) {
977 +
    g_opts.stack_guard = !g_opts.stack_guard;
978 +
979 +
    printf(
980 +
        "\nStack guard %s (%u bytes)\n",
981 +
        g_opts.stack_guard ? "enabled" : "disabled",
982 +
        STACK_GUARD_BYTES
983 +
    );
984 +
985 +
    if (g_opts.stack_guard && cpu->running) {
986 +
        validate_stack_register(cpu, SP, "SP", cpu->pc, false);
987 +
        if (cpu->running) {
988 +
            validate_stack_register(cpu, FP, "FP", cpu->pc, true);
989 +
        }
990 +
    }
991 +
}
992 +
993 +
/* Get terminal dimensions. */
994 +
static struct termsize termsize(void) {
995 +
    struct winsize  w;
996 +
    struct termsize size = { 24, 80 }; /* Default fallback. */
997 +
998 +
    if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != -1) {
999 +
        size.rows = w.ws_row;
1000 +
        size.cols = w.ws_col;
1001 +
    }
1002 +
    return size;
1003 +
}
1004 +
1005 +
/* Take a snapshot of the current CPU and memory state. */
1006 +
static void snapshot_save(struct cpu *cpu) {
1007 +
    int nexti = (snapshots.head + 1 + MAX_SNAPSHOTS) % MAX_SNAPSHOTS;
1008 +
1009 +
    memcpy(&snapshots.snapshots[nexti].cpu, cpu, sizeof(struct cpu));
1010 +
    memcpy(snapshots.snapshots[nexti].memory, memory, g_opts.memory_size);
1011 +
1012 +
    /* Fix the program pointer to reference the snapshot's own memory. */
1013 +
    snapshots.snapshots[nexti].cpu.program =
1014 +
        (instr_t *)snapshots.snapshots[nexti].memory;
1015 +
1016 +
    snapshots.head = nexti;
1017 +
    if (snapshots.count < MAX_SNAPSHOTS)
1018 +
        snapshots.count++;
1019 +
}
1020 +
1021 +
/* Restore the most recent snapshot, returning false if none remain. */
1022 +
static bool snapshot_restore(struct cpu *cpu) {
1023 +
    if (snapshots.count <= 1)
1024 +
        return false;
1025 +
1026 +
    snapshots.head = (snapshots.head + MAX_SNAPSHOTS - 1) % MAX_SNAPSHOTS;
1027 +
    snapshots.count--;
1028 +
1029 +
    int previ = snapshots.head;
1030 +
    memcpy(cpu, &snapshots.snapshots[previ].cpu, sizeof(struct cpu));
1031 +
    memcpy(memory, snapshots.snapshots[previ].memory, g_opts.memory_size);
1032 +
1033 +
    /* Fix the program pointer to reference the live memory buffer. */
1034 +
    cpu->program = (instr_t *)memory;
1035 +
1036 +
    return true;
1037 +
}
1038 +
1039 +
/* Initialize the snapshot buffer with an initial snapshot. */
1040 +
static void snapshot_init(struct cpu *cpu) {
1041 +
    snapshots.head  = -1;
1042 +
    snapshots.count = 0;
1043 +
    snapshot_save(cpu);
1044 +
}
1045 +
1046 +
/* Reset the headless instruction trace buffer. */
1047 +
static void trace_reset(void) {
1048 +
    headless_trace.head  = -1;
1049 +
    headless_trace.count = 0;
1050 +
}
1051 +
1052 +
/* Record the current instruction into the trace ring buffer. */
1053 +
static void trace_record(struct cpu *cpu, instr_t ins) {
1054 +
    if (!g_opts.trace_enabled || !g_opts.trace_headless)
1055 +
        return;
1056 +
1057 +
    int next = (headless_trace.head + 1 + TRACE_HISTORY) % TRACE_HISTORY;
1058 +
1059 +
    headless_trace.head                = next;
1060 +
    headless_trace.entries[next].pc    = cpu->pc;
1061 +
    headless_trace.entries[next].instr = ins;
1062 +
    memcpy(headless_trace.entries[next].regs, cpu->regs, sizeof(cpu->regs));
1063 +
    if (headless_trace.count < TRACE_HISTORY)
1064 +
        headless_trace.count++;
1065 +
}
1066 +
1067 +
/* Dump the headless instruction trace to stdout.
 *
 * fault_pc - PC of the faulting instruction; matching entries are marked.
 *
 * Prints up to g_opts.trace_depth entries (falling back to
 * FAULT_TRACE_DEPTH, and clamped to both the ring capacity and the number
 * of recorded entries), newest first.  Returns true if anything was
 * printed, false when tracing is off or the buffer is empty. */
static bool trace_dump(u32 fault_pc) {
    if (!g_opts.trace_enabled || !g_opts.trace_headless ||
        headless_trace.count == 0)
        return false;

    /* Clamp the requested depth: default, then ring capacity, then the
     * number of entries actually recorded. */
    int limit = (int)g_opts.trace_depth;
    if (limit <= 0)
        limit = FAULT_TRACE_DEPTH;
    if (limit > TRACE_HISTORY)
        limit = TRACE_HISTORY;
    if (limit > headless_trace.count)
        limit = headless_trace.count;

    printf("Headless trace (newest first):\n");
    for (int i = 0; i < limit; i++) {
        /* Walk the ring backwards from head; + TRACE_HISTORY keeps the
         * modulo argument non-negative. */
        int idx = (headless_trace.head - i + TRACE_HISTORY) % TRACE_HISTORY;
        struct trace_entry *entry = &headless_trace.entries[idx];
        char                istr[MAX_INSTR_STR_LEN] = { 0 };

        sprint_instr(entry->instr, istr, true);

        printf(
            "  [%d] PC=%08x %s%s\n",
            i,
            entry->pc,
            istr,
            (entry->pc == fault_pc) ? "  <-- fault" : ""
        );
        /* Second line: the register snapshot taken with the entry. */
        printf(
            "       SP=%08x FP=%08x RA=%08x A0=%08x A1=%08x A2=%08x\n",
            (u32)entry->regs[SP],
            (u32)entry->regs[FP],
            (u32)entry->regs[RA],
            (u32)entry->regs[A0],
            (u32)entry->regs[A1],
            (u32)entry->regs[A2]
        );
    }
    return true;
}
1108 +
1109 +
/* Dump recent snapshot history to stdout for fault diagnostics.
 *
 * cpu      - unused; kept for signature symmetry with other dumpers.
 * fault_pc - PC of the faulting instruction; matching entries are marked.
 *
 * Prints up to FAULT_TRACE_DEPTH snapshots, newest first.  Returns true
 * if anything was printed, false when no snapshots exist. */
static bool snapshot_dump_history(struct cpu *cpu, u32 fault_pc) {
    (void)cpu;
    if (snapshots.count == 0)
        return false;

    int limit = FAULT_TRACE_DEPTH;
    if (limit > snapshots.count)
        limit = snapshots.count;

    printf("Snapshot history (newest first):\n");
    for (int i = 0; i < limit; i++) {
        /* Walk the ring backwards from head; + MAX_SNAPSHOTS keeps the
         * modulo argument non-negative. */
        int idx = (snapshots.head - i + MAX_SNAPSHOTS) % MAX_SNAPSHOTS;
        struct snapshot *snap    = &snapshots.snapshots[idx];
        u32              next_pc = snap->cpu.pc;
        /* The snapshot stores the PC of the *next* instruction; back off
         * one instruction to show the one that was last executed. */
        u32  exec_pc = next_pc >= INSTR_SIZE ? next_pc - INSTR_SIZE : next_pc;
        char istr[MAX_INSTR_STR_LEN] = { 0 };
        u32  instr_index             = exec_pc / INSTR_SIZE;

        /* Disassemble when the PC maps into the loaded program; otherwise
         * print a raw-PC placeholder. */
        if (instr_index < snap->cpu.programsize) {
            sprint_instr(snap->cpu.program[instr_index], istr, true);
        } else {
            snprintf(istr, sizeof(istr), "<pc %08x>", exec_pc);
        }

        printf(
            "  [%d] PC next=%08x prev=%08x %s%s\n",
            i,
            next_pc,
            exec_pc,
            istr,
            (exec_pc == fault_pc || next_pc == fault_pc) ? "  <-- fault" : ""
        );
        /* Second line: key registers captured in the snapshot. */
        printf(
            "       SP=%08x FP=%08x RA=%08x A0=%08x\n",
            (u32)snap->cpu.regs[SP],
            (u32)snap->cpu.regs[FP],
            (u32)snap->cpu.regs[RA],
            (u32)snap->cpu.regs[A0]
        );
    }
    return true;
}
1152 +
1153 +
/* Emit runtime fault diagnostics including trace and snapshot history. */
1154 +
static void emit_fault_diagnostics(struct cpu *cpu, u32 pc) {
1155 +
    if (cpu->faulted)
1156 +
        return;
1157 +
1158 +
    cpu->faulted = true;
1159 +
1160 +
    printf("\n--- runtime fault diagnostics ---\n");
1161 +
    bool printed = false;
1162 +
1163 +
    printed |= trace_dump(pc);
1164 +
    printed |= snapshot_dump_history(cpu, pc);
1165 +
1166 +
    if (!printed) {
1167 +
        printf("No trace data available.\n");
1168 +
    }
1169 +
    printf("--- end diagnostics ---\n");
1170 +
    fflush(stdout);
1171 +
}
1172 +
1173 +
/* Return true if the CPU's PC is outside the loaded program bounds. */
1174 +
static inline bool cpu_out_of_bounds(struct cpu *cpu) {
1175 +
    if (!g_opts.validate_memory)
1176 +
        return false;
1177 +
    if (program_bytes == 0)
1178 +
        return true;
1179 +
    if (cpu->pc < program_base)
1180 +
        return true;
1181 +
    return (cpu->pc - program_base) >= program_bytes;
1182 +
}
1183 +
1184 +
/* Last executed PC, used for detecting branches/jumps in trace mode.
 * Zero means "no instruction executed yet"; cpu_reset() clears it. */
static u32 last_executed_pc = 0;
1186 +
1187 +
/* Reset CPU state (keeping program loaded). */
1188 +
static void cpu_reset(struct cpu *cpu) {
1189 +
    trace_reset();
1190 +
    memset(cpu->regs, 0, sizeof(cpu->regs));
1191 +
1192 +
    /* Set SP to the top of the usable stack, aligned to 16 bytes
1193 +
     * as required by the RISC-V ABI. */
1194 +
    cpu->regs[SP]    = stack_usable_top() & ~0xF;
1195 +
    cpu->pc          = program_base;
1196 +
    cpu->running     = true;
1197 +
    cpu->faulted     = false;
1198 +
    cpu->ebreak      = false;
1199 +
    cpu->modified    = ZERO;
1200 +
    last_executed_pc = 0;
1201 +
}
1202 +
1203 +
/* Initialize CPU and memory to a clean state. */
1204 +
static void cpu_init(struct cpu *cpu) {
1205 +
    memset(memory, 0, g_opts.memory_size);
1206 +
    cpu->program     = (instr_t *)memory;
1207 +
    cpu->programsize = 0;
1208 +
    trace_reset();
1209 +
    guest_fd_table_init();
1210 +
    cpu_reset(cpu);
1211 +
}
1212 +
1213 +
/* Open a file via the openat syscall (56). */
1214 +
static i32 ecall_openat(u32 pathname_addr, i32 flags) {
1215 +
    if (pathname_addr >= g_opts.memory_size)
1216 +
        return -1;
1217 +
1218 +
    /* Find the null terminator to validate the string is in bounds. */
1219 +
    u32 path_end = pathname_addr;
1220 +
    while (path_end < g_opts.memory_size && memory[path_end] != 0)
1221 +
        path_end++;
1222 +
    if (path_end >= g_opts.memory_size)
1223 +
        return -1;
1224 +
1225 +
    i32 host_fd = open((const char *)&memory[pathname_addr], flags, 0644);
1226 +
    if (host_fd < 0)
1227 +
        return -1;
1228 +
1229 +
    i32 guest_fd = guest_fd_table_add(host_fd);
1230 +
    if (guest_fd < 0) {
1231 +
        close(host_fd);
1232 +
        return -1;
1233 +
    }
1234 +
    return guest_fd;
1235 +
}
1236 +
1237 +
/* Close a file descriptor via the close syscall (57). */
1238 +
static i32 ecall_close(i32 guest_fd) {
1239 +
    /* Don't close standard streams. */
1240 +
    if (guest_fd < 3)
1241 +
        return 0;
1242 +
1243 +
    i32 host_fd = guest_fd_table_get(guest_fd);
1244 +
    if (host_fd >= 0) {
1245 +
        i32 result = close(host_fd);
1246 +
        guest_fd_table_remove(guest_fd);
1247 +
        return result;
1248 +
    }
1249 +
    return -1;
1250 +
}
1251 +
1252 +
/* Load a binary section from disk into emulator memory at the given offset.
 *
 * filepath - base path of the program; the section file is
 *            "<filepath>.<suffix>".
 * suffix   - section file suffix (e.g. "ro.data", "rw.data").
 * offset   - byte offset in guest memory to load at.
 * limit    - exclusive upper bound the section must fit below.
 * label    - section name for diagnostics.
 *
 * Returns the number of bytes loaded; 0 when the section file does not
 * exist or is empty (section files are optional).  Any I/O or size error
 * terminates the process via bail(). */
static u32 load_section(
    const char *filepath,
    const char *suffix,
    u32         offset,
    u32         limit,
    const char *label
) {
    char path[PATH_MAX];
    snprintf(path, sizeof(path), "%s.%s", filepath, suffix);

    FILE *file = fopen(path, "rb");
    if (!file)
        return 0; /* Section file is optional. */

    /* Determine the section size by seeking to the end. */
    if (fseek(file, 0, SEEK_END) != 0) {
        fclose(file);
        bail("failed to seek %s section", label);
    }
    long size = ftell(file);
    if (size < 0) {
        fclose(file);
        bail("failed to determine size of %s section", label);
    }
    if (fseek(file, 0, SEEK_SET) != 0) {
        fclose(file);
        bail("failed to rewind %s section", label);
    }
    if (size == 0) {
        fclose(file);
        return 0;
    }

    /* Bounds checks in 64 bits so offset + size cannot wrap. */
    u32 u_size = (u32)size;
    u64 end    = (u64)offset + (u64)u_size;
    if (end > (u64)limit) {
        fclose(file);
        u32 max_size = limit - offset;
        bail(
            "%s section too large for emulator memory: required %u bytes, max "
            "%u bytes",
            label,
            u_size,
            max_size
        );
    }
    /* The caller's limit may exceed the configured physical memory, so
     * check against that too. */
    if (end > (u64)g_opts.memory_size) {
        fclose(file);
        u32 max_size = g_opts.memory_size - offset;
        bail(
            "%s section exceeds physical memory: required %u bytes at offset "
            "%u, "
            "but only %u bytes available (total memory=%u, use "
            "-memory-size=... or recompile emulator with a larger "
            "MEMORY_SIZE)",
            label,
            u_size,
            offset,
            max_size,
            g_opts.memory_size
        );
    }
    size_t read = fread(&memory[offset], 1, u_size, file);
    fclose(file);

    if (read != u_size) {
        bail(
            "could not read entire %s section: read %zu bytes, expected %u "
            "bytes (offset=%u, limit=%u)",
            label,
            read,
            u_size,
            offset,
            limit
        );
    }
    return u_size;
}
1330 +
1331 +
/* Prepare the environment block (argv) on the guest stack.
 *
 * cpu  - CPU whose SP is lowered to make room; A0/A1 receive the block.
 * argc - number of guest arguments (negative or NULL argv -> none).
 * argv - host argument strings to copy into guest memory.
 *
 * Layout (low to high): Env header slice, argc argument slices, then the
 * NUL-terminated argument strings.  Terminates via bail() when there is
 * no room between the static data region and the stack. */
static void prepare_env(struct cpu *cpu, int argc, char **argv) {
    /* Normalize degenerate input to "no arguments". */
    if (argc < 0 || argv == NULL)
        argc = 0;

    /* Total bytes needed for the string data. */
    usize bytes = 0;
    for (int i = 0; i < argc; i++)
        bytes += strlen(argv[i]) + 1; /* Include terminating NUL. */

    /* In RV64, slices are 16 bytes (8-byte ptr + 4-byte len + 4 padding). */
    u32 slice_size       = 16;
    u32 slice_array_size = (u32)argc * slice_size;
    u32 base_size        = slice_size + slice_array_size;
    u32 total_size       = align(base_size + (u32)bytes, 16);

    /* Place the env block below the current stack pointer so it doesn't
     * overlap with uninitialized static data.  The .rw.data file only
     * contains initialized statics; undefined statics occupy memory after
     * the loaded data but are not in the file.  Placing the env block in
     * the data region would clobber those zero-initialized areas. */
    u32 sp       = (u32)cpu->regs[SP];
    u32 env_addr = (sp - total_size) & ~0xFu; /* Keep 16-byte alignment. */

    if (env_addr <= DATA_MEMORY_START + data_bytes)
        bail("not enough memory to prepare environment block");

    /* Move SP below the env block so the program's stack doesn't overwrite it.
     */
    cpu->regs[SP] = env_addr;

    u32 slices_addr  = env_addr + slice_size;
    u32 strings_addr = slices_addr + (argc > 0 ? slice_array_size : 0);

    /* Write the Env slice header. */
    memory_store_u64(env_addr, argc > 0 ? slices_addr : 0);
    memory_store_u32(env_addr + 8, (u32)argc);

    /* Copy argument strings and populate slices. */
    u32 curr = strings_addr;
    for (int i = 0; i < argc; i++) {
        size_t len = strlen(argv[i]);
        if (curr + len >= g_opts.memory_size)
            bail("environment string does not fit in emulator memory");

        memcpy(&memory[curr], argv[i], len);
        memory[curr + len] = 0; /* Null-terminate for syscall compatibility. */

        u32 slice_entry = slices_addr + (u32)i * slice_size;
        memory_store_u64(slice_entry, curr);
        memory_store_u32(slice_entry + 8, (u32)len);

        curr += (u32)len + 1;
    }
    /* NOTE(review): both A0 and A1 receive the env block address —
     * presumably the guest startup ABI reads it from either register;
     * confirm against the runtime's entry code. */
    cpu->regs[A0] = env_addr;
    cpu->regs[A1] = env_addr;
}
1387 +
1388 +
/* Load debug information from the .debug file. */
1389 +
static void debug_load(const char *program_path) {
1390 +
    char debugpath[PATH_MAX];
1391 +
    snprintf(debugpath, sizeof(debugpath), "%s.debug", program_path);
1392 +
1393 +
    FILE *file = fopen(debugpath, "rb");
1394 +
    if (!file)
1395 +
        return; /* Debug file is optional. */
1396 +
1397 +
    g_debug.capacity = 64;
1398 +
    g_debug.entries  = malloc(sizeof(struct debug_entry) * g_debug.capacity);
1399 +
    g_debug.count    = 0;
1400 +
1401 +
    if (!g_debug.entries) {
1402 +
        fclose(file);
1403 +
        return;
1404 +
    }
1405 +
    while (!feof(file)) {
1406 +
        struct debug_entry entry;
1407 +
1408 +
        if (fread(&entry.pc, sizeof(u32), 1, file) != 1)
1409 +
            break;
1410 +
        if (fread(&entry.offset, sizeof(u32), 1, file) != 1)
1411 +
            break;
1412 +
1413 +
        /* Read null-terminated file path. */
1414 +
        size_t i = 0;
1415 +
        int    c;
1416 +
        while (i < PATH_MAX - 1 && (c = fgetc(file)) != EOF && c != '\0') {
1417 +
            entry.file[i++] = (char)c;
1418 +
        }
1419 +
        entry.file[i] = '\0';
1420 +
1421 +
        if (c == EOF && i == 0)
1422 +
            break;
1423 +
1424 +
        /* Grow array if needed. */
1425 +
        if (g_debug.count >= g_debug.capacity) {
1426 +
            g_debug.capacity *= 2;
1427 +
            g_debug.entries   = realloc(
1428 +
                g_debug.entries, sizeof(struct debug_entry) * g_debug.capacity
1429 +
            );
1430 +
            if (!g_debug.entries) {
1431 +
                fclose(file);
1432 +
                return;
1433 +
            }
1434 +
        }
1435 +
        g_debug.entries[g_debug.count++] = entry;
1436 +
    }
1437 +
    fclose(file);
1438 +
}
1439 +
1440 +
/* Look up source location for a given PC. */
1441 +
static struct debug_entry *debug_lookup(u32 pc) {
1442 +
    struct debug_entry *best = NULL;
1443 +
1444 +
    for (size_t i = 0; i < g_debug.count; i++) {
1445 +
        if (g_debug.entries[i].pc == pc) {
1446 +
            return &g_debug.entries[i];
1447 +
        }
1448 +
        /* Track the closest entry at or before this PC. */
1449 +
        if (g_debug.entries[i].pc <= pc) {
1450 +
            best = &g_debug.entries[i];
1451 +
        }
1452 +
    }
1453 +
    return best;
1454 +
}
1455 +
1456 +
/* Compute the line number from a file path and byte offset. */
1457 +
static int line_from_offset(const char *filepath, u32 offset) {
1458 +
    FILE *file = fopen(filepath, "r");
1459 +
    if (!file)
1460 +
        return 0;
1461 +
1462 +
    u32 line = 1;
1463 +
    for (u32 i = 0; i < offset; i++) {
1464 +
        int c = fgetc(file);
1465 +
        if (c == EOF)
1466 +
            break;
1467 +
        if (c == '\n')
1468 +
            line++;
1469 +
    }
1470 +
    fclose(file);
1471 +
1472 +
    return line;
1473 +
}
1474 +
1475 +
/* Load the program binary and data sections into memory.
 *
 * cpu      - CPU to receive programsize and the entry PC.
 * filepath - program file; "<filepath>.ro.data" and "<filepath>.rw.data"
 *            are loaded alongside it when present.
 *
 * Sets the globals program_bytes, data_bytes, rodata_bytes and
 * program_base.  Any error terminates the process via bail(). */
static void program_init(struct cpu *cpu, const char *filepath) {
    program_bytes = 0;
    data_bytes    = 0;
    /* Read-only data loads first, below the program text. */
    rodata_bytes  = load_section(
        filepath, "ro.data", DATA_RO_OFFSET, DATA_MEMORY_START, "ro.data"
    );
    /* Program text starts word-aligned, just after the ro.data. */
    program_base = align(DATA_RO_OFFSET + rodata_bytes, WORD_SIZE);

    if (g_opts.debug_enabled)
        debug_load(filepath);

    FILE *file = fopen(filepath, "rb");
    if (!file)
        bail("failed to open file '%s'", filepath);
    /* Determine the program size by seeking to the end. */
    if (fseek(file, 0, SEEK_END) != 0) {
        fclose(file);
        bail("failed to seek program '%s'", filepath);
    }
    long size = ftell(file);
    if (size <= 0 || size > PROGRAM_SIZE) {
        fclose(file);
        bail(
            "invalid file size: %ld; maximum program size is %d bytes",
            size,
            PROGRAM_SIZE
        );
    }
    /* The text must end before the read/write data region. */
    if (program_base + (u32)size > DATA_MEMORY_START) {
        fclose(file);
        bail("text section exceeds available program memory");
    }
    if (fseek(file, 0, SEEK_SET) != 0) {
        fclose(file);
        bail("failed to rewind program '%s'", filepath);
    }

    usize read = fread(&memory[program_base], 1, size, file);
    fclose(file);
    if (read != (size_t)size)
        bail("could not read entire file");

    program_bytes    = (u32)size;
    cpu->programsize = (u32)size / sizeof(instr_t);

    /* The rw.data section may use at most data_memory_size bytes, and in
     * any case must fit in physical memory. */
    u32 data_limit = DATA_MEMORY_START + g_opts.data_memory_size;
    if (data_limit > g_opts.memory_size)
        data_limit = g_opts.memory_size;

    data_bytes = load_section(
        filepath, "rw.data", DATA_MEMORY_START, data_limit, "rw.data"
    );
    cpu->pc = program_base;
}
1529 +
1530 +
/* Execute a single instruction.
 *
 * Fetches the instruction at cpu->pc, decodes it by opcode, applies its
 * effects to the register file / memory, and advances cpu->pc.  Faults
 * stop the CPU (cpu->running = false) and emit diagnostics; in headless
 * mode an out-of-bounds PC is reported on stderr instead of aborting via
 * bail().  `display` selects the UI mode used when the program terminates
 * interactively.  cpu->modified records the destination register of this
 * instruction (ZERO when none) for UI highlighting. */
static void cpu_execute(struct cpu *cpu, enum display display, bool headless) {
    if (cpu_out_of_bounds(cpu)) {
        cpu->running = false;
        emit_fault_diagnostics(cpu, cpu->pc);
        if (headless) {
            fprintf(stderr, "program is out of bounds\n");
            return;
        }
        bail("program is out of bounds");
    }

    u32     executed_pc = cpu->pc;
    instr_t ins         = cpu->program[cpu->pc / sizeof(instr_t)];
    u32     pc_next     = cpu->pc + INSTR_SIZE;
    u32     opcode      = ins.r.opcode;

    cpu->modified = ZERO;
    trace_record(cpu, ins);

    /* Print instruction if tracing is enabled in headless mode.
     * Skip NOPs (addi x0, x0, 0 = 0x00000013). */
    if (headless && g_opts.trace_print_instructions && ins.raw != 0x00000013) {
        /* Print ellipsis if we jumped to a non-sequential instruction. */
        if (last_executed_pc != 0 &&
            executed_pc != last_executed_pc + INSTR_SIZE) {
            printf("%s  :%s\n", COLOR_GREY, COLOR_RESET);
        }

        char istr[MAX_INSTR_STR_LEN] = { 0 };
        int  len                     = sprint_instr(ins, istr, true);
        int  padding                 = INSTR_STR_LEN - len;
        /* Clamp: a negative printf '*' width would flip justification. */
        if (padding < 0)
            padding = 0;
        printf(
            "%s%08x%s %s%-*s%s",
            COLOR_GREY,
            executed_pc,
            COLOR_RESET,
            istr,
            padding,
            "",
            COLOR_GREY
        );

        /* Print all non-zero registers. */
        bool first = true;
        for (int i = 0; i < REGISTERS; i++) {
            if (cpu->regs[i] != 0) {
                if (!first)
                    printf(" ");
                printf("%s=%08x", reg_names[i], (u32)cpu->regs[i]);
                first = false;
            }
        }
        printf("%s\n", COLOR_RESET);
    }

    switch (opcode) {
    case OP_LUI:
        /* lui: load upper immediate into rd.  Writes to x0 are dropped. */
        if (ins.u.rd != 0) {
            u32 lui_val = ins.u.imm_31_12 << 12;
            cpu->regs[ins.u.rd] =
                (u64)(i64)(i32)lui_val; /* RV64: sign-extend to 64 bits. */
            cpu->modified = ins.u.rd;
        }
        break;

    case OP_AUIPC:
        /* auipc: rd = pc + (imm << 12). */
        if (ins.u.rd != 0) {
            u32 auipc_val = ins.u.imm_31_12 << 12;
            cpu->regs[ins.u.rd] =
                cpu->pc +
                (u64)(i64)(i32)auipc_val; /* RV64: sign-extend offset. */
            cpu->modified = (reg_t)ins.u.rd;
        }
        break;

    case OP_JAL: {
        /* jal: rd = return address; pc = pc + imm. */
        i32 imm = get_j_imm(ins);
        if (ins.j.rd != 0) {
            cpu->regs[ins.j.rd] = pc_next;
            cpu->modified       = (reg_t)ins.j.rd;
        }
        pc_next = cpu->pc + imm;
        break;
    }

    case OP_JALR: {
        i32 imm = get_i_imm(ins);
        if (ins.i.rd != 0) {
            cpu->regs[ins.i.rd] = pc_next;
            cpu->modified       = (reg_t)ins.i.rd;
        }
        /* Calculate target address in full 64-bit precision. */
        u64 jalr_target = (cpu->regs[ins.i.rs1] + (i64)imm) & ~(u64)1;
        /* Check if this is a RET instruction (jalr x0, ra, 0).  A RET
         * whose target is address 0 means the outermost frame returned:
         * treat it as program termination rather than a jump. */
        if (ins.i.rd == 0 && ins.i.rs1 == 1 && imm == 0 && jalr_target == 0) {
            cpu->running = false;
            if (!headless) {
                ui_render(cpu, display);

                printf(
                    "\n%sProgram terminated with return value %d (0x%08x)%s ",
                    COLOR_BOLD_GREEN,
                    (i32)cpu->regs[A0],
                    (u32)cpu->regs[A0],
                    COLOR_RESET
                );
            }
        } else {
            pc_next = (u32)jalr_target;
        }
        break;
    }

    case OP_BRANCH: {
        bool jump = false;
        i32  imm  = get_b_imm(ins);

        /* NOTE: the FUNCT3_* constants below are named after the load
         * widths but are reused here purely for their numeric values; the
         * per-case comments give the actual branch mnemonics. */
        switch (ins.b.funct3) {
        case FUNCT3_BYTE: /* beq.  */
            jump = (cpu->regs[ins.b.rs1] == cpu->regs[ins.b.rs2]);
            break;
        case FUNCT3_HALF: /* bne.  */
            jump = (cpu->regs[ins.b.rs1] != cpu->regs[ins.b.rs2]);
            break;
        case FUNCT3_BYTE_U: /* blt.  */
            jump = ((i64)cpu->regs[ins.b.rs1] < (i64)cpu->regs[ins.b.rs2]);
            break;
        case FUNCT3_HALF_U: /* bge.  */
            jump = ((i64)cpu->regs[ins.b.rs1] >= (i64)cpu->regs[ins.b.rs2]);
            break;
        case FUNCT3_OR: /* bltu. */
            jump = (cpu->regs[ins.b.rs1] < cpu->regs[ins.b.rs2]);
            break;
        case FUNCT3_AND: /* bgeu. */
            jump = (cpu->regs[ins.b.rs1] >= cpu->regs[ins.b.rs2]);
            break;
        }
        if (jump) {
            pc_next = cpu->pc + imm;
        }
        break;
    }

    case OP_LOAD: {
        i32 imm  = get_i_imm(ins);
        u64 addr = cpu->regs[ins.i.rs1] + (i64)imm;

        /* A load into x0 is a no-op; skip validation entirely. */
        if (ins.i.rd == ZERO)
            break;

        cpu->modified = (reg_t)ins.i.rd;
        bool fault    = false;

        switch (ins.i.funct3) {
        case FUNCT3_BYTE: /* lb. */
            if (!validate_memory_access(cpu, addr, 1, ins.i.rs1, "lb", false)) {
                fault = true;
                break;
            }
            /* sign_extend returns i32; on RV64 we sign-extend to 64 bits. */
            cpu->regs[ins.i.rd] = (u64)(i64)sign_extend(memory[addr], 8);
            break;
        case FUNCT3_HALF: /* lh. */
            if (!validate_memory_access(cpu, addr, 2, ins.i.rs1, "lh", false)) {
                fault = true;
                break;
            }
            cpu->regs[ins.i.rd] =
                (u64)(i64)sign_extend(memory_load_u16(addr), 16);
            break;
        case FUNCT3_WORD: /* lw. */
            if (!validate_memory_access(cpu, addr, 4, ins.i.rs1, "lw", false)) {
                fault = true;
                break;
            }
            /* RV64: lw sign-extends the 32-bit value to 64 bits. */
            cpu->regs[ins.i.rd] = (u64)(i64)(i32)memory_load_u32(addr);
            break;
        case 0x6: /* lwu (RV64). */
            if (!validate_memory_access(
                    cpu, addr, 4, ins.i.rs1, "lwu", false
                )) {
                fault = true;
                break;
            }
            cpu->regs[ins.i.rd] = (u64)memory_load_u32(addr);
            break;
        case FUNCT3_BYTE_U: /* lbu. */
            if (!validate_memory_access(
                    cpu, addr, 1, ins.i.rs1, "lbu", false
                )) {
                fault = true;
                break;
            }
            cpu->regs[ins.i.rd] = memory[addr];
            break;
        case FUNCT3_HALF_U: /* lhu. */
            if (!validate_memory_access(
                    cpu, addr, 2, ins.i.rs1, "lhu", false
                )) {
                fault = true;
                break;
            }
            cpu->regs[ins.i.rd] = memory_load_u16(addr);
            break;
        case 0x3: /* ld (RV64). */
            if (!validate_memory_access(cpu, addr, 8, ins.i.rs1, "ld", false)) {
                fault = true;
                break;
            }
            cpu->regs[ins.i.rd] = memory_load_u64(addr);
            break;
        }
        /* NOTE(review): both paths below break out of OP_LOAD, so the
         * guarded break is redundant — harmless, but dead code. */
        if (fault || !cpu->running)
            break;
        break;
    }

    case OP_STORE: {
        i32 imm  = get_s_imm(ins);
        u64 addr = cpu->regs[ins.s.rs1] + (i64)imm;

        /* Each store validates the access first, then notifies any
         * watchpoints before writing. */
        switch (ins.s.funct3) {
        case FUNCT3_BYTE: /* sb. */
            if (!validate_memory_access(cpu, addr, 1, ins.s.rs1, "sb", true))
                break;
            watch_store(cpu, (u32)addr, 1, (u32)cpu->regs[ins.s.rs2]);
            memory_store_u8(addr, (u8)cpu->regs[ins.s.rs2]);
            break;
        case FUNCT3_HALF: /* sh. */
            if (!validate_memory_access(cpu, addr, 2, ins.s.rs1, "sh", true))
                break;
            watch_store(cpu, (u32)addr, 2, (u32)cpu->regs[ins.s.rs2]);
            memory_store_u16(addr, (u16)cpu->regs[ins.s.rs2]);
            break;
        case FUNCT3_WORD: /* sw. */
            if (!validate_memory_access(cpu, addr, 4, ins.s.rs1, "sw", true))
                break;
            watch_store(cpu, (u32)addr, 4, (u32)cpu->regs[ins.s.rs2]);
            memory_store_u32(addr, (u32)cpu->regs[ins.s.rs2]);
            break;
        case 0x3: /* sd (RV64). */
            if (!validate_memory_access(cpu, addr, 8, ins.s.rs1, "sd", true))
                break;
            /* NOTE(review): watch_store is given only the low 32 bits of
             * the stored doubleword; the memory write itself is 64-bit. */
            watch_store(cpu, (u32)addr, 8, (u32)cpu->regs[ins.s.rs2]);
            memory_store_u64(addr, cpu->regs[ins.s.rs2]);
            break;
        }
        break;
    }

    case OP_IMM: {
        i32 imm        = get_i_imm(ins);
        u32 shamt_mask = 0x3F; /* RV64: 6-bit shift amounts. */

        if (ins.i.rd == ZERO)
            break;

        cpu->modified = (reg_t)ins.i.rd;

        switch (ins.i.funct3) {
        case FUNCT3_ADD: /* addi.  */
            cpu->regs[ins.i.rd] = cpu->regs[ins.i.rs1] + imm;
            break;
        case FUNCT3_SLL: /* slli.  */
            cpu->regs[ins.i.rd] = cpu->regs[ins.i.rs1] << (imm & shamt_mask);
            break;
        case FUNCT3_SLT: /* slti.  */
            cpu->regs[ins.i.rd] =
                ((i64)cpu->regs[ins.i.rs1] < (i64)imm) ? 1 : 0;
            break;
        case FUNCT3_SLTU: /* sltiu. */
            cpu->regs[ins.i.rd] =
                (cpu->regs[ins.i.rs1] < (u64)(i64)imm) ? 1 : 0;
            break;
        case FUNCT3_XOR: /* xori.  */
            cpu->regs[ins.i.rd] = cpu->regs[ins.i.rs1] ^ imm;
            break;
        case FUNCT3_SRL: /* srli/srai. */
            /* Bit 10 of the immediate distinguishes srai from srli. */
            if ((imm & 0x400) == 0) {
                /* srli -- logical right shift. */
                cpu->regs[ins.i.rd] =
                    cpu->regs[ins.i.rs1] >> (imm & shamt_mask);
            } else {
                /* srai -- arithmetic right shift. */
                cpu->regs[ins.i.rd] =
                    (u64)((i64)cpu->regs[ins.i.rs1] >> (imm & shamt_mask));
            }
            break;
        case FUNCT3_OR: /* ori.   */
            cpu->regs[ins.i.rd] = cpu->regs[ins.i.rs1] | imm;
            break;
        case FUNCT3_AND: /* andi.  */
            cpu->regs[ins.i.rd] = cpu->regs[ins.i.rs1] & imm;
            break;
        }
        break;
    }

    case OP_IMM_32: {
        /* RV64I: 32-bit immediate operations (ADDIW, SLLIW, SRLIW, SRAIW).
         * These operate on the lower 32 bits and sign-extend the result. */
        i32 imm = get_i_imm(ins);

        if (ins.i.rd == ZERO)
            break;

        cpu->modified = (reg_t)ins.i.rd;

        switch (ins.i.funct3) {
        case FUNCT3_ADD: { /* addiw. */
            i32 result          = (i32)cpu->regs[ins.i.rs1] + imm;
            cpu->regs[ins.i.rd] = (u64)(i64)result;
            break;
        }
        case FUNCT3_SLL: { /* slliw. */
            i32 result = (i32)((u32)cpu->regs[ins.i.rs1] << (imm & 0x1F));
            cpu->regs[ins.i.rd] = (u64)(i64)result;
            break;
        }
        case FUNCT3_SRL: { /* srliw/sraiw. */
            if ((imm & 0x400) == 0) {
                /* srliw -- logical right shift, then sign-extend. */
                i32 result = (i32)((u32)cpu->regs[ins.i.rs1] >> (imm & 0x1F));
                cpu->regs[ins.i.rd] = (u64)(i64)result;
            } else {
                /* sraiw -- arithmetic right shift, then sign-extend. */
                i32 result          = (i32)cpu->regs[ins.i.rs1] >> (imm & 0x1F);
                cpu->regs[ins.i.rd] = (u64)(i64)result;
            }
            break;
        }
        }
        break;
    }

    case OP_OP: {
        /* Register-register ops; funct7 selects base/sub/mul groups. */
        if (ins.r.rd == ZERO)
            break;

        cpu->modified = (reg_t)ins.r.rd;

        switch (ins.r.funct7) {
        case FUNCT7_NORMAL: {
            u32 shamt_mask = 0x3F;
            switch (ins.r.funct3) {
            case FUNCT3_ADD: /* add.  */
                cpu->regs[ins.r.rd] =
                    cpu->regs[ins.r.rs1] + cpu->regs[ins.r.rs2];
                break;
            case FUNCT3_SLL: /* sll.  */
                cpu->regs[ins.r.rd] = cpu->regs[ins.r.rs1]
                                      << (cpu->regs[ins.r.rs2] & shamt_mask);
                break;
            case FUNCT3_SLT: /* slt.  */
                cpu->regs[ins.r.rd] =
                    ((i64)cpu->regs[ins.r.rs1] < (i64)cpu->regs[ins.r.rs2]) ? 1
                                                                            : 0;
                break;
            case FUNCT3_SLTU: /* sltu. */
                cpu->regs[ins.r.rd] =
                    (cpu->regs[ins.r.rs1] < cpu->regs[ins.r.rs2]) ? 1 : 0;
                break;
            case FUNCT3_XOR: /* xor.  */
                cpu->regs[ins.r.rd] =
                    cpu->regs[ins.r.rs1] ^ cpu->regs[ins.r.rs2];
                break;
            case FUNCT3_SRL: /* srl.  */
                cpu->regs[ins.r.rd] =
                    cpu->regs[ins.r.rs1] >> (cpu->regs[ins.r.rs2] & shamt_mask);
                break;
            case FUNCT3_OR: /* or.   */
                cpu->regs[ins.r.rd] =
                    cpu->regs[ins.r.rs1] | cpu->regs[ins.r.rs2];
                break;
            case FUNCT3_AND: /* and.  */
                cpu->regs[ins.r.rd] =
                    cpu->regs[ins.r.rs1] & cpu->regs[ins.r.rs2];
                break;
            }
            break;
        }

        case FUNCT7_SUB:
            switch (ins.r.funct3) {
            case FUNCT3_ADD: /* sub. */
                cpu->regs[ins.r.rd] =
                    cpu->regs[ins.r.rs1] - cpu->regs[ins.r.rs2];
                break;
            case FUNCT3_SRL: /* sra. */
                cpu->regs[ins.r.rd] = (u64)((i64)cpu->regs[ins.r.rs1] >>
                                            (cpu->regs[ins.r.rs2] & 0x3F));
                break;
            }
            break;

        case FUNCT7_MUL:
            /* M extension.  Division/remainder by zero follows the RISC-V
             * convention: quotient = all ones, remainder = dividend. */
            switch (ins.r.funct3) {
            case FUNCT3_ADD: /* mul.  */
                cpu->regs[ins.r.rd] =
                    cpu->regs[ins.r.rs1] * cpu->regs[ins.r.rs2];
                break;
            case FUNCT3_XOR: /* div.  */
                if (cpu->regs[ins.r.rs2] != 0) {
                    cpu->regs[ins.r.rd] = (u64)((i64)cpu->regs[ins.r.rs1] /
                                                (i64)cpu->regs[ins.r.rs2]);
                } else {
                    cpu->regs[ins.r.rd] = (u64)-1; /* Division by zero. */
                }
                break;
            case FUNCT3_SRL: /* divu. */
                if (cpu->regs[ins.r.rs2] != 0) {
                    cpu->regs[ins.r.rd] =
                        cpu->regs[ins.r.rs1] / cpu->regs[ins.r.rs2];
                } else {
                    cpu->regs[ins.r.rd] = (u64)-1; /* Division by zero. */
                }
                break;
            case FUNCT3_OR: /* rem.  */
                if (cpu->regs[ins.r.rs2] != 0) {
                    cpu->regs[ins.r.rd] = (u64)((i64)cpu->regs[ins.r.rs1] %
                                                (i64)cpu->regs[ins.r.rs2]);
                } else {
                    cpu->regs[ins.r.rd] = cpu->regs[ins.r.rs1];
                }
                break;
            case FUNCT3_AND: /* remu. */
                if (cpu->regs[ins.r.rs2] != 0) {
                    cpu->regs[ins.r.rd] =
                        cpu->regs[ins.r.rs1] % cpu->regs[ins.r.rs2];
                } else {
                    cpu->regs[ins.r.rd] = cpu->regs[ins.r.rs1];
                }
                break;
            }
            break;
        }
        break;
    }

    case OP_OP_32: {
        /* RV64I: 32-bit register-register operations (ADDW, SUBW, SLLW, SRLW,
         * SRAW, MULW, DIVW, DIVUW, REMW, REMUW). These operate on the lower 32
         * bits and sign-extend the result to 64 bits. */
        if (ins.r.rd == ZERO)
            break;

        cpu->modified = (reg_t)ins.r.rd;
        u32 rs1_32    = (u32)cpu->regs[ins.r.rs1];
        u32 rs2_32    = (u32)cpu->regs[ins.r.rs2];

        switch (ins.r.funct7) {
        case FUNCT7_NORMAL:
            switch (ins.r.funct3) {
            case FUNCT3_ADD: { /* addw. */
                i32 result          = (i32)(rs1_32 + rs2_32);
                cpu->regs[ins.r.rd] = (u64)(i64)result;
                break;
            }
            case FUNCT3_SLL: { /* sllw. */
                i32 result          = (i32)(rs1_32 << (rs2_32 & 0x1F));
                cpu->regs[ins.r.rd] = (u64)(i64)result;
                break;
            }
            case FUNCT3_SRL: { /* srlw. */
                i32 result          = (i32)(rs1_32 >> (rs2_32 & 0x1F));
                cpu->regs[ins.r.rd] = (u64)(i64)result;
                break;
            }
            }
            break;

        case FUNCT7_SUB:
            switch (ins.r.funct3) {
            case FUNCT3_ADD: { /* subw. */
                i32 result          = (i32)(rs1_32 - rs2_32);
                cpu->regs[ins.r.rd] = (u64)(i64)result;
                break;
            }
            case FUNCT3_SRL: { /* sraw. */
                i32 result          = (i32)rs1_32 >> (rs2_32 & 0x1F);
                cpu->regs[ins.r.rd] = (u64)(i64)result;
                break;
            }
            }
            break;

        case FUNCT7_MUL:
            switch (ins.r.funct3) {
            case FUNCT3_ADD: { /* mulw.  */
                i32 result          = (i32)(rs1_32 * rs2_32);
                cpu->regs[ins.r.rd] = (u64)(i64)result;
                break;
            }
            case FUNCT3_XOR: { /* divw.  */
                if (rs2_32 != 0) {
                    i32 result          = (i32)rs1_32 / (i32)rs2_32;
                    cpu->regs[ins.r.rd] = (u64)(i64)result;
                } else {
                    cpu->regs[ins.r.rd] = (u64)(i64)(i32)-1;
                }
                break;
            }
            case FUNCT3_SRL: { /* divuw. */
                if (rs2_32 != 0) {
                    i32 result          = (i32)(rs1_32 / rs2_32);
                    cpu->regs[ins.r.rd] = (u64)(i64)result;
                } else {
                    cpu->regs[ins.r.rd] = (u64)(i64)(i32)-1;
                }
                break;
            }
            case FUNCT3_OR: { /* remw.  */
                if (rs2_32 != 0) {
                    i32 result          = (i32)rs1_32 % (i32)rs2_32;
                    cpu->regs[ins.r.rd] = (u64)(i64)result;
                } else {
                    cpu->regs[ins.r.rd] = (u64)(i64)(i32)rs1_32;
                }
                break;
            }
            case FUNCT3_AND: { /* remuw. */
                if (rs2_32 != 0) {
                    i32 result          = (i32)(rs1_32 % rs2_32);
                    cpu->regs[ins.r.rd] = (u64)(i64)result;
                } else {
                    cpu->regs[ins.r.rd] = (u64)(i64)(i32)rs1_32;
                }
                break;
            }
            }
            break;
        }
        break;
    }

    case OP_SYSTEM: {
        u32 funct12 = ins.i.imm_11_0;

        /* funct12 == 0 is ECALL; the syscall number is taken from a7 and
         * the numbers mirror the Linux riscv64 syscall ABI. */
        if (funct12 == 0) {
            u32 syscall_num = (u32)cpu->regs[A7];

            switch (syscall_num) {
            case 64: { /* write. */
                int guest_fd = (int)cpu->regs[A0];
                u64 addr     = cpu->regs[A1];
                u64 count    = cpu->regs[A2];

                if (addr + count > g_opts.memory_size ||
                    addr > (u64)g_opts.memory_size) {
                    printf(
                        "sys_write out of bounds: addr=%016llx len=%llu\n",
                        (unsigned long long)addr,
                        (unsigned long long)count
                    );
                    cpu->running = false;
                    emit_fault_diagnostics(cpu, executed_pc);
                    break;
                }
                ssize_t written = 0;
                int     host_fd = guest_fd_table_get(guest_fd);

                /* Host write errors are reported as 0 bytes written. */
                if (host_fd >= 0 && count > 0) {
                    written = write(host_fd, &memory[(u32)addr], (u32)count);
                    if (written < 0) {
                        written = 0;
                    }
                }
                cpu->regs[A0] = (u64)written;
                break;
            }
            case 63: { /* read. */
                int guest_fd = (int)cpu->regs[A0];
                u64 addr     = cpu->regs[A1];
                u64 count    = cpu->regs[A2];

                if (addr + count > g_opts.memory_size ||
                    addr > (u64)g_opts.memory_size) {
                    printf(
                        "sys_read out of bounds: addr=%016llx len=%llu\n",
                        (unsigned long long)addr,
                        (unsigned long long)count
                    );
                    cpu->running = false;
                    emit_fault_diagnostics(cpu, executed_pc);
                    break;
                }
                ssize_t read_bytes = 0;
                int     host_fd    = guest_fd_table_get(guest_fd);

                /* Host read errors are reported as 0 bytes read. */
                if (host_fd >= 0 && count > 0) {
                    read_bytes = read(host_fd, &memory[(u32)addr], (u32)count);
                    if (read_bytes < 0) {
                        read_bytes = 0;
                    }
                }
                cpu->regs[A0] = (u64)read_bytes;
                break;
            }
            case 93: { /* exit. */
                cpu->running = false;
                break;
            }
            case 56: { /* openat. */
                u64 pathname_addr = cpu->regs[A1];
                i32 flags         = (i32)cpu->regs[A2];
                if (pathname_addr > (u64)g_opts.memory_size) {
                    cpu->regs[A0] = (u64)(i64)(i32)-1;
                    break;
                }
                cpu->regs[A0] =
                    (u64)(i64)(i32)ecall_openat((u32)pathname_addr, flags);
                break;
            }
            case 57: { /* close. */
                i32 guest_fd  = (i32)cpu->regs[A0];
                cpu->regs[A0] = (u64)(i64)ecall_close(guest_fd);
                break;
            }
            default:
                /* Unknown syscall: echo the number back in a0. */
                cpu->regs[A0] = (u32)syscall_num;
                break;
            }
        } else if (funct12 == 1) {
            /* funct12 == 1 is EBREAK: treat as a runtime error.
             * Look up source location for this EBREAK.  PC in the debug
             * file is relative to program start, so subtract base. */
            u32                 relative_pc = executed_pc - program_base;
            struct debug_entry *entry       = debug_lookup(relative_pc);

            printf("\n%sRuntime error (EBREAK)%s", COLOR_BOLD_RED, COLOR_RESET);
            if (entry) {
                u32 line = line_from_offset(entry->file, entry->offset);
                printf(
                    " at %s%s:%d%s", COLOR_CYAN, entry->file, line, COLOR_RESET
                );
            }
            printf("\n");

            cpu->running  = false;
            cpu->regs[A0] = EBREAK_EXIT_CODE;
            cpu->ebreak   = true;
            emit_fault_diagnostics(cpu, executed_pc);
            /* EBREAK is a controlled stop, not a fault: clear the flag the
             * diagnostics path may have set. */
            cpu->faulted = false;
        } else {
            printf(
                "\n%sUnknown system instruction (imm=%08x)%s\n",
                COLOR_BOLD_RED,
                funct12,
                COLOR_RESET
            );
            cpu->running = false;
            emit_fault_diagnostics(cpu, executed_pc);
        }
        break;
    }

    case OP_FENCE:
        /* Memory barriers are not implemented. */
        break;

    default:
        printf("Unknown opcode %02x at PC=%08x\n", opcode, cpu->pc);
        cpu->running = false;
        emit_fault_diagnostics(cpu, executed_pc);
        break;
    }
    /* Register x0 is hardwired to zero. */
    cpu->regs[ZERO] = 0;
    cpu->pc         = pc_next;

    /* Sanity-check the stack registers after every instruction; a bad SP
     * stops the CPU before FP is checked. */
    if (cpu->running) {
        validate_stack_register(cpu, SP, "SP", executed_pc, false);
        if (cpu->running)
            validate_stack_register(cpu, FP, "FP", executed_pc, true);
    }

    /* Track last executed PC for trace mode. */
    if (headless && g_opts.trace_print_instructions)
        last_executed_pc = executed_pc;
}
2213 +
2214 +
/* Render the instructions column. */
2215 +
static void ui_render_instructions(
2216 +
    struct cpu *cpu, int col, int width, int height
2217 +
) {
2218 +
    int row = 1;
2219 +
2220 +
    u32 program_start_idx = program_base / INSTR_SIZE;
2221 +
    u32 program_end_idx   = program_start_idx + cpu->programsize;
2222 +
2223 +
    /* Calculate PC index in program. */
2224 +
    u32 pc_idx = cpu->pc / sizeof(instr_t);
2225 +
    if (pc_idx < program_start_idx || pc_idx >= program_end_idx)
2226 +
        pc_idx = program_start_idx;
2227 +
2228 +
    /* Calculate first instruction to display, centering PC if possible. */
2229 +
    i32 progstart = (i32)pc_idx - height / 2;
2230 +
    i32 min_start = (i32)program_start_idx;
2231 +
    i32 max_start = (i32)program_end_idx - height;
2232 +
2233 +
    if (max_start < min_start)
2234 +
        max_start = min_start;
2235 +
    if (progstart < min_start)
2236 +
        progstart = min_start;
2237 +
    if (progstart > max_start)
2238 +
        progstart = max_start;
2239 +
2240 +
    printf(TTY_GOTO_RC, row++, col);
2241 +
    printf("  INSTRUCTIONS");
2242 +
2243 +
    for (int i = 0; i < height; i++) {
2244 +
        u32 idx = (u32)progstart + i;
2245 +
        if (idx >= program_end_idx)
2246 +
            break;
2247 +
2248 +
        printf(TTY_GOTO_RC, row + i + 1, col);
2249 +
2250 +
        char istr[MAX_INSTR_STR_LEN] = { 0 };
2251 +
        int  len = sprint_instr(cpu->program[idx], istr, true);
2252 +
2253 +
        if (idx == pc_idx) { /* Highlight current instruction. */
2254 +
            printf("%s>%s %04x: ", COLOR_GREEN, COLOR_RESET, idx * INSTR_SIZE);
2255 +
        } else {
2256 +
            printf("  %s%04x:%s ", COLOR_GREY, idx * INSTR_SIZE, COLOR_RESET);
2257 +
        }
2258 +
        printf("%s", istr);
2259 +
        printf("%-*s", width - len - 8, "");
2260 +
    }
2261 +
}
2262 +
2263 +
/* Render the registers column. */
2264 +
static void ui_render_registers(
2265 +
    struct cpu *cpu, enum display display, int col, int height
2266 +
) {
2267 +
    int row = 1;
2268 +
2269 +
    printf(TTY_GOTO_RC, row++, col);
2270 +
    printf("REGISTERS");
2271 +
2272 +
    int reg_count =
2273 +
        sizeof(registers_displayed) / sizeof(registers_displayed[0]);
2274 +
    if (reg_count > height)
2275 +
        reg_count = height;
2276 +
2277 +
    for (int i = 0; i < reg_count; i++) {
2278 +
        printf(TTY_GOTO_RC, row + i + 1, col);
2279 +
2280 +
        reg_t       r = registers_displayed[i];
2281 +
        const char *reg_color =
2282 +
            (r == cpu->modified) ? COLOR_BOLD_BLUE : COLOR_BLUE;
2283 +
        u64  reg_value = cpu->regs[r];
2284 +
        bool is_stack_addr =
2285 +
            reg_value <= (u64)UINT32_MAX && stack_contains((u32)reg_value);
2286 +
2287 +
        /* Always show registers that contain stack addresses in hex. */
2288 +
        printf("%s%-2s%s = ", COLOR_GREEN, reg_names[r], COLOR_RESET);
2289 +
        if (display == DISPLAY_HEX || is_stack_addr) {
2290 +
            printf("%s0x%08x%s", reg_color, (u32)reg_value, COLOR_RESET);
2291 +
        } else {
2292 +
            printf("%s%-10d%s", reg_color, (i32)reg_value, COLOR_RESET);
2293 +
        }
2294 +
    }
2295 +
}
2296 +
2297 +
/* Render the stack column: up to `height` (capped at STACK_DISPLAY_WORDS)
 * words of the active frame, walking downward from the highest displayed
 * address.  Each row shows an sp/fp marker, the signed byte offset from
 * SP, the word's address, and its value. */
static void ui_render_stack(
    struct cpu *cpu, enum display display, int col, int height
) {
    int row = 1;

    printf(TTY_GOTO_RC, row++, col);
    printf("     STACK FRAME");

    assert(cpu->regs[SP] <= memory_top() && cpu->regs[FP] <= memory_top());

    u32 fp   = (u32)cpu->regs[FP];
    u32 sp   = (u32)cpu->regs[SP];
    u32 rows = (u32)height;
    if (rows > STACK_DISPLAY_WORDS)
        rows = STACK_DISPLAY_WORDS;
    if (rows == 0)
        return;

    /* Clamp SP/FP into the usable stack range before computing offsets. */
    u32 top    = stack_usable_top();
    u32 bottom = stack_usable_bottom();
    if (sp > top)
        sp = top;
    if (fp > top)
        fp = top;

    /* Words in use between SP and the usable top, inclusive. */
    u32 used_bytes  = (top >= sp) ? (top - sp) : 0;
    u32 total_words = (used_bytes / WORD_SIZE) + 1;
    u32 frame_words = total_words;
    if (frame_words > rows)
        frame_words = rows;
    if (frame_words == 0)
        return;

    /* Highest address to display: the whole frame when it fits on
     * screen, otherwise the `frame_words` words nearest to SP. */
    u32 start;
    if (frame_words == total_words) {
        start = top;
    } else {
        start = sp + (frame_words - 1) * WORD_SIZE;
        if (start > top)
            start = top;
    }

    if (start < bottom)
        start = bottom;

    u32 addr   = start;
    i32 offset = (i32)(start - sp); /* Signed byte offset from SP. */

    for (u32 i = 0; i < frame_words; i++) {
        if (addr < bottom)
            break;

        assert(addr <= memory_top());
        printf(TTY_GOTO_RC, row + i + 1, col);

        /* Mark SP and FP positions. */
        const char *marker = "  ";

        if (addr == sp) {
            marker = "sp";
        } else if (addr == fp) {
            marker = "fp";
        }
        u32 word = memory_load_u32(addr);

        /* The SP row shows blanks in the offset column instead of "+0". */
        char offset_buf[6];
        if (addr == sp) {
            memcpy(offset_buf, "    ", 5);
        } else {
            snprintf(offset_buf, sizeof(offset_buf), "%+4d", offset);
        }

        printf(
            "%s%s %s%s%s %08x: ",
            COLOR_GREEN,
            marker,
            COLOR_GREY,
            offset_buf,
            COLOR_RESET,
            addr
        );
        bool is_stack_addr = stack_contains(word);

        /* Values that look like stack addresses are always shown in hex. */
        if (display == DISPLAY_HEX || is_stack_addr) {
            printf("%s0x%08x%s", COLOR_BLUE, word, COLOR_RESET);
        } else {
            printf("%s%-10d%s", COLOR_BLUE, (i32)word, COLOR_RESET);
        }
        /* Stop before the unsigned subtraction below could wrap. */
        if (addr < WORD_SIZE)
            break;

        addr   -= WORD_SIZE;
        offset -= WORD_SIZE;
    }
}
2393 +
2394 +
/* Render the full debugger TUI. */
2395 +
static void ui_render(struct cpu *cpu, enum display display) {
2396 +
    printf(TTY_CLEAR);
2397 +
2398 +
    struct termsize tsize = termsize();
2399 +
2400 +
    /* Enforce a minimum display size. */
2401 +
    if (tsize.cols < 60)
2402 +
        tsize.cols = 60;
2403 +
    if (tsize.rows < 15)
2404 +
        tsize.rows = 15;
2405 +
2406 +
    /* Column layout: 40% instructions, 20% registers, rest for stack. */
2407 +
    int instr_width = (tsize.cols * 2) / 5;
2408 +
    int reg_width   = tsize.cols / 5;
2409 +
2410 +
    int instr_col = 1;
2411 +
    int reg_col   = instr_col + instr_width + 2;
2412 +
    int stack_col = reg_col + reg_width + 2;
2413 +
2414 +
    int display_height = tsize.rows - FOOTER_HEIGHT - HEADER_HEIGHT;
2415 +
    if (display_height > MAX_INSTR_DISPLAY)
2416 +
        display_height = MAX_INSTR_DISPLAY;
2417 +
    if (display_height <= 0)
2418 +
        display_height = 1;
2419 +
2420 +
    ui_render_instructions(cpu, instr_col, instr_width, display_height);
2421 +
    ui_render_registers(cpu, display, reg_col, display_height);
2422 +
    ui_render_stack(cpu, display, stack_col, display_height);
2423 +
2424 +
    printf(TTY_GOTO_RC, display_height + FOOTER_HEIGHT, 1);
2425 +
    printf(
2426 +
        "%sPress `j` to step forward, `k` to step backward, `q` to quit,\n"
2427 +
        "`d` to toggle decimal display, `r` to reset program.%s ",
2428 +
        COLOR_GREY,
2429 +
        COLOR_RESET
2430 +
    );
2431 +
}
2432 +
2433 +
/* Set up the terminal for interactive mode (no line buffering, no echo),
 * saving the original settings into `oldterm` for term_restore().
 * If the current attributes cannot be read (e.g. stdin is not a tty),
 * the terminal is left untouched rather than applying an uninitialized
 * termios structure. */
static void term_init(struct termios *oldterm) {
    struct termios term;

    /* Original code ignored this result; on failure `*oldterm` held
     * garbage that was then written back via tcsetattr. */
    if (tcgetattr(STDIN_FILENO, oldterm) != 0)
        return;

    term          = *oldterm;
    term.c_lflag &= ~(ICANON | ECHO); /* raw-ish input: per-key, no echo */
    tcsetattr(STDIN_FILENO, TCSANOW, &term);
}
2442 +
2443 +
/* Restore the terminal settings previously saved by term_init(). */
static void term_restore(struct termios *old) {
    tcsetattr(STDIN_FILENO, TCSANOW, old);
}
2447 +
2448 +
int main(int argc, char *argv[]) {
2449 +
    struct cpu        cpu;
2450 +
    enum display      display = DISPLAY_DEC;
2451 +
    struct cli_config cli     = { 0 };
2452 +
2453 +
    if (!parse_cli_args(argc, argv, &cli))
2454 +
        return 1;
2455 +
2456 +
    bool headless = cli.headless;
2457 +
2458 +
    g_opts.trace_headless = headless;
2459 +
2460 +
    cpu_init(&cpu);
2461 +
    program_init(&cpu, cli.program_path);
2462 +
    int    prog_argc = argc - cli.arg_index;
2463 +
    char **prog_argv = &argv[cli.arg_index];
2464 +
    prepare_env(&cpu, prog_argc, prog_argv);
2465 +
2466 +
    if (headless) {
2467 +
        u64 max_steps = g_opts.headless_max_steps;
2468 +
        u64 steps     = 0;
2469 +
2470 +
        /* Try to initialise the JIT for headless mode.  Falls back to the
2471 +
         * interpreter automatically when JIT is disabled, unavailable,
2472 +
         * or when the code cache fills up. */
2473 +
        static struct jit_state jit;
2474 +
        bool                    use_jit = false;
2475 +
2476 +
        if (!g_opts.jit_disabled && !g_opts.trace_enabled &&
2477 +
            !g_opts.trace_print_instructions && !g_opts.watch_enabled) {
2478 +
            use_jit = jit_init(&jit);
2479 +
        }
2480 +
2481 +
        if (use_jit) {
2482 +
            /* ---- JIT execution loop ---- */
2483 +
            while (cpu.running && steps < max_steps) {
2484 +
                struct jit_block *block = jit_get_block(
2485 +
                    &jit, cpu.pc, memory, program_base, program_bytes
2486 +
                );
2487 +
                if (!block) {
2488 +
                    /* Cache full or compilation error -- fall back to
2489 +
                     * interpreter for remainder. */
2490 +
                    while (cpu.running && steps++ < max_steps) {
2491 +
                        cpu_execute(&cpu, display, true);
2492 +
                    }
2493 +
                    break;
2494 +
                }
2495 +
                u32 next_pc = 0;
2496 +
                int exit_reason =
2497 +
                    jit_exec_block(block, cpu.regs, memory, &next_pc);
2498 +
                steps += block->insn_count;
2499 +
                jit.blocks_executed++;
2500 +
                jit.insns_executed += block->insn_count;
2501 +
2502 +
                switch (exit_reason) {
2503 +
                case JIT_EXIT_BRANCH:
2504 +
                case JIT_EXIT_CHAIN:
2505 +
                    cpu.pc = next_pc;
2506 +
                    break;
2507 +
2508 +
                case JIT_EXIT_RET:
2509 +
                    cpu.running = false;
2510 +
                    break;
2511 +
2512 +
                default:
2513 +
                    /* ECALL, EBREAK, FAULT -- interpreter handles it. */
2514 +
                    cpu.pc = next_pc;
2515 +
                    cpu_execute(&cpu, display, true);
2516 +
                    steps++;
2517 +
                    break;
2518 +
                }
2519 +
            }
2520 +
            jit_destroy(&jit);
2521 +
        } else {
2522 +
            /* ---- Interpreter-only loop ---- */
2523 +
            while (cpu.running && steps++ < max_steps) {
2524 +
                cpu_execute(&cpu, display, true);
2525 +
            }
2526 +
        }
2527 +
2528 +
        if (cpu.running) {
2529 +
            fprintf(
2530 +
                stderr,
2531 +
                "program did not terminate within %zu steps\n",
2532 +
                (size_t)max_steps
2533 +
            );
2534 +
            return -1;
2535 +
        }
2536 +
        if (cpu.faulted) {
2537 +
            fprintf(stderr, "program terminated due to runtime fault\n");
2538 +
            return -1;
2539 +
        }
2540 +
        if (g_opts.count_instructions) {
2541 +
            fprintf(
2542 +
                stderr,
2543 +
                "Processed %llu instructions\n",
2544 +
                (unsigned long long)steps
2545 +
            );
2546 +
        }
2547 +
        return (int)cpu.regs[A0];
2548 +
    }
2549 +
    struct termios oldterm;
2550 +
    term_init(&oldterm);
2551 +
    snapshot_init(&cpu);
2552 +
2553 +
    for (;;) {
2554 +
        if (cpu.running)
2555 +
            ui_render(&cpu, display);
2556 +
2557 +
        int ch = getchar();
2558 +
2559 +
        if (ch == 'q' || ch == 'Q') {
2560 +
            printf("\n");
2561 +
            break;
2562 +
        } else if (ch == 'd' || ch == 'D') { /* Toggle display mode. */
2563 +
            display = (display == DISPLAY_HEX) ? DISPLAY_DEC : DISPLAY_HEX;
2564 +
        } else if (ch == 'r' || ch == 'R') { /* Reset program and state. */
2565 +
            cpu_reset(&cpu);
2566 +
            snapshot_init(&cpu);
2567 +
        } else if (ch == 'g' || ch == 'G') { /* Toggle stack guard. */
2568 +
            toggle_stack_guard(&cpu);
2569 +
        } else if (ch == 'j' && cpu.running) { /* Step forward. */
2570 +
            cpu_execute(&cpu, display, false);
2571 +
            snapshot_save(&cpu);
2572 +
        } else if (ch == 'k' && cpu.pc > 0) { /* Step backward. */
2573 +
            bool restored = snapshot_restore(&cpu);
2574 +
2575 +
            if (restored) {
2576 +
                cpu.running = true;
2577 +
            } else {
2578 +
                printf(
2579 +
                    "\n%sNo more history to go back to.%s\n",
2580 +
                    COLOR_BOLD_RED,
2581 +
                    COLOR_RESET
2582 +
                );
2583 +
            }
2584 +
        }
2585 +
    }
2586 +
    term_restore(&oldterm);
2587 +
2588 +
    return 0;
2589 +
}
io.c added +58 -0
1 +
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "io.h"
#include "types.h"
8 +
9 +
i32 readfile(const char *path, char **data) {
10 +
    FILE *fp   = NULL;
11 +
    i32   size = -1;
12 +
13 +
    *data = NULL;
14 +
15 +
    if (!(fp = fopen(path, "r"))) {
16 +
        goto cleanup;
17 +
    }
18 +
    if (fseek(fp, 0L, SEEK_END) != 0) {
19 +
        goto cleanup;
20 +
    }
21 +
    if ((size = ftell(fp)) < 0) {
22 +
        goto cleanup;
23 +
    }
24 +
    if (fseek(fp, 0L, SEEK_SET) != 0) {
25 +
        goto cleanup;
26 +
    }
27 +
    if ((*data = malloc((size_t)size + 1)) == NULL) {
28 +
        goto cleanup;
29 +
    }
30 +
    if (fread(*data, 1, (size_t)size, fp) != (size_t)size) {
31 +
        size = -1;
32 +
        goto cleanup;
33 +
    }
34 +
    (*data)[size] = '\0';
35 +
36 +
cleanup:
37 +
    if (fp) {
38 +
        fclose(fp);
39 +
    }
40 +
    if (size < 0 && *data) {
41 +
        free(*data);
42 +
        *data = NULL;
43 +
    }
44 +
    return size;
45 +
}
46 +
47 +
/* Backing implementation of bail(): print a printf-style fatal message
 * prefixed with the source location, then terminate with status 1. */
void _bail(const char *file, i32 line, const char *restrict fmt, ...) {
    va_list args;

    /* Flush stdout first so the error lands after any pending output. */
    fflush(stdout);

    fprintf(stderr, "%s:%d: fatal: ", file, line);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    fprintf(stderr, "\n");

    exit(1);
}
io.h added +19 -0
1 +
#ifndef IO_H
#define IO_H

#include "types.h"

/* Print a printf-style fatal error (prefixed with file/line) to stderr
 * and exit with status 1.  Never returns. */
#define bail(...) _bail(__FILE__, __LINE__, __VA_ARGS__)

/* Debug output - disabled for bootstrap compiler. */
#define debug(...) ((void)0)

/* Backing implementation for bail().
 * NOTE(review): the definition in io.c declares `line` as i32 while this
 * prototype uses int -- presumably the same underlying type; confirm. */
__attribute__((noreturn)) void _bail(
    const char *file, int line, const char *restrict fmt, ...
);

/* Read a file in its entirety into `data` (NUL-terminated; caller frees).
 * Returns the size in bytes, or -1 on error. */
i32 readfile(const char *path, char **data);

#endif /* IO_H */
jit.c added +1211 -0
1 +
/*
2 +
 * JIT compiler: RV64I basic-block -> x86-64 native code.
3 +
 *
4 +
 * Strategy:
5 +
 *   - Guest registers live in a u64[32] array pointed to by the first arg
6 +
 *     (rdi on entry).  Memory base is in rsi.  The third arg (rdx) is a
7 +
 *     pointer where we write the next guest PC on exit.
8 +
 *   - During block execution we pin:
9 +
 *       r12 = &regs[0]   (guest register file)
10 +
 *       r13 = memory base
11 +
 *       r14 = pc_out pointer
12 +
 *   - Guest registers are loaded/stored on demand from/to the array.
13 +
 *   - At block boundaries we store the next guest PC into *pc_out and
14 +
 *     return an exit-reason code in eax.
15 +
 *
16 +
 *  We emit raw x86-64 bytes into the code cache.
17 +
 */
18 +
19 +
#include <string.h>
20 +
#include <sys/mman.h>
21 +
22 +
#include "jit.h"
23 +
#include "riscv.h"
24 +
#include "types.h"
25 +
26 +
/* ---------- x86-64 code emitter helpers --------------------------------- */
27 +
28 +
/* A small code buffer that we fill up, then copy into the cache.
 * emit_u8() stops writing and latches `overflow` once `pos` reaches
 * `capacity`, so the buffer contents are only valid when `overflow`
 * is false (presumably checked by the block compiler -- confirm). */
struct emitter {
    u8  *buf;      /* Start of buffer. */
    u32  pos;      /* Current write position. */
    u32  capacity; /* Maximum bytes. */
    bool overflow; /* Set if we ran out of space. */
};
35 +
36 +
static inline void emit_u8(struct emitter *e, u8 b) {
37 +
    if (e->pos < e->capacity)
38 +
        e->buf[e->pos++] = b;
39 +
    else
40 +
        e->overflow = true;
41 +
}
42 +
43 +
static inline void emit_u32(struct emitter *e, u32 v) {
44 +
    emit_u8(e, (u8)(v));
45 +
    emit_u8(e, (u8)(v >> 8));
46 +
    emit_u8(e, (u8)(v >> 16));
47 +
    emit_u8(e, (u8)(v >> 24));
48 +
}
49 +
50 +
static inline void emit_u64(struct emitter *e, u64 v) {
51 +
    emit_u32(e, (u32)v);
52 +
    emit_u32(e, (u32)(v >> 32));
53 +
}
54 +
55 +
/* x86-64 register encoding (for ModR/M, SIB, REX).  Values 8-15 need a
 * REX extension bit; helpers below test `reg >= 8` for that. */
enum x86reg {
    X_RAX = 0,
    X_RCX = 1,
    X_RDX = 2,
    X_RBX = 3,
    X_RSP = 4,
    X_RBP = 5,
    X_RSI = 6,
    X_RDI = 7,
    X_R8  = 8,
    X_R9  = 9,
    X_R10 = 10,
    X_R11 = 11,
    X_R12 = 12,
    X_R13 = 13,
    X_R14 = 14,
    X_R15 = 15,
};

/* REX prefix byte.  W=64-bit operand size, R=ModR/M.reg extension,
 * X=SIB index extension, B=ModR/M.rm (or SIB base) extension. */
static inline u8 rex(bool w, bool r, bool x, bool b) {
    return (u8)(0x40 | (w ? 8 : 0) | (r ? 4 : 0) | (x ? 2 : 0) | (b ? 1 : 0));
}

/* ModR/M byte: mod (2 bits) | reg (3 bits) | rm (3 bits). */
static inline u8 modrm(u8 mod, u8 reg, u8 rm) {
    return (u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7));
}

/* SIB byte: scale (2 bits) | index (3 bits) | base (3 bits). */
static inline u8 sib(u8 scale, u8 index, u8 base) {
    return (u8)((scale << 6) | ((index & 7) << 3) | (base & 7));
}
89 +
90 +
/* Pinned host registers (set up by emit_prologue, callee-saved). */
#define HREGS  X_R12 /* &regs[0]  */
#define HMEM   X_R13 /* memory    */
#define HPCOUT X_R14 /* pc_out    */

/* Scratch registers for codegen (caller-saved, not pinned). */
#define HTMP1 X_RAX
#define HTMP2 X_RCX

/* ---------- Common encoding helpers ------------------------------------ */

/* Emit REX.W prefix (64-bit operand size, no extended regs). */
static inline void emit_rexw(struct emitter *e) {
    emit_u8(e, 0x48);
}

/* Emit a 2-register ALU op: <op> rax, rcx (64-bit).
 * `opcode` is the x86 opcode byte (e.g. 0x01=add, 0x29=sub, etc). */
static void emit_alu_rax_rcx(struct emitter *e, u8 opcode) {
    emit_rexw(e);
    emit_u8(e, opcode);
    emit_u8(e, modrm(3, X_RCX, X_RAX)); /* mod=3: register-direct */
}

/* Emit a shift: <shift> rax, cl (64-bit).
 * `ext` is the ModR/M extension (4=SHL, 5=SHR, 7=SAR). */
static void emit_shift_rax_cl(struct emitter *e, u8 ext) {
    emit_rexw(e);
    emit_u8(e, 0xD3); /* group-2 shift r/m64 by cl */
    emit_u8(e, modrm(3, ext, X_RAX));
}
121 +
122 +
/* ---------- Load/store guest register from register file --------------- */

/* Emit mov with [r12 + disp] addressing (r12 needs SIB: rm=100 selects
 * the SIB byte, and index=100 in the SIB means "no index"). */
static void emit_r12_disp(
    struct emitter *e, u8 opcode, enum x86reg reg, u32 off
) {
    emit_u8(e, rex(true, reg >= 8, false, true)); /* B=1 for r12 */
    emit_u8(e, opcode);
    if (off < 128) {
        /* disp8 form. */
        emit_u8(e, modrm(1, reg, X_R12 & 7));
        emit_u8(e, sib(0, 4, X_R12 & 7));
        emit_u8(e, (u8)off);
    } else {
        /* disp32 form. */
        emit_u8(e, modrm(2, reg, X_R12 & 7));
        emit_u8(e, sib(0, 4, X_R12 & 7));
        emit_u32(e, off);
    }
}

/* Load guest register `guest` into host `dst` from [r12 + guest*8]. */
static void emit_load_guest(struct emitter *e, enum x86reg dst, u32 guest) {
    if (guest == 0) {
        /* x0 is always zero -- xor dst, dst. */
        emit_u8(e, rex(true, dst >= 8, false, dst >= 8));
        emit_u8(e, 0x31);
        emit_u8(e, modrm(3, dst, dst));
        return;
    }
    emit_r12_disp(e, 0x8B, dst, guest * 8); /* mov r64, r/m64 */
}

/* Store host `src` to guest register [r12 + guest*8]. */
static void emit_store_guest(struct emitter *e, u32 guest, enum x86reg src) {
    if (guest == 0)
        return; /* Writes to x0 are discarded per the RISC-V spec. */
    emit_r12_disp(e, 0x89, src, guest * 8); /* mov r/m64, r64 */
}
159 +
160 +
/* mov reg, imm64 (REX.W + B8+rd): full 64-bit immediate load. */
static void emit_mov_imm64(struct emitter *e, enum x86reg dst, u64 imm) {
    emit_u8(e, rex(true, false, false, dst >= 8));
    emit_u8(e, (u8)(0xB8 + (dst & 7)));
    emit_u64(e, imm);
}

/* mov reg, imm32 (sign-extended to 64 via mov r/m64, imm32: REX.W C7 /0). */
static void emit_mov_imm32_sx(struct emitter *e, enum x86reg dst, i32 imm) {
    emit_u8(e, rex(true, false, false, dst >= 8));
    emit_u8(e, 0xC7);
    emit_u8(e, modrm(3, 0, dst));
    emit_u32(e, (u32)imm);
}
174 +
175 +
/* Careful prologue using known encodings.
 * Saves the callee-saved registers we pin (rbp/r12/r13/r14), then moves
 * the SysV argument registers into the pinned slots:
 *   r12 = regs array (rdi), r13 = memory base (rsi), r14 = pc_out (rdx).
 * Push order must mirror the pops in emit_epilogue_with_exit(). */
static void emit_prologue(struct emitter *e) {
    emit_u8(e, 0x55); /* push rbp */
    emit_u8(e, 0x41);
    emit_u8(e, 0x54); /* push r12 */
    emit_u8(e, 0x41);
    emit_u8(e, 0x55); /* push r13 */
    emit_u8(e, 0x41);
    emit_u8(e, 0x56); /* push r14 */
    emit_u8(e, 0x49);
    emit_u8(e, 0x89);
    emit_u8(e, 0xFC); /* mov r12, rdi */
    emit_u8(e, 0x49);
    emit_u8(e, 0x89);
    emit_u8(e, 0xF5); /* mov r13, rsi */
    emit_u8(e, 0x49);
    emit_u8(e, 0x89);
    emit_u8(e, 0xD6); /* mov r14, rdx */
}
194 +
195 +
/* Emit function epilogue + ret with exit reason.
 * Restores the registers pushed by emit_prologue() (reverse order) and
 * returns `exit_reason` to the dispatcher in eax. */
static void emit_epilogue_with_exit(struct emitter *e, u32 exit_reason) {
    emit_u8(e, 0xB8);
    emit_u32(e, exit_reason); /* mov eax, reason */
    emit_u8(e, 0x41);
    emit_u8(e, 0x5E); /* pop r14 */
    emit_u8(e, 0x41);
    emit_u8(e, 0x5D); /* pop r13 */
    emit_u8(e, 0x41);
    emit_u8(e, 0x5C); /* pop r12 */
    emit_u8(e, 0x5D); /* pop rbp */
    emit_u8(e, 0xC3); /* ret */
}
208 +
209 +
/* Write next-PC to *r14, then epilogue+ret.
 * This is how a compiled block hands the next guest PC back to the
 * dispatch loop along with its exit reason. */
static void emit_block_exit(
    struct emitter *e, enum jit_exit reason, u32 next_pc
) {
    /* mov dword [r14], next_pc */
    emit_u8(e, 0x41);
    emit_u8(e, 0xC7);
    emit_u8(e, modrm(0, 0, X_R14 & 7));
    emit_u32(e, next_pc);
    emit_epilogue_with_exit(e, (u32)reason);
}
220 +
221 +
/* ---------- Jump patching ---------------------------------------------- */

/* Emit jnz rel32 (0F 85), return position for patching. */
static u32 emit_jnz_placeholder(struct emitter *e) {
    u32 pos = e->pos;
    emit_u8(e, 0x0F);
    emit_u8(e, 0x85);
    emit_u32(e, 0); /* rel32 placeholder, filled in by patch_jcc(). */
    return pos;
}

/* Emit jne rel32 (0F 85), return position for patching.
 * (jne and jnz share an opcode; alias kept for readability.) */
#define emit_jne_placeholder emit_jnz_placeholder

/* Emit jmp rel32 (E9), return position for patching. */
static u32 emit_jmp_placeholder(struct emitter *e) {
    u32 pos = e->pos;
    emit_u8(e, 0xE9);
    emit_u32(e, 0); /* rel32 placeholder, filled in by patch_jmp(). */
    return pos;
}
242 +
243 +
/* Patch a jcc rel32 (6-byte: 0F xx rel32) to jump to `target`. */
244 +
static void patch_jcc(struct emitter *e, u32 jcc_pos, u32 target) {
245 +
    u32 rel             = target - (jcc_pos + 6);
246 +
    e->buf[jcc_pos + 2] = (u8)rel;
247 +
    e->buf[jcc_pos + 3] = (u8)(rel >> 8);
248 +
    e->buf[jcc_pos + 4] = (u8)(rel >> 16);
249 +
    e->buf[jcc_pos + 5] = (u8)(rel >> 24);
250 +
}
251 +
252 +
/* Patch a jmp rel32 (5-byte: E9 rel32) to jump to `target`. */
253 +
static void patch_jmp(struct emitter *e, u32 jmp_pos, u32 target) {
254 +
    u32 rel             = target - (jmp_pos + 5);
255 +
    e->buf[jmp_pos + 1] = (u8)rel;
256 +
    e->buf[jmp_pos + 2] = (u8)(rel >> 8);
257 +
    e->buf[jmp_pos + 3] = (u8)(rel >> 16);
258 +
    e->buf[jmp_pos + 4] = (u8)(rel >> 24);
259 +
}
260 +
261 +
/* ---------- Per-instruction translation helpers ------------------------ */

/* Load rs1 into rax (HTMP1). */
static void emit_load_rs1(struct emitter *e, u32 rs1) {
    emit_load_guest(e, HTMP1, rs1);
}

/* Load rs2 into rcx (HTMP2). */
static void emit_load_rs2(struct emitter *e, u32 rs2) {
    emit_load_guest(e, HTMP2, rs2);
}

/* Store rax (HTMP1) to rd. */
static void emit_store_rd(struct emitter *e, u32 rd) {
    emit_store_guest(e, rd, HTMP1);
}

/* add rax, imm32 (sign-extended); no-op when imm == 0. */
static void emit_add_rax_imm32(struct emitter *e, i32 imm) {
    if (imm == 0)
        return;
    emit_rexw(e);
    if (imm >= -128 && imm <= 127) {
        /* imm8 form: 83 /0 ib (sign-extended). */
        emit_u8(e, 0x83);
        emit_u8(e, modrm(3, 0, X_RAX));
        emit_u8(e, (u8)(i8)imm);
    } else {
        emit_u8(e, 0x05); /* add rax, imm32 (short form) */
        emit_u32(e, (u32)imm);
    }
}
292 +
293 +
/* imul rax, rcx (64-bit signed multiply; low 64 bits kept) */
static void emit_imul_rax_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xAF); /* imul r64, r/m64 */
    emit_u8(e, modrm(3, X_RAX, X_RCX));
}

/* cqo (sign-extend rax into rdx:rax; required before idiv) */
static void emit_cqo(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x99);
}

/* idiv rcx (signed divide rdx:rax by rcx, 64-bit) */
static void emit_idiv_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0xF7); /* group-3, /7 = idiv */
    emit_u8(e, modrm(3, 7, X_RCX));
}

/* div rcx (unsigned divide rdx:rax by rcx, 64-bit) */
static void emit_div_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0xF7); /* group-3, /6 = div */
    emit_u8(e, modrm(3, 6, X_RCX));
}

/* xor rdx, rdx (zero rdx for unsigned division) */
static void emit_xor_rdx_rdx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x31);
    emit_u8(e, modrm(3, X_RDX, X_RDX));
}
327 +
328 +
/* movsxd rax, eax (sign-extend 32-bit result to 64-bit) */
static void emit_movsxd_rax_eax(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x63);
    emit_u8(e, modrm(3, X_RAX, X_RAX));
}

/* test rcx, rcx (64-bit): sets ZF iff rcx == 0. */
static void emit_test_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x85);
    emit_u8(e, modrm(3, X_RCX, X_RCX));
}

/* test ecx, ecx (32-bit): sets ZF iff ecx == 0. */
static void emit_test_ecx(struct emitter *e) {
    emit_u8(e, 0x85);
    emit_u8(e, modrm(3, X_RCX, X_RCX));
}

/* cmp rcx, -1 (64-bit; 83 /7 ib, imm8 sign-extended) */
static void emit_cmp_rcx_neg1_64(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x83);
    emit_u8(e, modrm(3, 7, X_RCX));
    emit_u8(e, 0xFF);
}

/* cmp ecx, -1 (32-bit) */
static void emit_cmp_ecx_neg1_32(struct emitter *e) {
    emit_u8(e, 0x83);
    emit_u8(e, modrm(3, 7, X_RCX));
    emit_u8(e, 0xFF);
}
362 +
363 +
/* mov rax, r11 (via: REX.WR mov rax, r11) */
static void emit_mov_rax_r11(struct emitter *e) {
    emit_u8(e, 0x4C); /* REX.W + REX.R (r11 in the reg field) */
    emit_u8(e, 0x89);
    emit_u8(e, modrm(3, X_R11 & 7, X_RAX));
}

/* mov rax, rdx (64-bit) */
static void emit_mov_rax_rdx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x89);
    emit_u8(e, modrm(3, X_RDX, X_RAX));
}

/* cmp rax, rcx (64-bit) */
static void emit_cmp_rax_rcx(struct emitter *e) {
    emit_rexw(e);
    emit_u8(e, 0x39);
    emit_u8(e, modrm(3, X_RCX, X_RAX));
}

/* setCC al + movzx rax, al: materialize a comparison flag as 0/1 in rax.
 * `cc` is the setcc secondary opcode (e.g. 0x9C=setl, 0x92=setb). */
static void emit_setcc_rax(struct emitter *e, u8 cc) {
    emit_u8(e, 0x0F);
    emit_u8(e, cc);
    emit_u8(e, modrm(3, 0, X_RAX));
    emit_rexw(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xB6); /* movzx r64, r/m8 */
    emit_u8(e, modrm(3, X_RAX, X_RAX));
}

/* and ecx, imm8 (sign-extended; used for masking shift amounts). */
static void emit_and_ecx_imm8(struct emitter *e, u8 mask) {
    emit_u8(e, 0x83); /* and r/m32, imm8 (/4) */
    emit_u8(e, modrm(3, 4, X_RCX));
    emit_u8(e, mask);
}
402 +
403 +
/* ---------- Division helpers ------------------------------------------- */
404 +
405 +
/*
406 +
 * Shared skeleton for 64-bit div/rem with RISC-V corner cases:
407 +
 *   - divisor == 0: emit_zero_case (custom per variant)
408 +
 *   - signed overflow (INT_MIN / -1): emit_overflow_case
409 +
 *   - otherwise: normal division
410 +
 *
411 +
 * `is_signed`: whether to check for INT_MIN/-1 overflow.
412 +
 * `is_rem`:    whether to move rdx->rax after division (remainder result).
413 +
 * `save_dividend`: whether to save rax to r11 before testing (needed for
414 +
 *                  rem(x,0)=x semantics).
415 +
 */
416 +
static void emit_div64(struct emitter *e, bool is_signed, bool is_rem) {
417 +
    /* For rem: save dividend to r11 (needed if divisor==0). */
418 +
    if (is_rem) {
419 +
        /* mov r11, rax */
420 +
        emit_u8(e, rex(true, false, false, true));
421 +
        emit_u8(e, 0x89);
422 +
        emit_u8(e, modrm(3, X_RAX, X_R11 & 7));
423 +
    }
424 +
425 +
    emit_test_rcx(e);
426 +
    u32 jnz = emit_jnz_placeholder(e);
427 +
428 +
    /* Divisor == 0. */
429 +
    if (is_rem) {
430 +
        emit_mov_rax_r11(e); /* result = dividend */
431 +
    } else {
432 +
        emit_mov_imm32_sx(e, HTMP1, -1); /* result = -1 (all ones) */
433 +
    }
434 +
    u32 jmp_end1 = emit_jmp_placeholder(e);
435 +
436 +
    /* .nonzero: */
437 +
    u32 nonzero  = e->pos;
438 +
    u32 jne_safe = 0, jmp_end2 = 0;
439 +
440 +
    if (is_signed) {
441 +
        emit_cmp_rcx_neg1_64(e);
442 +
        jne_safe = emit_jne_placeholder(e);
443 +
        /* rcx == -1: overflow case. */
444 +
        if (is_rem) {
445 +
            /* result = 0 */
446 +
            emit_rexw(e);
447 +
            emit_u8(e, 0x31);
448 +
            emit_u8(e, modrm(3, X_RAX, X_RAX));
449 +
        }
450 +
        /* else: result = rax (already INT_MIN, which is correct) */
451 +
        jmp_end2 = emit_jmp_placeholder(e);
452 +
    }
453 +
454 +
    /* .safe: perform the actual division. */
455 +
    u32 safe = e->pos;
456 +
    if (is_signed) {
457 +
        emit_cqo(e);
458 +
        emit_idiv_rcx(e);
459 +
    } else {
460 +
        emit_xor_rdx_rdx(e);
461 +
        emit_div_rcx(e);
462 +
    }
463 +
    if (is_rem) {
464 +
        emit_mov_rax_rdx(e); /* remainder is in rdx */
465 +
    }
466 +
467 +
    /* .end: */
468 +
    u32 end = e->pos;
469 +
    patch_jcc(e, jnz, nonzero);
470 +
    patch_jmp(e, jmp_end1, end);
471 +
    if (is_signed) {
472 +
        patch_jcc(e, jne_safe, safe);
473 +
        patch_jmp(e, jmp_end2, end);
474 +
    }
475 +
}
476 +
477 +
/* Same pattern for 32-bit division (W-suffix instructions). */
478 +
static void emit_div32(struct emitter *e, bool is_signed, bool is_rem) {
479 +
    /* For rem: save dividend in edx. */
480 +
    if (is_rem) {
481 +
        /* mov edx, eax */
482 +
        emit_u8(e, 0x89);
483 +
        emit_u8(e, modrm(3, X_RAX, X_RDX));
484 +
    }
485 +
486 +
    emit_test_ecx(e);
487 +
    u32 jnz = emit_jnz_placeholder(e);
488 +
489 +
    /* Divisor == 0. */
490 +
    if (is_rem) {
491 +
        /* mov eax, edx (result = dividend) */
492 +
        emit_u8(e, 0x89);
493 +
        emit_u8(e, modrm(3, X_RDX, X_RAX));
494 +
    } else {
495 +
        /* mov eax, -1 */
496 +
        emit_u8(e, 0xB8);
497 +
        emit_u32(e, 0xFFFFFFFF);
498 +
    }
499 +
    u32 jmp_end1 = emit_jmp_placeholder(e);
500 +
501 +
    /* .nonzero: */
502 +
    u32 nonzero  = e->pos;
503 +
    u32 jne_safe = 0, jmp_end2 = 0;
504 +
505 +
    if (is_signed) {
506 +
        emit_cmp_ecx_neg1_32(e);
507 +
        jne_safe = emit_jne_placeholder(e);
508 +
        if (is_rem) {
509 +
            /* result = 0 */
510 +
            emit_u8(e, 0x31);
511 +
            emit_u8(e, modrm(3, X_RAX, X_RAX));
512 +
        }
513 +
        /* else: result = eax (INT_MIN stays INT_MIN) */
514 +
        jmp_end2 = emit_jmp_placeholder(e);
515 +
    }
516 +
517 +
    /* .safe: */
518 +
    u32 safe = e->pos;
519 +
    if (is_rem) {
520 +
        /* Restore dividend to eax from edx. */
521 +
        emit_u8(e, 0x89);
522 +
        emit_u8(e, modrm(3, X_RDX, X_RAX));
523 +
    }
524 +
    if (is_signed) {
525 +
        emit_u8(e, 0x99); /* cdq */
526 +
        emit_u8(e, 0xF7);
527 +
        emit_u8(e, modrm(3, 7, X_RCX)); /* idiv ecx */
528 +
    } else {
529 +
        emit_u8(e, 0x31);
530 +
        emit_u8(e, modrm(3, X_RDX, X_RDX)); /* xor edx,edx */
531 +
        emit_u8(e, 0xF7);
532 +
        emit_u8(e, modrm(3, 6, X_RCX)); /* div ecx */
533 +
    }
534 +
    if (is_rem) {
535 +
        emit_u8(e, 0x89);
536 +
        emit_u8(e, modrm(3, X_RDX, X_RAX)); /* mov eax,edx */
537 +
    }
538 +
539 +
    /* .end: */
540 +
    u32 end = e->pos;
541 +
    patch_jcc(e, jnz, nonzero);
542 +
    patch_jmp(e, jmp_end1, end);
543 +
    if (is_signed) {
544 +
        patch_jcc(e, jne_safe, safe);
545 +
        patch_jmp(e, jmp_end2, end);
546 +
    }
547 +
}
548 +
549 +
/* ---------- Memory access helpers -------------------------------------- */
550 +
551 +
/* add rax, r13 (compute host address from guest address). */
552 +
static void emit_add_rax_r13(struct emitter *e) {
553 +
    emit_u8(e, 0x4C);
554 +
    emit_u8(e, 0x01);
555 +
    emit_u8(e, 0xE8);
556 +
}
557 +
558 +
/* Load from [r13+rax] into rax, with sign/zero extension. */
559 +
static void emit_load_mem_i8(struct emitter *e) {
560 +
    emit_add_rax_r13(e);
561 +
    emit_rexw(e);
562 +
    emit_u8(e, 0x0F);
563 +
    emit_u8(e, 0xBE);
564 +
    emit_u8(e, modrm(0, X_RAX, X_RAX));
565 +
}
566 +
567 +
static void emit_load_mem_u8(struct emitter *e) {
568 +
    emit_add_rax_r13(e);
569 +
    emit_u8(e, 0x0F);
570 +
    emit_u8(e, 0xB6);
571 +
    emit_u8(e, modrm(0, X_RAX, X_RAX));
572 +
}
573 +
574 +
/* Sign-extending halfword load: movsx rax, word [r13+rax]. */
static void emit_load_mem_i16(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xBF); /* movsx rax, word [rax] */
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}
581 +
582 +
/* Zero-extending halfword load: movzx eax, word [r13+rax].
 * 32-bit destination implicitly zeroes the upper half of rax. */
static void emit_load_mem_u16(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x0F);
    emit_u8(e, 0xB7); /* movzx eax, word [rax] */
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}
588 +
589 +
/* Sign-extending word load: movsxd rax, dword [r13+rax]. */
static void emit_load_mem_i32(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x63); /* movsxd rax, dword [rax] */
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}
595 +
596 +
/* Zero-extending word load: mov eax, dword [r13+rax].
 * Writing eax implicitly clears bits 63:32 of rax. */
static void emit_load_mem_u32(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x8B); /* mov eax, [rax] */
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}
601 +
602 +
/* Full 64-bit load: mov rax, qword [r13+rax]. */
static void emit_load_mem_u64(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x8B); /* mov rax, [rax] */
    emit_u8(e, modrm(0, X_RAX, X_RAX));
}
608 +
609 +
/* Store from rcx to [r13+rax].
 * (Shared contract for all emit_store_mem_* helpers: rax holds the guest
 * address, rcx the value; only the low N bits of rcx are written.) */
static void emit_store_mem_u8(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x88); /* mov byte [rax], cl */
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}
615 +
616 +
/* Halfword store: mov word [r13+rax], cx. */
static void emit_store_mem_u16(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x66); /* operand-size prefix: 16-bit */
    emit_u8(e, 0x89);
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}
622 +
623 +
/* Word store: mov dword [r13+rax], ecx. */
static void emit_store_mem_u32(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_u8(e, 0x89); /* mov [rax], ecx */
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}
628 +
629 +
/* Doubleword store: mov qword [r13+rax], rcx. */
static void emit_store_mem_u64(struct emitter *e) {
    emit_add_rax_r13(e);
    emit_rexw(e);
    emit_u8(e, 0x89); /* mov [rax], rcx */
    emit_u8(e, modrm(0, X_RCX, X_RAX));
}
635 +
636 +
/* ---------- Translate one RV64I instruction ----------------------------- */
637 +
638 +
/*
 * Translate one RV64I(+M) guest instruction at guest address `pc` into
 * x86-64 code appended to the emitter `e`.
 *
 * Register conventions (as used by the emitter helpers seen here):
 * values loaded via HTMP1 / emit_load_rs1() land in rax, HTMP2 /
 * emit_load_rs2() in rcx, and r14 points at the guest-PC out-slot.
 *
 * Returns: true if the instruction ends the basic block (branch/jump/ecall),
 *          false if execution should continue to the next instruction.
 */
static bool translate_insn(struct emitter *e, instr_t ins, u32 pc) {
    u32 opcode  = ins.r.opcode;
    u32 pc_next = pc + INSTR_SIZE;

    switch (opcode) {

    case OP_LUI:
        /* rd = sign-extended (imm20 << 12); writes to x0 are dropped. */
        if (ins.u.rd != 0) {
            emit_mov_imm32_sx(e, HTMP1, (i32)(ins.u.imm_31_12 << 12));
            emit_store_rd(e, ins.u.rd);
        }
        return false;

    case OP_AUIPC:
        /* rd = pc + sign-extended (imm20 << 12).  The guest PC of this
         * instruction is a compile-time constant, so fold the sum here. */
        if (ins.u.rd != 0) {
            i64 result = (i64)pc + (i64)(i32)(ins.u.imm_31_12 << 12);
            emit_mov_imm64(e, HTMP1, (u64)result);
            emit_store_rd(e, ins.u.rd);
        }
        return false;

    case OP_JAL: {
        /* Direct jump: link pc_next into rd (if not x0), exit to target. */
        u32 target = pc + (u32)get_j_imm(ins);
        if (ins.j.rd != 0) {
            emit_mov_imm64(e, HTMP1, (u64)pc_next);
            emit_store_guest(e, ins.j.rd, HTMP1);
        }
        emit_block_exit(e, JIT_EXIT_BRANCH, target);
        return true;
    }

    case OP_JALR: {
        i32 imm = get_i_imm(ins);

        /* Special-case RET: jalr x0, ra, 0. */
        if (ins.i.rd == 0 && ins.i.rs1 == RA && imm == 0) {
            emit_load_guest(e, HTMP1, RA);

            /* test rax, rax */
            emit_rexw(e);
            emit_u8(e, 0x85);
            emit_u8(e, modrm(3, X_RAX, X_RAX));

            u32 jnz_pos = emit_jnz_placeholder(e);

            /* RA == 0: program exit. */
            emit_block_exit(e, JIT_EXIT_RET, 0);

            /* RA != 0: compute target = RA & ~1. */
            patch_jcc(e, jnz_pos, e->pos);
            emit_rexw(e);
            emit_u8(e, 0x83);
            emit_u8(e, modrm(3, 4, X_RAX)); /* 0x83 /4: AND r/m64, imm8 */
            emit_u8(e, 0xFE); /* and rax, ~1 (imm8 sign-extended) */

            /* mov dword [r14], eax -- write PC */
            emit_u8(e, 0x41);
            emit_u8(e, 0x89);
            emit_u8(e, modrm(0, X_RAX, X_R14 & 7));

            emit_epilogue_with_exit(e, (u32)JIT_EXIT_BRANCH);
            return true;
        }

        /* General JALR: target = (rs1 + imm) & ~1. */
        emit_load_rs1(e, ins.i.rs1);
        emit_add_rax_imm32(e, imm);
        emit_rexw(e);
        emit_u8(e, 0x83);
        emit_u8(e, modrm(3, 4, X_RAX));
        emit_u8(e, 0xFE); /* and rax, ~1 */

        /* mov rcx, rax (save target before the link write clobbers rax) */
        emit_rexw(e);
        emit_u8(e, 0x89);
        emit_u8(e, modrm(3, X_RAX, X_RCX));

        if (ins.i.rd != 0) {
            emit_mov_imm64(e, HTMP1, (u64)pc_next);
            emit_store_guest(e, ins.i.rd, HTMP1);
        }

        /* mov dword [r14], ecx -- write the branch target as the new PC */
        emit_u8(e, 0x41);
        emit_u8(e, 0x89);
        emit_u8(e, modrm(0, X_RCX, X_R14 & 7));

        emit_epilogue_with_exit(e, (u32)JIT_EXIT_BRANCH);
        return true;
    }

    case OP_BRANCH: {
        u32 target = pc + (u32)get_b_imm(ins);

        emit_load_rs1(e, ins.b.rs1);
        emit_load_rs2(e, ins.b.rs2);
        emit_cmp_rax_rcx(e); /* flags := rs1 cmp rs2 */

        /* jCC to .taken -- 0x0F-prefixed near Jcc opcode byte. */
        u8 cc;
        switch (ins.b.funct3) {
        case 0x0:
            cc = 0x84;
            break; /* beq -> je */
        case 0x1:
            cc = 0x85;
            break; /* bne -> jne */
        case 0x4:
            cc = 0x8C;
            break; /* blt -> jl */
        case 0x5:
            cc = 0x8D;
            break; /* bge -> jge */
        case 0x6:
            cc = 0x82;
            break; /* bltu -> jb */
        case 0x7:
            cc = 0x83;
            break; /* bgeu -> jae */
        default:
            /* NOTE(review): funct3 2/3 are illegal branch encodings; they
             * currently alias beq -- confirm a FAULT exit isn't preferred. */
            cc = 0x84;
            break;
        }

        u32 jcc_pos = e->pos;
        emit_u8(e, 0x0F);
        emit_u8(e, cc);
        emit_u32(e, 0); /* rel32 placeholder, patched below */

        /* Not taken: fall through. */
        emit_block_exit(e, JIT_EXIT_BRANCH, pc_next);

        /* .taken: */
        patch_jcc(e, jcc_pos, e->pos);
        emit_block_exit(e, JIT_EXIT_BRANCH, target);
        return true;
    }

    case OP_LOAD: {
        i32 imm = get_i_imm(ins);
        /* NOTE(review): a load into x0 is skipped entirely, so it cannot
         * fault here -- confirm that is acceptable for this emulator. */
        if (ins.i.rd == 0)
            return false;

        emit_load_rs1(e, ins.i.rs1);
        emit_add_rax_imm32(e, imm); /* rax = guest effective address */

        switch (ins.i.funct3) {
        case 0x0:
            emit_load_mem_i8(e);
            break; /* lb */
        case 0x1:
            emit_load_mem_i16(e);
            break; /* lh */
        case 0x2:
            emit_load_mem_i32(e);
            break; /* lw */
        case 0x3:
            emit_load_mem_u64(e);
            break; /* ld */
        case 0x4:
            emit_load_mem_u8(e);
            break; /* lbu */
        case 0x5:
            emit_load_mem_u16(e);
            break; /* lhu */
        case 0x6:
            emit_load_mem_u32(e);
            break; /* lwu */
        default:
            emit_block_exit(e, JIT_EXIT_FAULT, pc);
            return true;
        }
        emit_store_rd(e, ins.i.rd);
        return false;
    }

    case OP_STORE: {
        i32 imm = get_s_imm(ins);
        emit_load_guest(e, HTMP1, ins.s.rs1);
        emit_add_rax_imm32(e, imm);          /* rax = guest address */
        emit_load_guest(e, HTMP2, ins.s.rs2); /* rcx = value to store */

        switch (ins.s.funct3) {
        case 0x0:
            emit_store_mem_u8(e);
            break; /* sb */
        case 0x1:
            emit_store_mem_u16(e);
            break; /* sh */
        case 0x2:
            emit_store_mem_u32(e);
            break; /* sw */
        case 0x3:
            emit_store_mem_u64(e);
            break; /* sd */
        default:
            emit_block_exit(e, JIT_EXIT_FAULT, pc);
            return true;
        }
        return false;
    }

    case OP_IMM: {
        i32 imm = get_i_imm(ins);
        if (ins.i.rd == 0)
            return false;

        emit_load_rs1(e, ins.i.rs1);

        switch (ins.i.funct3) {
        case 0x0: /* addi */
            emit_add_rax_imm32(e, imm);
            break;
        case 0x1: /* slli -- 0xC1 /4: shl rax, imm8 */
            emit_rexw(e);
            emit_u8(e, 0xC1);
            emit_u8(e, modrm(3, 4, X_RAX));
            emit_u8(e, (u8)(imm & 0x3F));
            break;
        case 0x2: /* slti */
            emit_mov_imm32_sx(e, HTMP2, imm);
            emit_cmp_rax_rcx(e);
            emit_setcc_rax(e, 0x9C); /* setl */
            break;
        case 0x3: /* sltiu */
            emit_mov_imm32_sx(e, HTMP2, imm);
            emit_cmp_rax_rcx(e);
            emit_setcc_rax(e, 0x92); /* setb */
            break;
        case 0x4: /* xori */
            emit_mov_imm32_sx(e, HTMP2, imm);
            emit_alu_rax_rcx(e, 0x31); /* xor */
            break;
        case 0x5: /* srli/srai -- imm bit 10 is instruction bit 30 (srai) */
            emit_rexw(e);
            emit_u8(e, 0xC1);
            emit_u8(e, modrm(3, (imm & 0x400) ? 7 : 5, X_RAX));
            emit_u8(e, (u8)(imm & 0x3F));
            break;
        case 0x6: /* ori */
            emit_mov_imm32_sx(e, HTMP2, imm);
            emit_alu_rax_rcx(e, 0x09); /* or */
            break;
        case 0x7: /* andi */
            emit_mov_imm32_sx(e, HTMP2, imm);
            emit_alu_rax_rcx(e, 0x21); /* and */
            break;
        }
        emit_store_rd(e, ins.i.rd);
        return false;
    }

    case OP_IMM_32: {
        i32 imm = get_i_imm(ins);
        if (ins.i.rd == 0)
            return false;

        emit_load_rs1(e, ins.i.rs1);

        switch (ins.i.funct3) {
        case 0x0: /* addiw -- 64-bit add; the movsxd below truncates. */
            emit_add_rax_imm32(e, imm);
            break;
        case 0x1: /* slliw -- 32-bit shift (no REX.W) */
            emit_u8(e, 0xC1);
            emit_u8(e, modrm(3, 4, X_RAX));
            emit_u8(e, (u8)(imm & 0x1F));
            break;
        case 0x5: /* srliw/sraiw */
            emit_u8(e, 0xC1);
            emit_u8(e, modrm(3, (imm & 0x400) ? 7 : 5, X_RAX));
            emit_u8(e, (u8)(imm & 0x1F));
            break;
        }
        /* All *W ops sign-extend their 32-bit result to 64 bits. */
        emit_movsxd_rax_eax(e);
        emit_store_rd(e, ins.i.rd);
        return false;
    }

    case OP_OP: {
        if (ins.r.rd == 0)
            return false;
        emit_load_rs1(e, ins.r.rs1); /* rax = rs1 */
        emit_load_rs2(e, ins.r.rs2); /* rcx = rs2 */

        switch (ins.r.funct7) {
        case FUNCT7_NORMAL:
            switch (ins.r.funct3) {
            case 0x0:
                emit_alu_rax_rcx(e, 0x01);
                break; /* add */
            case 0x1:  /* sll -- RV64 masks shift amount to 6 bits */
                emit_and_ecx_imm8(e, 0x3F);
                emit_shift_rax_cl(e, 4);
                break;
            case 0x2: /* slt */
                emit_cmp_rax_rcx(e);
                emit_setcc_rax(e, 0x9C); /* setl */
                break;
            case 0x3: /* sltu */
                emit_cmp_rax_rcx(e);
                emit_setcc_rax(e, 0x92); /* setb */
                break;
            case 0x4:
                emit_alu_rax_rcx(e, 0x31);
                break; /* xor */
            case 0x5:  /* srl */
                emit_and_ecx_imm8(e, 0x3F);
                emit_shift_rax_cl(e, 5);
                break;
            case 0x6:
                emit_alu_rax_rcx(e, 0x09);
                break; /* or */
            case 0x7:
                emit_alu_rax_rcx(e, 0x21);
                break; /* and */
            }
            break;

        case FUNCT7_SUB:
            switch (ins.r.funct3) {
            case 0x0:
                emit_alu_rax_rcx(e, 0x29);
                break; /* sub */
            case 0x5:  /* sra */
                emit_and_ecx_imm8(e, 0x3F);
                emit_shift_rax_cl(e, 7);
                break;
            }
            break;

        case FUNCT7_MUL: /* M extension */
            switch (ins.r.funct3) {
            case 0x0:
                emit_imul_rax_rcx(e);
                break; /* mul */
            case 0x4:
                emit_div64(e, true, false);
                break; /* div */
            case 0x5:
                emit_div64(e, false, false);
                break; /* divu */
            case 0x6:
                emit_div64(e, true, true);
                break; /* rem */
            case 0x7:
                emit_div64(e, false, true);
                break; /* remu */
            }
            break;
        }
        emit_store_rd(e, ins.r.rd);
        return false;
    }

    case OP_OP_32: {
        if (ins.r.rd == 0)
            return false;
        emit_load_rs1(e, ins.r.rs1); /* rax = rs1 */
        emit_load_rs2(e, ins.r.rs2); /* rcx = rs2 */

        switch (ins.r.funct7) {
        case FUNCT7_NORMAL:
            switch (ins.r.funct3) {
            case 0x0: /* addw: add eax, ecx */
                emit_u8(e, 0x01);
                emit_u8(e, modrm(3, X_RCX, X_RAX));
                break;
            case 0x1: /* sllw: shl eax, cl (amount masked to 5 bits) */
                emit_and_ecx_imm8(e, 0x1F);
                emit_u8(e, 0xD3);
                emit_u8(e, modrm(3, 4, X_RAX));
                break;
            case 0x5: /* srlw: shr eax, cl */
                emit_and_ecx_imm8(e, 0x1F);
                emit_u8(e, 0xD3);
                emit_u8(e, modrm(3, 5, X_RAX));
                break;
            }
            break;

        case FUNCT7_SUB:
            switch (ins.r.funct3) {
            case 0x0: /* subw: sub eax, ecx */
                emit_u8(e, 0x29);
                emit_u8(e, modrm(3, X_RCX, X_RAX));
                break;
            case 0x5: /* sraw: sar eax, cl */
                emit_and_ecx_imm8(e, 0x1F);
                emit_u8(e, 0xD3);
                emit_u8(e, modrm(3, 7, X_RAX));
                break;
            }
            break;

        case FUNCT7_MUL:
            switch (ins.r.funct3) {
            case 0x0: /* mulw: imul eax, ecx */
                emit_u8(e, 0x0F);
                emit_u8(e, 0xAF);
                emit_u8(e, modrm(3, X_RAX, X_RCX));
                break;
            case 0x4:
                emit_div32(e, true, false);
                break; /* divw */
            case 0x5:
                emit_div32(e, false, false);
                break; /* divuw */
            case 0x6:
                emit_div32(e, true, true);
                break; /* remw */
            case 0x7:
                emit_div32(e, false, true);
                break; /* remuw */
            }
            break;
        }
        /* All *W ops sign-extend their 32-bit result to 64 bits. */
        emit_movsxd_rax_eax(e);
        emit_store_rd(e, ins.r.rd);
        return false;
    }

    case OP_SYSTEM: {
        /* Only ecall/ebreak are handled; anything else (CSRs, wfi, ...)
         * exits to the interpreter as a fault. */
        u32 funct12 = ins.i.imm_11_0;
        if (funct12 == 0) {
            emit_block_exit(e, JIT_EXIT_ECALL, pc);
        } else if (funct12 == 1) {
            emit_block_exit(e, JIT_EXIT_EBREAK, pc);
        } else {
            emit_block_exit(e, JIT_EXIT_FAULT, pc);
        }
        return true;
    }

    case OP_FENCE:
        /* No code emitted -- presumably no guest-visible reordering to
         * guard against here; TODO confirm single-threaded assumption. */
        return false;

    default:
        emit_block_exit(e, JIT_EXIT_FAULT, pc);
        return true;
    }
}
1084 +
1085 +
/* ---------- Block compiler --------------------------------------------- */
1086 +
1087 +
static struct jit_block *jit_alloc_block(struct jit_state *jit) {
1088 +
    if (jit->block_count >= JIT_MAX_BLOCKS)
1089 +
        return NULL;
1090 +
    return &jit->blocks[jit->block_count++];
1091 +
}
1092 +
1093 +
static void jit_insert_block(struct jit_state *jit, struct jit_block *block) {
1094 +
    u32 h              = (block->guest_pc >> 2) & (JIT_BLOCK_HASH_SIZE - 1);
1095 +
    block->hash_next   = jit->block_hash[h];
1096 +
    jit->block_hash[h] = block;
1097 +
}
1098 +
1099 +
/* Compile one basic block of guest code starting at `guest_pc`.
 *
 * Translates guest instructions sequentially until a block-ending
 * instruction (branch/jump/ecall/fault), the per-block instruction cap,
 * or the scratch buffer running low, then copies the generated code into
 * the code cache and registers the block in the hash table.
 *
 * Returns NULL if the emitter overflowed, the code cache is full, or the
 * block pool is exhausted.
 *
 * NOTE: `emit_buf` is a function-local static scratch buffer, so this
 * function is not reentrant/thread-safe (fine for a single-threaded
 * emulator loop).
 */
struct jit_block *jit_compile_block(
    struct jit_state *jit, u32 guest_pc, u8 *mem, u32 prog_base, u32 prog_bytes
) {

#define JIT_EMIT_BUF_SIZE (JIT_MAX_BLOCK_INSNS * JIT_MAX_INSN_BYTES + 256)
    static u8 emit_buf[JIT_EMIT_BUF_SIZE];

    struct emitter em = {
        .buf      = emit_buf,
        .pos      = 0,
        .capacity = JIT_EMIT_BUF_SIZE,
        .overflow = false,
    };

    emit_prologue(&em);

    u32 pc          = guest_pc;
    u32 insn_count  = 0;
    u32 prog_end_pc = prog_base + prog_bytes;

    while (insn_count < JIT_MAX_BLOCK_INSNS) {
        /* Fetch outside the program image: emit a fault exit and stop.
         * (Assumes prog_bytes is a multiple of INSTR_SIZE so the 4-byte
         * fetch below cannot run past the image -- TODO confirm.) */
        if (pc < prog_base || pc >= prog_end_pc) {
            emit_block_exit(&em, JIT_EXIT_FAULT, pc);
            break;
        }
        instr_t ins;
        memcpy(&ins, &mem[pc], sizeof(instr_t));
        insn_count++;

        bool ends_block  = translate_insn(&em, ins, pc);
        pc              += INSTR_SIZE;

        if (ends_block)
            break;

        /* Keep headroom for one more instruction plus an exit stub. */
        if (em.pos + JIT_MAX_INSN_BYTES + 64 > em.capacity) {
            emit_block_exit(&em, JIT_EXIT_BRANCH, pc);
            break;
        }
    }

    /* Hit the instruction cap mid-stream: resume at `pc` next time. */
    if (insn_count >= JIT_MAX_BLOCK_INSNS) {
        emit_block_exit(&em, JIT_EXIT_BRANCH, pc);
    }

    if (em.overflow)
        return NULL;

    u32 code_size = em.pos;
    if (jit->code_cache_used + code_size > JIT_CODE_CACHE_SIZE)
        return NULL;

    /* Reserve the block descriptor BEFORE consuming code-cache space.
     * The previous order advanced code_cache_used first and then bailed
     * out when the block pool was exhausted, permanently leaking those
     * cache bytes on every subsequent failed compile. */
    struct jit_block *block = jit_alloc_block(jit);
    if (!block)
        return NULL;

    u8 *dest = jit->code_cache + jit->code_cache_used;
    memcpy(dest, emit_buf, code_size);
    jit->code_cache_used += code_size;

    block->guest_pc     = guest_pc;
    block->guest_end_pc = pc;
    block->insn_count   = insn_count;
    block->code         = dest;
    block->code_size    = code_size;
    block->hash_next    = NULL;

    jit_insert_block(jit, block);
    jit->blocks_compiled++;

    return block;
}
1171 +
1172 +
/* ---------- Init / Destroy / Flush ------------------------------------ */
1173 +
1174 +
/* Initialise the JIT: zero all state and mmap the executable code cache.
 * Returns true on success.  On non-x86-64 hosts, or if mmap fails, sets
 * jit->available = false and returns false so the caller can fall back
 * to the interpreter.
 * NOTE(review): the cache is mapped PROT_READ|WRITE|EXEC (no W^X);
 * confirm this matches the project's hardening goals. */
bool jit_init(struct jit_state *jit) {
    memset(jit, 0, sizeof(*jit));

#if defined(__x86_64__) || defined(_M_X64)
    jit->code_cache = mmap(
        NULL,
        JIT_CODE_CACHE_SIZE,
        PROT_READ | PROT_WRITE | PROT_EXEC,
        MAP_PRIVATE | MAP_ANONYMOUS,
        -1,
        0
    );
    if (jit->code_cache == MAP_FAILED) {
        jit->code_cache = NULL;
        jit->available  = false;
        return false;
    }
    jit->available = true;
    return true;
#else
    /* Generated code is x86-64 only; other hosts interpret. */
    jit->available = false;
    return false;
#endif
}
1198 +
1199 +
void jit_destroy(struct jit_state *jit) {
1200 +
    if (jit->code_cache) {
1201 +
        munmap(jit->code_cache, JIT_CODE_CACHE_SIZE);
1202 +
        jit->code_cache = NULL;
1203 +
    }
1204 +
    jit->block_count = 0;
1205 +
}
1206 +
1207 +
void jit_flush(struct jit_state *jit) {
1208 +
    memset(jit->block_hash, 0, sizeof(jit->block_hash));
1209 +
    jit->block_count     = 0;
1210 +
    jit->code_cache_used = 0;
1211 +
}
jit.h added +152 -0
1 +
/*
2 +
 * JIT compiler for RISC-V RV64I -> x86-64.
3 +
 *
4 +
 * Translates basic blocks of RISC-V instructions into native x86-64 machine
5 +
 * code.  The generated code operates on the guest CPU state structure directly
6 +
 * and falls back to the interpreter for ecalls, ebreak, and faults.
7 +
 *
8 +
 * Design follows QEMU's TCG approach: translate on first encounter, cache the
9 +
 * result, and execute the native code on subsequent visits.
10 +
 */
11 +
#ifndef JIT_H
12 +
#define JIT_H
13 +
14 +
#include "riscv.h"
15 +
#include "types.h"
16 +
#include <stdbool.h>
17 +
18 +
/* Forward declarations. */
19 +
struct cpu;
20 +
21 +
/* ---------------------------------------------------------------------------
22 +
 * Tuning constants
23 +
 * ------------------------------------------------------------------------- */
24 +
25 +
/* Size of the JIT code cache (16 MB). */
26 +
#define JIT_CODE_CACHE_SIZE (16 * 1024 * 1024)
27 +
28 +
/* Maximum number of x86-64 bytes emitted per guest instruction.
29 +
 * Conservative upper bound -- most instructions compile to <30 bytes. */
30 +
#define JIT_MAX_INSN_BYTES 128
31 +
32 +
/* Maximum guest instructions in a single translated block. */
33 +
#define JIT_MAX_BLOCK_INSNS 256
34 +
35 +
/* Number of hash-table buckets for block lookup (must be power of 2). */
36 +
#define JIT_BLOCK_HASH_SIZE (64 * 1024)
37 +
38 +
/* Maximum number of translated blocks. */
39 +
#define JIT_MAX_BLOCKS (256 * 1024)
40 +
41 +
/* Exit reasons returned by generated code.
 * These values are the ABI between emitted blocks and the dispatch loop:
 * a block's epilogue returns one of them as its int result, so the
 * numeric values must stay in sync with the emitter. */
enum jit_exit {
    JIT_EXIT_BRANCH = 0, /* End of block -- branch/jump, update PC. */
    JIT_EXIT_ECALL  = 1, /* ECALL -- needs interpreter handling. */
    JIT_EXIT_EBREAK = 2, /* EBREAK -- needs interpreter handling. */
    JIT_EXIT_FAULT  = 3, /* Memory fault -- needs interpreter handling. */
    JIT_EXIT_CHAIN  = 4, /* Chain to next block (PC already set). */
    JIT_EXIT_RET    = 5, /* Program returned (jalr x0, ra, 0 with ra==0). */
};
50 +
51 +
/* A translated block of native code.  Descriptors live in the
 * jit_state block pool; `code` points into the shared code cache
 * and is not owned by the descriptor. */
struct jit_block {
    u32               guest_pc;     /* Guest PC this block starts at. */
    u32               guest_end_pc; /* Guest PC *after* last instruction. */
    u32               insn_count; /* Number of guest instructions translated.*/
    u8               *code;       /* Pointer into code cache. */
    u32               code_size;  /* Size of generated x86-64 code. */
    struct jit_block *hash_next;  /* Hash chain. */
};
60 +
61 +
/* Complete JIT state.  NOTE: this struct is large (it embeds a pool of
 * JIT_MAX_BLOCKS block descriptors), so allocate it statically or on
 * the heap rather than on the stack. */
struct jit_state {
    /* Executable code cache (mmap'd RWX). */
    u8 *code_cache;
    u32 code_cache_used;

    /* Block hash table (separate chaining via jit_block.hash_next). */
    struct jit_block *block_hash[JIT_BLOCK_HASH_SIZE];

    /* Block storage (static pool). */
    struct jit_block blocks[JIT_MAX_BLOCKS];
    u32              block_count;

    /* Statistics. */
    u64 blocks_compiled;
    u64 blocks_executed;
    u64 insns_executed;

    /* Whether JIT is available (mmap succeeded, x86-64 host). */
    bool available;
};
82 +
83 +
/* ---------------------------------------------------------------------------
84 +
 * API
85 +
 * ------------------------------------------------------------------------- */
86 +
87 +
/* Initialise the JIT.  Returns true on success.
88 +
 * On non-x86-64 hosts or if mmap fails, returns false and the emulator
89 +
 * should fall back to the interpreter. */
90 +
bool jit_init(struct jit_state *jit);
91 +
92 +
/* Release JIT resources. */
93 +
void jit_destroy(struct jit_state *jit);
94 +
95 +
/* Flush the entire translation cache (e.g. after self-modifying code). */
96 +
void jit_flush(struct jit_state *jit);
97 +
98 +
/* Compile (but don't look up) a block starting at `guest_pc`.
99 +
 * Returns NULL if the code cache is full or compilation fails. */
100 +
struct jit_block *jit_compile_block(
101 +
    struct jit_state *jit,
102 +
    u32               guest_pc,
103 +
    u8               *memory,
104 +
    u32               program_base,
105 +
    u32               program_bytes
106 +
);
107 +
108 +
/* Inline fast-path block lookup (hash table probe). */
109 +
static inline struct jit_block *jit_lookup_block(
110 +
    struct jit_state *jit, u32 guest_pc
111 +
) {
112 +
    u32               h = (guest_pc >> 2) & (JIT_BLOCK_HASH_SIZE - 1);
113 +
    struct jit_block *b = jit->block_hash[h];
114 +
    while (b) {
115 +
        if (b->guest_pc == guest_pc)
116 +
            return b;
117 +
        b = b->hash_next;
118 +
    }
119 +
    return NULL;
120 +
}
121 +
122 +
/* Look up or compile a block.  Calls the inline lookup first, falls
123 +
 * back to the compiler on a miss. */
124 +
static inline struct jit_block *jit_get_block(
125 +
    struct jit_state *jit,
126 +
    u32               guest_pc,
127 +
    u8               *memory,
128 +
    u32               program_base,
129 +
    u32               program_bytes
130 +
) {
131 +
    struct jit_block *b = jit_lookup_block(jit, guest_pc);
132 +
    if (b)
133 +
        return b;
134 +
    return jit_compile_block(
135 +
        jit, guest_pc, memory, program_base, program_bytes
136 +
    );
137 +
}
138 +
139 +
/* Signature of generated block entry points.
140 +
 * Args: pointer to cpu->regs[], pointer to memory[].
141 +
 * Returns: exit reason (enum jit_exit). */
142 +
typedef int (*jit_block_fn)(u64 *regs, u8 *memory, u32 *pc_out);
143 +
144 +
/* Execute a translated block.  Returns the exit reason. */
145 +
static inline int jit_exec_block(
146 +
    struct jit_block *block, u64 *regs, u8 *memory, u32 *pc_out
147 +
) {
148 +
    jit_block_fn fn = (jit_block_fn)(void *)block->code;
149 +
    return fn(regs, memory, pc_out);
150 +
}
151 +
152 +
#endif /* JIT_H */
riscv.c added +280 -0
1 +
/* RISC-V 64-bit (RV64I) instruction builder. */
2 +
#include <stdlib.h>
3 +
4 +
#include "riscv.h"
5 +
#include "types.h"
6 +
7 +
/* ABI names of the 32 integer registers, indexed by register number
 * (x0..x31).  Note x8 is listed under its "fp" alias rather than "s0". */
const char *reg_names[] = {
    "zero", "ra", "sp", "gp", "tp",  "t0",  "t1", "t2", "fp", "s1", "a0",
    "a1",   "a2", "a3", "a4", "a5",  "a6",  "a7", "s2", "s3", "s4", "s5",
    "s6",   "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"
};
12 +
13 +
/* Per-register caller-saved flag, indexed by register number: true for
 * t0-t2, a0-a7, and t3-t6.
 * NOTE(review): the standard RISC-V calling convention also treats
 * ra (x1) as caller-saved, but it is `false` here -- confirm this is an
 * intentional choice of this code generator. */
const bool caller_saved_registers[] = {
    false, false, false, false, false, true,  true, true,  false, false, true,
    true,  true,  true,  true,  true,  true,  true, false, false, false, false,
    false, false, false, false, false, false, true, true,  true,  true
};
18 +
19 +
/* Temporaries handed out in this order -- presumably by a register
 * allocator elsewhere; t0 is notably excluded (TODO confirm why). */
const reg_t temp_registers[] = { T1, T2, T3, T4, T5, T6 };
20 +
21 +
/* Sign-extend the low `bit_width` bits of `value` to a signed 32-bit
 * integer.
 *
 * Widths outside [1, 31] return the value reinterpreted unchanged: for
 * bit_width == 32 there is nothing to extend, and the previous
 * `~0u << bit_width` expression was undefined behaviour for a shift
 * count equal to the type width (as was `value >> (bit_width - 1)` for
 * non-positive widths). */
i32 sign_extend(u32 value, int bit_width) {
    if (bit_width <= 0 || bit_width >= 32)
        return (i32)value;

    if ((value >> (bit_width - 1)) & 1) {
        /* Sign bit is 1, so extend with 1s. */
        return (i32)(value | (~0u << bit_width));
    }
    return (i32)value;
}
28 +
29 +
/* Round `size` up to the next multiple of `alignment`.
 * Precondition: `alignment` is a power of two -- the mask trick below is
 * only valid in that case. */
i32 align(i32 size, i32 alignment) {
    i32 mask = alignment - 1;
    return (size + mask) & ~mask;
}
34 +
35 +
/* Creates an I-type instruction struct.
36 +
 * Used for immediate operations like ADDI, SLTI and loads like LW. */
37 +
static instr_t instr_i(
38 +
    opcode_t opcode, funct3_t fn3, reg_t rd, reg_t rs1, i32 imm
39 +
) {
40 +
    return (instr_t){ .i = { .opcode   = opcode,
41 +
                             .rd       = rd,
42 +
                             .rs1      = rs1,
43 +
                             .funct3   = fn3,
44 +
                             .imm_11_0 = (u32)imm } };
45 +
}
46 +
47 +
/* Creates a U-type instruction struct.
48 +
 * Used for LUI (Load Upper Immediate) and AUIPC. */
49 +
static instr_t instr_u(opcode_t opcode, reg_t rd, i32 imm) {
50 +
    return (instr_t){ .u = {
51 +
                          .opcode    = opcode,
52 +
                          .rd        = rd,
53 +
                          .imm_31_12 = (u32)imm & 0xFFFFF,
54 +
                      } };
55 +
}
56 +
57 +
/* Creates an R-type instruction struct.
58 +
 * Used for register-register operations like ADD, SUB, AND, OR. */
59 +
static instr_t instr_r(
60 +
    opcode_t opcode, funct3_t fn3, funct7_t fn7, reg_t rd, reg_t rs1, reg_t rs2
61 +
) {
62 +
    return (instr_t){ .r = { .opcode = opcode,
63 +
                             .rd     = rd,
64 +
                             .rs1    = rs1,
65 +
                             .rs2    = rs2,
66 +
                             .funct3 = fn3,
67 +
                             .funct7 = fn7 } };
68 +
}
69 +
70 +
/* Creates an S-type instruction struct.
71 +
 * Used for store instructions like SW, SH, SB. */
72 +
static instr_t instr_s(
73 +
    opcode_t opcode, funct3_t fn3, reg_t rs1, reg_t rs2, i32 imm
74 +
) {
75 +
    return (instr_t){ .s = { .opcode   = opcode,
76 +
                             .rs1      = rs1,
77 +
                             .rs2      = rs2,
78 +
                             .funct3   = fn3,
79 +
                             .imm_4_0  = (u32)imm & 0x1F,
80 +
                             .imm_11_5 = ((u32)imm >> 5) & 0x7F } };
81 +
}
82 +
83 +
/* Creates an SB-type (branch) instruction struct.
84 +
 * Modified S-type used for conditional branches like BEQ, BNE. */
85 +
static instr_t instr_sb(
86 +
    opcode_t opcode, funct3_t fn3, reg_t rs1, reg_t rs2, i32 imm
87 +
) {
88 +
    return (instr_t){ .b = { .opcode   = opcode,
89 +
                             .rs1      = rs1,
90 +
                             .rs2      = rs2,
91 +
                             .funct3   = fn3,
92 +
                             .imm_11   = (imm >> 11) & 0x1,
93 +
                             .imm_4_1  = (imm >> 1) & 0xF,
94 +
                             .imm_10_5 = (imm >> 5) & 0x3F,
95 +
                             .imm_12   = (imm >> 12) & 0x1 } };
96 +
}
97 +
98 +
/* Creates a UJ-type (jump) instruction struct.
99 +
 * Modified U-type used for JAL instruction. */
100 +
static instr_t instr_uj(opcode_t opcode, reg_t rd, i32 imm) {
101 +
    return (instr_t){ .j = { .opcode    = opcode,
102 +
                             .rd        = rd,
103 +
                             .imm_20    = (imm >> 20) & 0x1,
104 +
                             .imm_10_1  = (imm >> 1) & 0x3FF,
105 +
                             .imm_11    = (imm >> 11) & 0x1,
106 +
                             .imm_19_12 = (imm >> 12) & 0xFF } };
107 +
}
108 +
109 +
/* Instruction definitions table. */
/* Maps each instruction to its parameters for easy lookup. */
typedef struct {
    ifmt_t   type;    /* Instruction format (R/I/S/B/U/J). */
    opcode_t opcode;  /* Opcode value */
    funct3_t funct3;  /* Function3 value (if applicable) */
    funct7_t funct7;  /* Function7 value (if applicable) */
    bool     special; /* Special handling flag -- set below for LUI/AUIPC/JAL
                       * and shift-immediates; consumer not visible here,
                       * TODO document what it triggers. */
} idef_t;
118 +
119 +
/* Instruction definition table */
120 +
static const idef_t idefs[] = {
121 +
    /* Upper immediate instructions. */
122 +
    [I_LUI]   = { IFMT_U, OP_LUI, 0, 0, true },
123 +
    [I_AUIPC] = { IFMT_U, OP_AUIPC, 0, 0, true },
124 +
    /* Jump instructions. */
125 +
    [I_JAL]  = { IFMT_J, OP_JAL, 0, 0, true },
126 +
    [I_JALR] = { IFMT_I, OP_JALR, 0, 0, false },
127 +
    /* Branch instructions. */
128 +
    [I_BEQ]  = { IFMT_B, OP_BRANCH, FUNCT3_BYTE, 0, false },
129 +
    [I_BNE]  = { IFMT_B, OP_BRANCH, FUNCT3_HALF, 0, false },
130 +
    [I_BLT]  = { IFMT_B, OP_BRANCH, FUNCT3_BYTE_U, 0, false },
131 +
    [I_BGE]  = { IFMT_B, OP_BRANCH, FUNCT3_HALF_U, 0, false },
132 +
    [I_BLTU] = { IFMT_B, OP_BRANCH, FUNCT3_OR, 0, false },
133 +
    [I_BGEU] = { IFMT_B, OP_BRANCH, FUNCT3_AND, 0, false },
134 +
    /* Load instructions. */
135 +
    [I_LB]  = { IFMT_I, OP_LOAD, FUNCT3_BYTE, 0, false },
136 +
    [I_LH]  = { IFMT_I, OP_LOAD, FUNCT3_HALF, 0, false },
137 +
    [I_LW]  = { IFMT_I, OP_LOAD, FUNCT3_WORD, 0, false },
138 +
    [I_LBU] = { IFMT_I, OP_LOAD, FUNCT3_BYTE_U, 0, false },
139 +
    [I_LHU] = { IFMT_I, OP_LOAD, FUNCT3_HALF_U, 0, false },
140 +
    /* Store instructions. */
141 +
    [I_SB] = { IFMT_S, OP_STORE, FUNCT3_BYTE, 0, false },
142 +
    [I_SH] = { IFMT_S, OP_STORE, FUNCT3_HALF, 0, false },
143 +
    [I_SW] = { IFMT_S, OP_STORE, FUNCT3_WORD, 0, false },
144 +
    /* ALU immediate operations. */
145 +
    [I_ADDI]  = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, false },
146 +
    [I_SLTI]  = { IFMT_I, OP_IMM, FUNCT3_SLT, 0, false },
147 +
    [I_SLTIU] = { IFMT_I, OP_IMM, FUNCT3_SLTU, 0, false },
148 +
    [I_XORI]  = { IFMT_I, OP_IMM, FUNCT3_XOR, 0, false },
149 +
    [I_ORI]   = { IFMT_I, OP_IMM, FUNCT3_OR, 0, false },
150 +
    [I_ANDI]  = { IFMT_I, OP_IMM, FUNCT3_AND, 0, false },
151 +
    [I_SLLI]  = { IFMT_I, OP_IMM, FUNCT3_SLL, 0, true },
152 +
    [I_SRLI]  = { IFMT_I, OP_IMM, FUNCT3_SRL, 0, true },
153 +
    [I_SRAI]  = { IFMT_I, OP_IMM, FUNCT3_SRL, 0, true },
154 +
    /* ALU register operations. */
155 +
    [I_ADD]  = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_NORMAL, false },
156 +
    [I_SUB]  = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_SUB, false },
157 +
    [I_SLL]  = { IFMT_R, OP_OP, FUNCT3_SLL, FUNCT7_NORMAL, false },
158 +
    [I_SLT]  = { IFMT_R, OP_OP, FUNCT3_SLT, FUNCT7_NORMAL, false },
159 +
    [I_SLTU] = { IFMT_R, OP_OP, FUNCT3_SLTU, FUNCT7_NORMAL, false },
160 +
    [I_XOR]  = { IFMT_R, OP_OP, FUNCT3_XOR, FUNCT7_NORMAL, false },
161 +
    [I_SRL]  = { IFMT_R, OP_OP, FUNCT3_SRL, FUNCT7_NORMAL, false },
162 +
    [I_AND]  = { IFMT_R, OP_OP, FUNCT3_AND, FUNCT7_NORMAL, false },
163 +
    [I_OR]   = { IFMT_R, OP_OP, FUNCT3_OR, FUNCT7_NORMAL, false },
164 +
    /* M extension - multiply and divide. */
165 +
    [I_MUL]    = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_MUL, false },
166 +
    [I_MULH]   = { IFMT_R, OP_OP, FUNCT3_SLL, FUNCT7_MUL, false },
167 +
    [I_MULHSU] = { IFMT_R, OP_OP, FUNCT3_SLT, FUNCT7_MUL, false },
168 +
    [I_MULHU]  = { IFMT_R, OP_OP, FUNCT3_SLTU, FUNCT7_MUL, false },
169 +
    [I_DIV]    = { IFMT_R, OP_OP, FUNCT3_XOR, FUNCT7_MUL, false },
170 +
    [I_DIVU]   = { IFMT_R, OP_OP, FUNCT3_SRL, FUNCT7_MUL, false },
171 +
    [I_REM]    = { IFMT_R, OP_OP, FUNCT3_OR, FUNCT7_MUL, false },
172 +
    [I_REMU]   = { IFMT_R, OP_OP, FUNCT3_AND, FUNCT7_MUL, false },
173 +
    /* Pseudo-instructions. */
174 +
    [I_MV]  = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, true },
175 +
    [I_JMP] = { IFMT_J, OP_JAL, 0, 0, true },
176 +
    [I_NOP] = { IFMT_I, OP_IMM, FUNCT3_ADD, 0, true },
177 +
    [I_NOT] = { IFMT_I, OP_IMM, FUNCT3_XOR, 0, true },
178 +
    [I_NEG] = { IFMT_R, OP_OP, FUNCT3_ADD, FUNCT7_SUB, true },
179 +
    /* System instructions */
180 +
    [I_EBREAK] = { IFMT_I, OP_SYSTEM, 0, 0, true },
181 +
    [I_ECALL]  = { IFMT_I, OP_SYSTEM, 0, 0, true },
182 +
    /* F Extension floating-point instructions */
183 +
    [I_FADD_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FADD_S, false },
184 +
    [I_FSUB_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FSUB_S, false },
185 +
    [I_FMUL_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FMUL_S, false },
186 +
    [I_FDIV_S] = { IFMT_R, OP_OP_FP, FUNCT3_ADD, FUNCT7_FDIV_S, false },
187 +
    [I_FEQ_S]  = { IFMT_R, OP_OP_FP, FUNCT3_FEQ, FUNCT7_FEQ_S, false },
188 +
    [I_FLT_S]  = { IFMT_R, OP_OP_FP, FUNCT3_FLT, FUNCT7_FLT_S, false },
189 +
    [I_FLE_S]  = { IFMT_R, OP_OP_FP, FUNCT3_FLE, FUNCT7_FLE_S, false },
190 +
    [I_FLW]    = { IFMT_I, OP_LOAD_FP, FUNCT3_WORD_FP, 0, false },
191 +
    [I_FSW]    = { IFMT_S, OP_STORE_FP, FUNCT3_WORD_FP, 0, false },
192 +
    /* RV64I load/store */
193 +
    [I_LWU] = { IFMT_I, OP_LOAD, FUNCT3_WORD_U, 0, false },
194 +
    [I_LD]  = { IFMT_I, OP_LOAD, FUNCT3_DOUBLE, 0, false },
195 +
    [I_SD]  = { IFMT_S, OP_STORE, FUNCT3_DOUBLE, 0, false },
196 +
    /* RV64I immediate W-ops */
197 +
    [I_ADDIW] = { IFMT_I, OP_IMM_32, FUNCT3_ADD, 0, false },
198 +
    [I_SLLIW] = { IFMT_I, OP_IMM_32, FUNCT3_SLL, 0, true },
199 +
    [I_SRLIW] = { IFMT_I, OP_IMM_32, FUNCT3_SRL, 0, true },
200 +
    [I_SRAIW] = { IFMT_I, OP_IMM_32, FUNCT3_SRL, 0, true },
201 +
    /* RV64I register W-ops */
202 +
    [I_ADDW] = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_NORMAL, false },
203 +
    [I_SUBW] = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_SUB, false },
204 +
    [I_SLLW] = { IFMT_R, OP_OP_32, FUNCT3_SLL, FUNCT7_NORMAL, false },
205 +
    [I_SRLW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_NORMAL, false },
206 +
    [I_SRAW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_SRA, false },
207 +
    /* RV64M W-ops */
208 +
    [I_MULW]  = { IFMT_R, OP_OP_32, FUNCT3_ADD, FUNCT7_MUL, false },
209 +
    [I_DIVW]  = { IFMT_R, OP_OP_32, FUNCT3_XOR, FUNCT7_MUL, false },
210 +
    [I_DIVUW] = { IFMT_R, OP_OP_32, FUNCT3_SRL, FUNCT7_MUL, false },
211 +
    [I_REMW]  = { IFMT_R, OP_OP_32, FUNCT3_OR, FUNCT7_MUL, false },
212 +
    [I_REMUW] = { IFMT_R, OP_OP_32, FUNCT3_AND, FUNCT7_MUL, false },
213 +
};
214 +
215 +
/* Generates a RISC-V instruction based on the instruction definition.
 *
 * `iop` selects the entry in `idefs`; `rd`, `rs1`, `rs2` and `imm` are
 * used or ignored depending on the instruction's encoding format.
 * Instructions flagged `special` in the table take a custom path below;
 * everything else is dispatched generically by format. */
instr_t instr(iname_t iop, reg_t rd, reg_t rs1, reg_t rs2, i32 imm) {
    const idef_t *def = &idefs[iop];

    /* Handle special cases that need specific processing. */
    if (def->special) {
        switch (iop) {
        case I_LUI:
            return instr_u(OP_LUI, rd, imm);
        case I_AUIPC:
            return instr_u(OP_AUIPC, rd, imm);
        case I_JAL:
            return instr_uj(OP_JAL, rd, imm);
        case I_SLLI:
            /* RV64 shift amounts are 6 bits wide (0-63). */
            return instr_i(OP_IMM, FUNCT3_SLL, rd, rs1, imm & 0x3F);
        case I_SRLI:
            return instr_i(OP_IMM, FUNCT3_SRL, rd, rs1, imm & 0x3F);
        case I_SRAI:
            /* SRAI shares funct3 with SRLI; bit 10 of the immediate
             * (0x400) selects the arithmetic shift. */
            return instr_i(OP_IMM, FUNCT3_SRL, rd, rs1, (imm & 0x3F) + 0x400);
        case I_SLLIW:
            /* W-shifts operate on 32-bit values: 5-bit shamt (0-31). */
            return instr_i(OP_IMM_32, FUNCT3_SLL, rd, rs1, imm & 0x1F);
        case I_SRLIW:
            return instr_i(OP_IMM_32, FUNCT3_SRL, rd, rs1, imm & 0x1F);
        case I_SRAIW:
            return instr_i(
                OP_IMM_32, FUNCT3_SRL, rd, rs1, (imm & 0x1F) + 0x400
            );
        case I_MV:
            /* mv rd, rs1 == addi rd, rs1, 0 */
            return instr_i(OP_IMM, FUNCT3_ADD, rd, rs1, 0);
        case I_JMP:
            /* j offset == jal zero, offset (link discarded) */
            return instr_uj(OP_JAL, ZERO, imm);
        case I_NOP:
            /* nop == addi zero, zero, 0 */
            return instr_i(OP_IMM, FUNCT3_ADD, ZERO, ZERO, 0);
        case I_NOT:
            /* NOTE(review): the standard `not rd, rs` pseudo-instruction
             * is `xori rd, rs, -1` (flip all bits); immediate 1 flips
             * only bit 0, which is a logical-not of a 0/1 boolean.
             * Confirm against call sites whether boolean-not is the
             * intended semantics here. */
            return instr_i(OP_IMM, FUNCT3_XOR, rd, rs1, 1);
        case I_NEG:
            /* neg rd, rs1 == sub rd, zero, rs1 */
            return instr_r(OP_OP, FUNCT3_ADD, FUNCT7_SUB, rd, ZERO, rs1);
        case I_EBREAK:
            /* EBREAK: SYSTEM opcode, registers zero, immediate = 1. */
            return instr_i(OP_SYSTEM, 0, 0, 0, 1);
        case I_ECALL:
            /* ECALL is encoded as all zeros including immediate */
            return instr_i(OP_SYSTEM, 0, 0, 0, 0);
        default:
            break;
        }
    }

    /* Regular instructions by type. */
    switch (def->type) {
    case IFMT_I:
        return instr_i(def->opcode, def->funct3, rd, rs1, imm);
    case IFMT_R:
        return instr_r(def->opcode, def->funct3, def->funct7, rd, rs1, rs2);
    case IFMT_S:
        return instr_s(def->opcode, def->funct3, rs1, rs2, imm);
    case IFMT_B:
        return instr_sb(def->opcode, def->funct3, rs1, rs2, imm);
    case IFMT_U:
        return instr_u(def->opcode, rd, imm);
    case IFMT_J:
        return instr_uj(def->opcode, rd, imm);
    default:
        /* Unreachable for a well-formed idefs table. */
        abort();
    }
}
riscv.h added +396 -0
1 +
#ifndef OP_H
#define OP_H

#include "types.h"

/* Total number of integer registers (x0-x31). */
#define REGISTERS       32
/* Word size in bytes of the target architecture (RISCV64). */
#define WORD_SIZE       8
/* Tag size in bytes for optional/union discriminants. */
#define TAG_SIZE        1
/* Instruction size in bytes (always 32-bit, even on RV64). */
#define INSTR_SIZE      4
/* Stack alignment requirement in bytes (RISC-V ABI). */
#define STACK_ALIGNMENT 16
/* The frame pointer register is set as an alias of `S0`. */
#define FP              S0
18 +
19 +
/* Convenient macro wrappers for `instr`.
 * Some of these, such as BLE and BGT are implemented by swapping the operands
 * of other instructions. Unused operand slots are passed as 0: store and
 * branch macros pass 0 for `rd`; register-register macros pass 0 for `imm`.
 * Note the store macros take (source, base, offset) argument order. */
#define ADDI(rd, rs1, imm)   __instr(I_ADDI, rd, rs1, 0, imm)
#define SLTI(rd, rs1, imm)   __instr(I_SLTI, rd, rs1, 0, imm)
#define SLTIU(rd, rs1, imm)  __instr(I_SLTIU, rd, rs1, 0, imm)
#define XORI(rd, rs1, imm)   __instr(I_XORI, rd, rs1, 0, imm)
#define ORI(rd, rs1, imm)    __instr(I_ORI, rd, rs1, 0, imm)
#define ANDI(rd, rs1, imm)   __instr(I_ANDI, rd, rs1, 0, imm)
#define SLLI(rd, rs1, imm)   __instr(I_SLLI, rd, rs1, 0, imm)
#define SRLI(rd, rs1, imm)   __instr(I_SRLI, rd, rs1, 0, imm)
#define SRAI(rd, rs1, imm)   __instr(I_SRAI, rd, rs1, 0, imm)
#define JALR(rd, rs1, imm)   __instr(I_JALR, rd, rs1, 0, imm)
#define LB(rd, rs1, imm)     __instr(I_LB, rd, rs1, 0, imm)
#define LH(rd, rs1, imm)     __instr(I_LH, rd, rs1, 0, imm)
#define LW(rd, rs1, imm)     __instr(I_LW, rd, rs1, 0, imm)
#define LWU(rd, rs1, imm)    __instr(I_LWU, rd, rs1, 0, imm)
#define LD(rd, rs1, imm)     __instr(I_LD, rd, rs1, 0, imm)
#define LBU(rd, rs1, imm)    __instr(I_LBU, rd, rs1, 0, imm)
#define LHU(rd, rs1, imm)    __instr(I_LHU, rd, rs1, 0, imm)
/* Stores: first argument is the value register (rs2). */
#define SB(rs2, rs1, imm)    __instr(I_SB, 0, rs1, rs2, imm)
#define SH(rs2, rs1, imm)    __instr(I_SH, 0, rs1, rs2, imm)
#define SW(rs2, rs1, imm)    __instr(I_SW, 0, rs1, rs2, imm)
#define SD(rs2, rs1, imm)    __instr(I_SD, 0, rs1, rs2, imm)
#define BEQ(rs1, rs2, imm)   __instr(I_BEQ, 0, rs1, rs2, imm)
#define BNE(rs1, rs2, imm)   __instr(I_BNE, 0, rs1, rs2, imm)
#define BLT(rs1, rs2, imm)   __instr(I_BLT, 0, rs1, rs2, imm)
#define BGE(rs1, rs2, imm)   __instr(I_BGE, 0, rs1, rs2, imm)
#define BLTU(rs1, rs2, imm)  __instr(I_BLTU, 0, rs1, rs2, imm)
#define BGEU(rs1, rs2, imm)  __instr(I_BGEU, 0, rs1, rs2, imm)
/* BLE/BGT have no hardware encoding; they swap operands of BGE/BLT. */
#define BLE(rs1, rs2, imm)   __instr(I_BGE, 0, rs2, rs1, imm)
#define BGT(rs1, rs2, imm)   __instr(I_BLT, 0, rs2, rs1, imm)
#define ADD(rd, rs1, rs2)    __instr(I_ADD, rd, rs1, rs2, 0)
#define SUB(rd, rs1, rs2)    __instr(I_SUB, rd, rs1, rs2, 0)
#define DIV(rd, rs1, rs2)    __instr(I_DIV, rd, rs1, rs2, 0)
#define DIVU(rd, rs1, rs2)   __instr(I_DIVU, rd, rs1, rs2, 0)
#define REM(rd, rs1, rs2)    __instr(I_REM, rd, rs1, rs2, 0)
#define REMU(rd, rs1, rs2)   __instr(I_REMU, rd, rs1, rs2, 0)
#define MUL(rd, rs1, rs2)    __instr(I_MUL, rd, rs1, rs2, 0)
#define SLL(rd, rs1, rs2)    __instr(I_SLL, rd, rs1, rs2, 0)
#define SLT(rd, rs1, rs2)    __instr(I_SLT, rd, rs1, rs2, 0)
#define SLTU(rd, rs1, rs2)   __instr(I_SLTU, rd, rs1, rs2, 0)
#define XOR(rd, rs1, rs2)    __instr(I_XOR, rd, rs1, rs2, 0)
#define SRL(rd, rs1, rs2)    __instr(I_SRL, rd, rs1, rs2, 0)
#define AND(rd, rs1, rs2)    __instr(I_AND, rd, rs1, rs2, 0)
#define OR(rd, rs1, rs2)     __instr(I_OR, rd, rs1, rs2, 0)
#define LUI(rd, imm)         __instr(I_LUI, rd, 0, 0, imm)
#define AUIPC(rd, imm)       __instr(I_AUIPC, rd, 0, 0, imm)
#define JAL(rd, imm)         __instr(I_JAL, rd, 0, 0, imm)
#define JMP(imm)             __instr(I_JMP, 0, 0, 0, imm)
#define MV(rd, rs1)          __instr(I_MV, rd, rs1, 0, 0)
#define NOT(rd, rs1)         __instr(I_NOT, rd, rs1, 0, 0)
#define NEG(rd, rs1)         __instr(I_NEG, rd, rs1, 0, 0)
#define NOP                  __instr(I_NOP, 0, 0, 0, 0)
#define RET                  __instr(I_JALR, ZERO, RA, 0, 0)
#define EBREAK               __instr(I_EBREAK, 0, 0, 0, 0)
#define ECALL                __instr(I_ECALL, 0, 0, 0, 0)
/* RV64I word-width (32-bit) operations */
#define ADDIW(rd, rs1, imm)  __instr(I_ADDIW, rd, rs1, 0, imm)
#define ADDW(rd, rs1, rs2)   __instr(I_ADDW, rd, rs1, rs2, 0)
#define SUBW(rd, rs1, rs2)   __instr(I_SUBW, rd, rs1, rs2, 0)
#define MULW(rd, rs1, rs2)   __instr(I_MULW, rd, rs1, rs2, 0)
#define DIVW(rd, rs1, rs2)   __instr(I_DIVW, rd, rs1, rs2, 0)
#define DIVUW(rd, rs1, rs2)  __instr(I_DIVUW, rd, rs1, rs2, 0)
#define REMW(rd, rs1, rs2)   __instr(I_REMW, rd, rs1, rs2, 0)
#define REMUW(rd, rs1, rs2)  __instr(I_REMUW, rd, rs1, rs2, 0)
#define SLLIW(rd, rs1, imm)  __instr(I_SLLIW, rd, rs1, 0, imm)
#define SRLIW(rd, rs1, imm)  __instr(I_SRLIW, rd, rs1, 0, imm)
#define SRAIW(rd, rs1, imm)  __instr(I_SRAIW, rd, rs1, 0, imm)
#define SLLW(rd, rs1, rs2)   __instr(I_SLLW, rd, rs1, rs2, 0)
#define SRLW(rd, rs1, rs2)   __instr(I_SRLW, rd, rs1, rs2, 0)
#define SRAW(rd, rs1, rs2)   __instr(I_SRAW, rd, rs1, rs2, 0)
/* F Extension - Floating-point instructions */
#define FADD_S(rd, rs1, rs2) __instr(I_FADD_S, rd, rs1, rs2, 0)
#define FSUB_S(rd, rs1, rs2) __instr(I_FSUB_S, rd, rs1, rs2, 0)
#define FMUL_S(rd, rs1, rs2) __instr(I_FMUL_S, rd, rs1, rs2, 0)
#define FDIV_S(rd, rs1, rs2) __instr(I_FDIV_S, rd, rs1, rs2, 0)
#define FEQ_S(rd, rs1, rs2)  __instr(I_FEQ_S, rd, rs1, rs2, 0)
#define FLT_S(rd, rs1, rs2)  __instr(I_FLT_S, rd, rs1, rs2, 0)
#define FLE_S(rd, rs1, rs2)  __instr(I_FLE_S, rd, rs1, rs2, 0)
#define FLW(rd, rs1, imm)    __instr(I_FLW, rd, rs1, 0, imm)
#define FSW(rs2, rs1, imm)   __instr(I_FSW, 0, rs1, rs2, imm)
101 +
102 +
/* String representations of register names, indexed by `reg_t`. */
extern const char *reg_names[];

/* Boolean map of caller-saved registers, indexed by `reg_t`.
 * True for registers that need to be saved by the caller
 * before a function call. */
extern const bool caller_saved_registers[REGISTERS];
109 +
110 +
/* RISC-V register names (ABI mnemonics for x0-x31). */
typedef enum {
    ZERO = 0,  /* Hard-wired zero */
    RA   = 1,  /* Return address */
    SP   = 2,  /* Stack pointer */
    GP   = 3,  /* Global pointer */
    TP   = 4,  /* Thread pointer */
    T0   = 5,  /* Temporary/alternate link register */
    T1   = 6,  /* Temporary */
    T2   = 7,  /* Temporary */
    S0   = 8,  /* Saved register/frame pointer (aliased as FP above) */
    S1   = 9,  /* Saved register */
    A0   = 10, /* Function arguments/returns */
    A1   = 11, /* Function arguments/returns */
    A2   = 12, /* Function arguments */
    A3   = 13,
    A4   = 14,
    A5   = 15,
    A6   = 16,
    A7   = 17,
    S2   = 18, /* Saved registers */
    S3   = 19,
    S4   = 20,
    S5   = 21,
    S6   = 22,
    S7   = 23,
    S8   = 24,
    S9   = 25,
    S10  = 26,
    S11  = 27,
    T3   = 28, /* Temporaries */
    T4   = 29,
    T5   = 30,
    T6   = 31
} reg_t;
145 +
146 +
/* Temporary registers available for scratch use (T1-T6; T0 excluded —
 * contents defined elsewhere, verify against the .c definition). */
extern const reg_t temp_registers[6];
148 +
149 +
/* 7-bit major opcodes for the RISC-V base instruction set. */
typedef enum {
    OP_LOAD   = 0x03,
    OP_STORE  = 0x23,
    OP_BRANCH = 0x63,
    OP_JALR   = 0x67,
    OP_JAL    = 0x6F,
    OP_OP     = 0x33,
    OP_IMM    = 0x13,
    OP_AUIPC  = 0x17,
    OP_IMM_32 = 0x1B, /* RV64I: ADDIW, SLLIW, SRLIW, SRAIW */
    OP_OP_32 = 0x3B, /* RV64I: ADDW, SUBW, SLLW, SRLW, SRAW, MULW, DIVW, REMW */
    OP_LUI   = 0x37,
    OP_SYSTEM = 0x73, /* ECALL, EBREAK */
    OP_FENCE  = 0x0F,
    /* F Extension opcodes */
    OP_LOAD_FP  = 0x07,
    OP_STORE_FP = 0x27,
    OP_OP_FP    = 0x53
} opcode_t;
169 +
170 +
/* funct3 field values. The same 3-bit value has a different meaning under
 * each major opcode, so several enumerators deliberately share a value
 * (e.g. FUNCT3_WORD == FUNCT3_SLT == FUNCT3_WORD_FP == 0x2). */
typedef enum {
    /* Memory operations */
    FUNCT3_BYTE   = 0x0, /* LB/SB - Load/Store Byte */
    FUNCT3_HALF   = 0x1, /* LH/SH - Load/Store Halfword */
    FUNCT3_WORD   = 0x2, /* LW/SW - Load/Store Word */
    FUNCT3_DOUBLE = 0x3, /* LD/SD - Load/Store Doubleword */
    FUNCT3_BYTE_U = 0x4, /* LBU - Load Byte Unsigned */
    FUNCT3_HALF_U = 0x5, /* LHU - Load Halfword Unsigned */
    FUNCT3_WORD_U = 0x6, /* LWU - Load Word Unsigned */

    /* ALU operations */
    FUNCT3_ADD  = 0x0, /* ADD/SUB/ADDI */
    FUNCT3_SLL  = 0x1, /* SLL/SLLI */
    FUNCT3_SLT  = 0x2, /* SLT/SLTI */
    FUNCT3_SLTU = 0x3, /* SLTU/SLTIU */
    FUNCT3_XOR  = 0x4, /* XOR/XORI */
    FUNCT3_SRL  = 0x5, /* SRL/SRA/SRLI/SRAI */
    FUNCT3_OR   = 0x6, /* OR/ORI */
    FUNCT3_AND  = 0x7, /* AND/ANDI */
    /* F Extension function3 codes */
    FUNCT3_WORD_FP = 0x2, /* FLW/FSW - Load/Store Single */
    FUNCT3_FEQ     = 0x2, /* FEQ.S */
    FUNCT3_FLT     = 0x1, /* FLT.S */
    FUNCT3_FLE     = 0x0  /* FLE.S */
} funct3_t;
196 +
197 +
/* funct7 field values (R-type only). As with funct3, duplicates are
 * intentional: the meaning depends on opcode/funct3 context (e.g.
 * FUNCT7_SUB and FUNCT7_SRA are both 0x20). */
typedef enum {
    FUNCT7_NORMAL = 0x00,
    FUNCT7_SUB    = 0x20,
    FUNCT7_SRA    = 0x20,
    FUNCT7_MUL    = 0x01, /* M extension */
    /* F Extension function codes */
    FUNCT7_FADD_S = 0x00,
    FUNCT7_FSUB_S = 0x04,
    FUNCT7_FMUL_S = 0x08,
    FUNCT7_FDIV_S = 0x0C,
    FUNCT7_FEQ_S  = 0x50, /* FP compares share funct7; funct3 selects op */
    FUNCT7_FLT_S  = 0x50,
    FUNCT7_FLE_S  = 0x50
} funct7_t;
212 +
213 +
/* Represents a RISC-V instruction in its various formats.
 * Fields are listed from the least-significant bit upward; each struct
 * covers exactly 32 bits, overlaying `raw`.
 * NOTE(review): bit-field allocation order is implementation-defined in C;
 * this layout assumes LSB-first allocation (the common little-endian ABI
 * behavior) — confirm for any new target compiler. */
typedef union {
    struct {
        u32 opcode : 7;
        u32 rd     : 5;
        u32 funct3 : 3;
        u32 rs1    : 5;
        u32 rs2    : 5;
        u32 funct7 : 7;
    } r; /* Register format */

    struct {
        u32 opcode   : 7;
        u32 rd       : 5;
        u32 funct3   : 3;
        u32 rs1      : 5;
        u32 imm_11_0 : 12;
    } i; /* Immediate format */

    struct {
        u32 opcode   : 7;
        u32 imm_4_0  : 5;
        u32 funct3   : 3;
        u32 rs1      : 5;
        u32 rs2      : 5;
        u32 imm_11_5 : 7;
    } s; /* Store format */

    struct {
        u32 opcode   : 7;
        u32 imm_11   : 1;
        u32 imm_4_1  : 4;
        u32 funct3   : 3;
        u32 rs1      : 5;
        u32 rs2      : 5;
        u32 imm_10_5 : 6;
        u32 imm_12   : 1;
    } b; /* Branch format (immediate bit 0 is implicitly zero) */

    struct {
        u32 opcode    : 7;
        u32 rd        : 5;
        u32 imm_31_12 : 20;
    } u; /* Upper immediate format */

    struct {
        u32 opcode    : 7;
        u32 rd        : 5;
        u32 imm_19_12 : 8;
        u32 imm_11    : 1;
        u32 imm_10_1  : 10;
        u32 imm_20    : 1;
    } j; /* Jump format (immediate bit 0 is implicitly zero) */

    u32 raw; /* Raw 32-bit instruction */
} instr_t;
269 +
270 +
/* Instruction encoding format, mirroring the `instr_t` union members. */
typedef enum {
    IFMT_I, /* I-type (immediate) */
    IFMT_R, /* R-type (register) */
    IFMT_S, /* S-type (store) */
    IFMT_B, /* B-type (branch) */
    IFMT_U, /* U-type (upper immediate) */
    IFMT_J, /* J-type (jump) */
} ifmt_t;
279 +
280 +
/* RISC-V instruction name, used as the index into the encoder's
 * definition table.
 * NOTE(review): every enumerator must have a matching `idefs[]` entry in
 * the encoder, or encoding it yields a zero-filled definition — verify
 * when adding instructions. */
typedef enum {
    I_LUI,
    I_AUIPC,
    I_JAL,
    I_JALR,
    /* Conditional branches */
    I_BEQ,
    I_BNE,
    I_BLT,
    I_BGE,
    I_BLTU,
    I_BGEU,
    /* Loads */
    I_LB,
    I_LH,
    I_LW,
    I_LBU,
    I_LHU,
    /* Stores */
    I_SB,
    I_SH,
    I_SW,
    /* ALU immediate */
    I_ADDI,
    I_SLTI,
    I_SLTIU,
    I_XORI,
    I_ORI,
    I_ANDI,
    I_SLLI,
    I_SRLI,
    I_SRAI,
    /* ALU register-register */
    I_ADD,
    I_SUB,
    I_SLL,
    I_SLT,
    I_SLTU,
    I_XOR,
    I_SRL,
    I_SRA,
    I_OR,
    I_AND,
    /* M extension */
    I_MUL,
    I_MULH,
    I_MULHSU,
    I_MULHU,
    I_DIV,
    I_DIVU,
    I_REM,
    I_REMU,
    /* Pseudo-instructions */
    I_MV,
    I_JMP,
    I_NOP,
    I_NOT,
    I_NEG,
    /* System */
    I_EBREAK,
    I_ECALL,
    /* F Extension - Floating-point instructions */
    I_FADD_S,
    I_FSUB_S,
    I_FMUL_S,
    I_FDIV_S,
    I_FEQ_S,
    I_FLT_S,
    I_FLE_S,
    I_FLW,
    I_FSW,
    /* RV64I extensions */
    I_LWU,
    I_LD,
    I_SD,
    I_ADDIW,
    I_SLLIW,
    I_SRLIW,
    I_SRAIW,
    I_ADDW,
    I_SUBW,
    I_SLLW,
    I_SRLW,
    I_SRAW,
    I_MULW,
    I_DIVW,
    I_DIVUW,
    I_REMW,
    I_REMUW
} iname_t;
363 +
364 +
/* Returns a RISC-V instruction based on the instruction type.
 * `rd`, `rs1`, `rs2` and `imm` are used or ignored per the encoding
 * format of `op`; pass 0 for unused slots (see the macros above). */
instr_t instr(iname_t op, reg_t rd, reg_t rs1, reg_t rs2, i32 imm);

/* Inline wrapper so the instruction macros expand to a function call.
 * NOTE(review): identifiers starting with a double underscore are
 * reserved in C; consider renaming if this ever causes a conflict. */
static inline instr_t __instr(
    iname_t op, reg_t rd, reg_t rs1, reg_t rs2, i32 imm
) {
    return instr(op, rd, rs1, rs2, imm);
}
372 +
373 +
/* Return true when a signed 12-bit immediate can encode `value`. */
374 +
static inline bool is_small(i32 value) {
375 +
    return value >= -2048 && value <= 2047;
376 +
}
377 +
378 +
/* Return true when `value` is a legal B-type branch offset: even and
 * within the signed 13-bit range [-4096, 4094]. */
static inline bool is_branch_imm(i32 value) {
    bool aligned  = (value & 1) == 0;
    bool in_range = value >= -4096 && value <= 4094;

    return aligned && in_range;
}
381 +
382 +
/* Return true when `value` is a legal J-type jump offset: even and
 * within the signed 21-bit range [-1048576, 1048574]. */
static inline bool is_jump_imm(i32 value) {
    bool aligned  = (value & 1) == 0;
    bool in_range = value >= -1048576 && value <= 1048574;

    return aligned && in_range;
}
385 +
386 +
/* Sign-extend the low `bit_width` bits of `value` to a signed 32-bit int. */
i32 sign_extend(u32 value, int bit_width);
/* Round `size` up to the next multiple of `alignment`. */
i32 align(i32 size, i32 alignment);
/* Reassemble the (sign-extended) immediate from each instruction format. */
i32 get_i_imm(instr_t instr);
i32 get_s_imm(instr_t instr);
i32 get_b_imm(instr_t instr);
i32 get_j_imm(instr_t instr);

#endif
riscv/debug.c added +723 -0
1 +
#include <stdarg.h>
2 +
#include <stdio.h>
3 +
#include <string.h>
4 +
#include <unistd.h>
5 +
6 +
#include "../color.h"
7 +
#include "../riscv.h"
8 +
9 +
#include "debug.h"
10 +
11 +
/* Width for opcode field alignment (column width when padding mnemonics
 * in printed output — use sites are outside this view; confirm). */
#define OP_WIDTH 6
13 +
14 +
/* Load mnemonics indexed by funct3 value (0x0-0x7). */
static const char *load_op_names[] = {
    "lb",  /* 0x0 FUNCT3_BYTE */
    "lh",  /* 0x1 FUNCT3_HALF */
    "lw",  /* 0x2 FUNCT3_WORD */
    "ld",  /* 0x3 FUNCT3_DOUBLE (RV64) */
    "lbu", /* 0x4 FUNCT3_BYTE_U */
    "lhu", /* 0x5 FUNCT3_HALF_U */
    "lwu", /* 0x6 FUNCT3_WORD_U */
    "?"    /* 0x7 invalid */
};
25 +
26 +
/* Store mnemonics indexed by funct3 value (0x0-0x7). */
static const char *store_op_names[] = {
    "sb", /* 0x0 FUNCT3_BYTE */
    "sh", /* 0x1 FUNCT3_HALF */
    "sw", /* 0x2 FUNCT3_WORD */
    "sd", /* 0x3 FUNCT3_DOUBLE (RV64) */
    "?",  /* 0x4 invalid */
    "?",  /* 0x5 invalid */
    "?",  /* 0x6 invalid */
    "?"   /* 0x7 invalid */
};
37 +
38 +
/* Branch mnemonics indexed by funct3 value (0x0-0x7).
 * (The encoder names these slots with memory-op funct3 aliases of the
 * same numeric value, e.g. FUNCT3_OR == 0x6 for bltu.) */
static const char *branch_op_names[] = {
    "beq",  /* 0x0 */
    "bne",  /* 0x1 */
    "?",    /* 0x2 invalid */
    "?",    /* 0x3 invalid */
    "blt",  /* 0x4 */
    "bge",  /* 0x5 */
    "bltu", /* 0x6 */
    "bgeu"  /* 0x7 */
};
49 +
50 +
/* OP-IMM mnemonics indexed by funct3 value (0x0-0x7). */
static const char *alu_imm_op_names[] = {
    "addi",  /* 0x0 FUNCT3_ADD */
    "slli",  /* 0x1 FUNCT3_SLL */
    "slti",  /* 0x2 FUNCT3_SLT */
    "sltiu", /* 0x3 FUNCT3_SLTU */
    "xori",  /* 0x4 FUNCT3_XOR */
    "srli",  /* 0x5 FUNCT3_SRL (may also be SRAI; distinguished by imm bit 10) */
    "ori",   /* 0x6 FUNCT3_OR */
    "andi"   /* 0x7 FUNCT3_AND */
};
61 +
62 +
/* ALU operations (register-register) based on funct3 and funct7.
 * Index as `[funct7][funct3]`, where the first index is a compacted
 * funct7 code: 0 = FUNCT7_NORMAL, 1 = FUNCT7_SUB/SRA, 2 = unused,
 * 3 = FUNCT7_MUL (see the funct7 mapping switch in instr_decode). */
static const char *alu_op_names[4][8] = {
    /* FUNCT7_NORMAL (0x00) */
    {
        "add",  /* FUNCT3_ADD */
        "sll",  /* FUNCT3_SLL */
        "slt",  /* FUNCT3_SLT */
        "sltu", /* FUNCT3_SLTU */
        "xor",  /* FUNCT3_XOR */
        "srl",  /* FUNCT3_SRL */
        "or",   /* FUNCT3_OR */
        "and"   /* FUNCT3_AND */
    },
    /* FUNCT7_SUB (0x20) - Subset of instructions that use this funct7 */
    {
        "sub", /* FUNCT3_ADD */
        "?",   /* invalid */
        "?",   /* invalid */
        "?",   /* invalid */
        "?",   /* invalid */
        "sra", /* FUNCT3_SRL */
        "?",   /* invalid */
        "?"    /* invalid */
    },
    /* Not used, placeholder for index 2 */
    { "?", "?", "?", "?", "?", "?", "?", "?" },
    /* FUNCT7_MUL (0x01) - M-extension instructions */
    {
        "mul",    /* FUNCT3_ADD */
        "mulh",   /* FUNCT3_SLL */
        "mulhsu", /* FUNCT3_SLT */
        "mulhu",  /* FUNCT3_SLTU */
        "div",    /* FUNCT3_XOR */
        "divu",   /* FUNCT3_SRL */
        "rem",    /* FUNCT3_OR */
        "remu"    /* FUNCT3_AND */
    }
};
101 +
102 +
/* Print context (fields consumed by code outside this view — confirm). */
struct ctx {
    bool  color;  /* Emit ANSI color codes when true (presumably) */
    i32   length; /* Characters written so far (presumably) */
    char *cursor; /* Next write position in the output buffer (presumably) */
};
108 +
109 +
/* Initialize an empty instruction components structure */
110 +
static struct instrparts parts(char type) {
111 +
    struct instrparts components;
112 +
113 +
    components.op[0]     = '\0';
114 +
    components.rd[0]     = '\0';
115 +
    components.rs1[0]    = '\0';
116 +
    components.rs2[0]    = '\0';
117 +
    components.imm[0]    = '\0';
118 +
    components.off[0]    = '\0';
119 +
    components.base[0]   = '\0';
120 +
    components.type      = type;
121 +
    components.is_pseudo = false;
122 +
123 +
    return components;
124 +
}
125 +
126 +
/* Get immediate value from I-type instruction. */
127 +
i32 get_i_imm(instr_t instr) {
128 +
    /* Sign-extend 12-bit immediate to 32-bit. */
129 +
    return sign_extend(instr.i.imm_11_0, 12);
130 +
}
131 +
132 +
/* Get immediate value from S-type instruction. */
133 +
i32 get_s_imm(instr_t instr) {
134 +
    /* Sign-extend 12-bit immediate. */
135 +
    u32 imm = (u32)((instr.s.imm_11_5 << 5) | instr.s.imm_4_0);
136 +
137 +
    return sign_extend(imm, 12);
138 +
}
139 +
140 +
/* Get immediate value from B-type instruction. */
141 +
i32 get_b_imm(instr_t instr) {
142 +
    u32 imm = (u32)((instr.b.imm_12 << 12) | (instr.b.imm_11 << 11) |
143 +
                    (instr.b.imm_10_5 << 5) | (instr.b.imm_4_1 << 1));
144 +
145 +
    return sign_extend(imm, 13);
146 +
}
147 +
148 +
/* Get immediate value from J-type instruction. */
149 +
i32 get_j_imm(instr_t instr) {
150 +
    /* The sign bit is already in bit 20 (imm_20). */
151 +
    u32 imm = (u32)((instr.j.imm_20 << 20) | (instr.j.imm_19_12 << 12) |
152 +
                    (instr.j.imm_11 << 11) | (instr.j.imm_10_1 << 1));
153 +
154 +
    return sign_extend(imm, 21);
155 +
}
156 +
157 +
/* Map opcodes to decode function and get instruction components. */
158 +
struct instrparts instr_decode(instr_t instr) {
159 +
    struct instrparts comp;
160 +
    i32               imm;
161 +
    int               funct7_idx;
162 +
163 +
    /* Handle `nop` (`addi x0, x0, 0`). */
164 +
    if (instr.raw == 0x00000013) {
165 +
        comp = parts('?');
166 +
        strncpy(comp.op, "nop", MAX_PART_LEN);
167 +
        comp.is_pseudo = true;
168 +
        return comp;
169 +
    }
170 +
171 +
    /* Decode based on opcode */
172 +
    switch (instr.i.opcode) {
173 +
    case OP_OP:
174 +
        /* R-type instruction (ADD, SUB, etc.) */
175 +
        comp = parts('R');
176 +
177 +
        /* Compute lookup index for funct7 */
178 +
        switch (instr.r.funct7) {
179 +
        case FUNCT7_NORMAL:
180 +
            funct7_idx = 0;
181 +
            break;
182 +
        case FUNCT7_SUB:
183 +
            funct7_idx = 1;
184 +
            break;
185 +
        case FUNCT7_MUL:
186 +
            funct7_idx = 3;
187 +
            break;
188 +
        default:
189 +
            funct7_idx = 0;
190 +
            break;
191 +
        }
192 +
        strncpy(
193 +
            comp.op, alu_op_names[funct7_idx][instr.r.funct3], MAX_PART_LEN
194 +
        );
195 +
        strncpy(comp.rd, reg_names[instr.r.rd], MAX_PART_LEN);
196 +
        strncpy(comp.rs1, reg_names[instr.r.rs1], MAX_PART_LEN);
197 +
        strncpy(comp.rs2, reg_names[instr.r.rs2], MAX_PART_LEN);
198 +
        break;
199 +
200 +
    case OP_IMM:
201 +
        /* I-type ALU instruction (ADDI, SLTI, etc.) */
202 +
        comp = parts('I');
203 +
        imm  = get_i_imm(instr);
204 +
205 +
        /* Special case for `mv` pseudo-instruction */
206 +
        if (instr.i.funct3 == FUNCT3_ADD && instr.i.imm_11_0 == 0 &&
207 +
            instr.i.rs1 != 0) {
208 +
            strncpy(comp.op, "mv", MAX_PART_LEN);
209 +
            strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
210 +
            strncpy(comp.rs1, reg_names[instr.i.rs1], MAX_PART_LEN);
211 +
            comp.is_pseudo = true;
212 +
            return comp;
213 +
        }
214 +
215 +
        /* Special case for `li` pseudo-instruction */
216 +
        if (instr.i.funct3 == FUNCT3_ADD && instr.i.rs1 == 0) {
217 +
            strncpy(comp.op, "li", MAX_PART_LEN);
218 +
            strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
219 +
            snprintf(comp.imm, MAX_PART_LEN, "%d", imm);
220 +
            comp.is_pseudo = true;
221 +
            return comp;
222 +
        }
223 +
224 +
        const char *imm_op = alu_imm_op_names[instr.i.funct3];
225 +
226 +
        /* Special case for right shifts (SRAI vs SRLI) */
227 +
        if (instr.i.funct3 == FUNCT3_SRL && (instr.i.imm_11_0 & 0x400)) {
228 +
            imm_op = "srai";
229 +
        }
230 +
231 +
        strncpy(comp.op, imm_op, MAX_PART_LEN);
232 +
        strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
233 +
        strncpy(comp.rs1, reg_names[instr.i.rs1], MAX_PART_LEN);
234 +
        snprintf(comp.imm, MAX_PART_LEN, "%d", imm);
235 +
        break;
236 +
237 +
    case OP_IMM_32: {
238 +
        /* RV64I: 32-bit immediate operations (ADDIW, SLLIW, SRLIW, SRAIW) */
239 +
        comp = parts('I');
240 +
        imm  = get_i_imm(instr);
241 +
242 +
        /* Special case: sext.w pseudo-instruction (addiw rd, rs1, 0) */
243 +
        if (instr.i.funct3 == FUNCT3_ADD && instr.i.imm_11_0 == 0) {
244 +
            strncpy(comp.op, "sext.w", MAX_PART_LEN);
245 +
            strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
246 +
            strncpy(comp.rs1, reg_names[instr.i.rs1], MAX_PART_LEN);
247 +
            comp.is_pseudo = true;
248 +
            return comp;
249 +
        }
250 +
251 +
        const char *w_op;
252 +
        switch (instr.i.funct3) {
253 +
        case FUNCT3_ADD:
254 +
            w_op = "addiw";
255 +
            break;
256 +
        case FUNCT3_SLL:
257 +
            w_op = "slliw";
258 +
            break;
259 +
        case FUNCT3_SRL:
260 +
            w_op = (instr.i.imm_11_0 & 0x400) ? "sraiw" : "srliw";
261 +
            break;
262 +
        default:
263 +
            w_op = "?w";
264 +
            break;
265 +
        }
266 +
267 +
        strncpy(comp.op, w_op, MAX_PART_LEN);
268 +
        strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
269 +
        strncpy(comp.rs1, reg_names[instr.i.rs1], MAX_PART_LEN);
270 +
        snprintf(comp.imm, MAX_PART_LEN, "%d", imm);
271 +
        break;
272 +
    }
273 +
274 +
    case OP_OP_32: {
275 +
        /* RV64I: 32-bit register-register operations */
276 +
        comp = parts('R');
277 +
        const char *w_rop;
278 +
        switch (instr.r.funct7) {
279 +
        case FUNCT7_NORMAL:
280 +
            switch (instr.r.funct3) {
281 +
            case FUNCT3_ADD:
282 +
                w_rop = "addw";
283 +
                break;
284 +
            case FUNCT3_SLL:
285 +
                w_rop = "sllw";
286 +
                break;
287 +
            case FUNCT3_SRL:
288 +
                w_rop = "srlw";
289 +
                break;
290 +
            default:
291 +
                w_rop = "?w";
292 +
                break;
293 +
            }
294 +
            break;
295 +
        case FUNCT7_SUB:
296 +
            switch (instr.r.funct3) {
297 +
            case FUNCT3_ADD:
298 +
                w_rop = "subw";
299 +
                break;
300 +
            case FUNCT3_SRL:
301 +
                w_rop = "sraw";
302 +
                break;
303 +
            default:
304 +
                w_rop = "?w";
305 +
                break;
306 +
            }
307 +
            break;
308 +
        case FUNCT7_MUL:
309 +
            switch (instr.r.funct3) {
310 +
            case FUNCT3_ADD:
311 +
                w_rop = "mulw";
312 +
                break;
313 +
            case FUNCT3_XOR:
314 +
                w_rop = "divw";
315 +
                break;
316 +
            case FUNCT3_SRL:
317 +
                w_rop = "divuw";
318 +
                break;
319 +
            case FUNCT3_OR:
320 +
                w_rop = "remw";
321 +
                break;
322 +
            case FUNCT3_AND:
323 +
                w_rop = "remuw";
324 +
                break;
325 +
            default:
326 +
                w_rop = "?w";
327 +
                break;
328 +
            }
329 +
            break;
330 +
        default:
331 +
            w_rop = "?w";
332 +
            break;
333 +
        }
334 +
        strncpy(comp.op, w_rop, MAX_PART_LEN);
335 +
        strncpy(comp.rd, reg_names[instr.r.rd], MAX_PART_LEN);
336 +
        strncpy(comp.rs1, reg_names[instr.r.rs1], MAX_PART_LEN);
337 +
        strncpy(comp.rs2, reg_names[instr.r.rs2], MAX_PART_LEN);
338 +
        break;
339 +
    }
340 +
341 +
    case OP_LOAD:
342 +
        /* I-type Load instruction (LW, LH, etc.) */
343 +
        comp = parts('I');
344 +
        imm  = get_i_imm(instr);
345 +
346 +
        strncpy(comp.op, load_op_names[instr.i.funct3], MAX_PART_LEN);
347 +
        strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
348 +
        snprintf(comp.off, MAX_PART_LEN, "%d", imm);
349 +
        strncpy(comp.base, reg_names[instr.i.rs1], MAX_PART_LEN);
350 +
        break;
351 +
352 +
    case OP_JALR:
353 +
        /* I-type JALR instruction */
354 +
        comp = parts('I');
355 +
        imm  = get_i_imm(instr);
356 +
357 +
        /* Special case for `ret` pseudo-instruction */
358 +
        if (instr.i.rd == 0 && instr.i.rs1 == 1 && instr.i.imm_11_0 == 0) {
359 +
            strncpy(comp.op, "ret", MAX_PART_LEN);
360 +
            comp.is_pseudo = true;
361 +
            return comp;
362 +
        }
363 +
364 +
        strncpy(comp.op, "jalr", MAX_PART_LEN);
365 +
        strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
366 +
        snprintf(comp.off, MAX_PART_LEN, "%d", imm);
367 +
        strncpy(comp.base, reg_names[instr.i.rs1], MAX_PART_LEN);
368 +
        break;
369 +
370 +
    case OP_STORE:
371 +
        /* S-type instruction (SW, SH, etc.) */
372 +
        comp = parts('S');
373 +
        imm  = get_s_imm(instr);
374 +
375 +
        strncpy(comp.op, store_op_names[instr.s.funct3], MAX_PART_LEN);
376 +
        strncpy(comp.rs2, reg_names[instr.s.rs2], MAX_PART_LEN);
377 +
        snprintf(comp.off, MAX_PART_LEN, "%d", imm);
378 +
        strncpy(comp.base, reg_names[instr.s.rs1], MAX_PART_LEN);
379 +
        break;
380 +
381 +
    case OP_BRANCH:
382 +
        /* B-type instruction (BEQ, BNE, etc.) */
383 +
        comp = parts('B');
384 +
        imm  = get_b_imm(instr);
385 +
386 +
        strncpy(comp.op, branch_op_names[instr.b.funct3], MAX_PART_LEN);
387 +
        strncpy(comp.rs1, reg_names[instr.b.rs1], MAX_PART_LEN);
388 +
        strncpy(comp.rs2, reg_names[instr.b.rs2], MAX_PART_LEN);
389 +
        snprintf(comp.imm, MAX_PART_LEN, "%d", imm);
390 +
        break;
391 +
392 +
    case OP_LUI:
393 +
    case OP_AUIPC:
394 +
        /* U-type instruction (LUI, AUIPC) */
395 +
        comp = parts('U');
396 +
397 +
        /* Choose operation name based on opcode */
398 +
        strncpy(
399 +
            comp.op, (instr.u.opcode == OP_LUI) ? "lui" : "auipc", MAX_PART_LEN
400 +
        );
401 +
        strncpy(comp.rd, reg_names[instr.u.rd], MAX_PART_LEN);
402 +
        snprintf(comp.imm, MAX_PART_LEN, "0x%x", instr.u.imm_31_12);
403 +
        break;
404 +
405 +
    case OP_JAL:
406 +
        /* J-type instruction (JAL) */
407 +
        comp = parts('J');
408 +
        imm  = get_j_imm(instr);
409 +
410 +
        /* J pseudo-instruction (JAL with rd=x0) */
411 +
        if (instr.j.rd == 0) {
412 +
            strncpy(comp.op, "j", MAX_PART_LEN);
413 +
            snprintf(comp.imm, MAX_PART_LEN, "%d", imm);
414 +
            comp.is_pseudo = true;
415 +
        } else {
416 +
            strncpy(comp.op, "jal", MAX_PART_LEN);
417 +
            strncpy(comp.rd, reg_names[instr.j.rd], MAX_PART_LEN);
418 +
            snprintf(comp.imm, MAX_PART_LEN, "%d", imm);
419 +
        }
420 +
        break;
421 +
422 +
    case OP_SYSTEM:
423 +
        /* System instructions */
424 +
        if (instr.i.imm_11_0 == 0) {
425 +
            /* ECALL instruction */
426 +
            comp = parts('I');
427 +
            strncpy(comp.op, "ecall", MAX_PART_LEN);
428 +
            return comp;
429 +
        } else if (instr.i.imm_11_0 == 1) {
430 +
            /* EBREAK instruction */
431 +
            comp = parts('I');
432 +
            strncpy(comp.op, "ebreak", MAX_PART_LEN);
433 +
            return comp;
434 +
        }
435 +
        /* Fall through for unknown system instructions */
436 +
437 +
        /* fall through */
438 +
    case OP_LOAD_FP:
439 +
        /* F Extension - Floating-point load instruction */
440 +
        comp = parts('I');
441 +
        imm  = get_i_imm(instr);
442 +
443 +
        if (instr.i.funct3 == FUNCT3_WORD_FP) {
444 +
            strncpy(comp.op, "flw", MAX_PART_LEN);
445 +
        } else {
446 +
            strncpy(
447 +
                comp.op, "flw", MAX_PART_LEN
448 +
            ); /* Only single precision supported */
449 +
        }
450 +
        strncpy(comp.rd, reg_names[instr.i.rd], MAX_PART_LEN);
451 +
        snprintf(comp.off, MAX_PART_LEN, "%d", imm);
452 +
        strncpy(comp.base, reg_names[instr.i.rs1], MAX_PART_LEN);
453 +
        break;
454 +
455 +
    case OP_STORE_FP:
456 +
        /* F Extension - Floating-point store instruction */
457 +
        comp = parts('S');
458 +
        imm  = get_s_imm(instr);
459 +
460 +
        if (instr.s.funct3 == FUNCT3_WORD_FP) {
461 +
            strncpy(comp.op, "fsw", MAX_PART_LEN);
462 +
        } else {
463 +
            strncpy(
464 +
                comp.op, "fsw", MAX_PART_LEN
465 +
            ); /* Only single precision supported */
466 +
        }
467 +
        strncpy(comp.rs2, reg_names[instr.s.rs2], MAX_PART_LEN);
468 +
        snprintf(comp.off, MAX_PART_LEN, "%d", imm);
469 +
        strncpy(comp.base, reg_names[instr.s.rs1], MAX_PART_LEN);
470 +
        break;
471 +
472 +
    case OP_OP_FP:
473 +
        /* F Extension - Floating-point operation instruction */
474 +
        comp = parts('R');
475 +
476 +
        switch (instr.r.funct7) {
477 +
        case FUNCT7_FADD_S:
478 +
            strncpy(comp.op, "fadd.s", MAX_PART_LEN);
479 +
            break;
480 +
        case FUNCT7_FSUB_S:
481 +
            strncpy(comp.op, "fsub.s", MAX_PART_LEN);
482 +
            break;
483 +
        case FUNCT7_FMUL_S:
484 +
            strncpy(comp.op, "fmul.s", MAX_PART_LEN);
485 +
            break;
486 +
        case FUNCT7_FDIV_S:
487 +
            strncpy(comp.op, "fdiv.s", MAX_PART_LEN);
488 +
            break;
489 +
        case FUNCT7_FEQ_S:
490 +
            /* Comparison instructions use different funct3 values */
491 +
            switch (instr.r.funct3) {
492 +
            case FUNCT3_FEQ:
493 +
                strncpy(comp.op, "feq.s", MAX_PART_LEN);
494 +
                break;
495 +
            case FUNCT3_FLT:
496 +
                strncpy(comp.op, "flt.s", MAX_PART_LEN);
497 +
                break;
498 +
            case FUNCT3_FLE:
499 +
                strncpy(comp.op, "fle.s", MAX_PART_LEN);
500 +
                break;
501 +
            default:
502 +
                strncpy(comp.op, "fcmp.s", MAX_PART_LEN);
503 +
                break;
504 +
            }
505 +
            break;
506 +
        default:
507 +
            strncpy(comp.op, "fop.s", MAX_PART_LEN);
508 +
            break;
509 +
        }
510 +
        strncpy(comp.rd, reg_names[instr.r.rd], MAX_PART_LEN);
511 +
        strncpy(comp.rs1, reg_names[instr.r.rs1], MAX_PART_LEN);
512 +
        strncpy(comp.rs2, reg_names[instr.r.rs2], MAX_PART_LEN);
513 +
        break;
514 +
515 +
    default:
516 +
        /* Unknown opcode */
517 +
        comp = parts('?');
518 +
        snprintf(comp.op, MAX_PART_LEN, "unknown (%d)", instr.i.opcode);
519 +
        break;
520 +
    }
521 +
    return comp;
522 +
}
523 +
524 +
/* Helper function to format colored text with automatic reset.
525 +
 * When color is COLOR_NORMAL, outputs plain text with no color codes */
526 +
static void put(struct ctx *ctx, const char *color, const char *str) {
527 +
    if (color && ctx->color) {
528 +
        ctx->cursor += sprintf(ctx->cursor, "%s", color);
529 +
    }
530 +
    i32 len = sprintf(ctx->cursor, "%s", str);
531 +
532 +
    ctx->cursor += len;
533 +
    ctx->length += len;
534 +
535 +
    if (color && ctx->color) {
536 +
        ctx->cursor += sprintf(ctx->cursor, "%s", COLOR_RESET);
537 +
    }
538 +
}
539 +
540 +
/* Format instruction components into ctx's output buffer.
 * ANSI color codes are emitted only when ctx->color is set (see put()).
 * Operand layout is chosen by the instruction type in i->type, with
 * pseudo-instructions (nop/ret/j/mv/li) handled as special cases. */
static void format_instr(const struct instrparts *i, struct ctx *ctx) {
    /* Color roles for each component; put() drops them when color is off.
     * COLOR_NORMAL marks uncolored text (see put()). */
    const char *opcolor  = COLOR_BOLD;
    const char *rdcolor  = COLOR_GREEN;
    const char *rscolor  = COLOR_GREEN;
    const char *immcolor = COLOR_BLUE;
    const char *nocolor  = COLOR_NORMAL;

    put(ctx, opcolor, i->op);

    /* Check if this is an op without operands. */
    if (i->is_pseudo &&
        (strcmp(i->op, "ret") == 0 || strcmp(i->op, "nop") == 0)) {
        return;
    }
    if (strcmp(i->op, "ebreak") == 0 || strcmp(i->op, "ecall") == 0)
        return;

    /* Padding for ops with operands: the inclusive bound (<=) pads the op
     * out to OP_WIDTH + 1 columns, so at least one space follows any op
     * that fits within OP_WIDTH. */
    for (usize len = strlen(i->op); len <= OP_WIDTH; len++)
        put(ctx, nocolor, " ");

    /* Pseudo-instructions with operands use their own layouts. */
    if (i->is_pseudo) {
        if (strcmp(i->op, "j") == 0) {
            /* j imm */
            put(ctx, nocolor, " ");
            put(ctx, immcolor, i->imm);
            return;
        } else if (strcmp(i->op, "mv") == 0) {
            /* mv rd, rs1 */
            put(ctx, nocolor, " ");
            put(ctx, rdcolor, i->rd);
            put(ctx, nocolor, ", ");
            put(ctx, rscolor, i->rs1);
            return;
        } else if (strcmp(i->op, "li") == 0) {
            /* li rd, imm */
            put(ctx, nocolor, " ");
            put(ctx, rdcolor, i->rd);
            put(ctx, nocolor, ", ");
            put(ctx, immcolor, i->imm);
            return;
        }
    }

    switch (i->type) {
    case 'R':
        /* R-type: op rd, rs1, rs2. */
        put(ctx, nocolor, " ");
        put(ctx, rdcolor, i->rd);
        put(ctx, nocolor, ", ");
        put(ctx, rscolor, i->rs1);
        put(ctx, nocolor, ", ");
        put(ctx, rscolor, i->rs2);
        break;

    case 'I':
        /* A non-empty base register distinguishes memory-style operands
         * from plain immediate operands. */
        if (i->base[0] != '\0') {
            /* I-type (load/jalr): op rd, offset(rs1). */
            put(ctx, nocolor, " ");
            put(ctx, rdcolor, i->rd);
            put(ctx, nocolor, ", ");
            put(ctx, immcolor, i->off);
            put(ctx, nocolor, "(");
            put(ctx, rscolor, i->base);
            put(ctx, nocolor, ")");
        } else {
            /* I-type (immediate): op rd, rs1, imm. */
            put(ctx, nocolor, " ");
            put(ctx, rdcolor, i->rd);
            put(ctx, nocolor, ", ");
            put(ctx, rscolor, i->rs1);
            put(ctx, nocolor, ", ");
            put(ctx, immcolor, i->imm);
        }
        break;

    case 'S':
        /* S-type: op rs2, offset(rs1). */
        put(ctx, nocolor, " ");
        put(ctx, rscolor, i->rs2);
        put(ctx, nocolor, ", ");
        put(ctx, immcolor, i->off);
        put(ctx, nocolor, "(");
        put(ctx, rscolor, i->base);
        put(ctx, nocolor, ")");
        break;

    case 'B':
        /* B-type: op rs1, rs2, imm. */
        put(ctx, nocolor, " ");
        put(ctx, rscolor, i->rs1);
        put(ctx, nocolor, ", ");
        put(ctx, rscolor, i->rs2);
        put(ctx, nocolor, ", ");
        put(ctx, immcolor, i->imm);
        break;

    case 'U':
        /* U-type: op rd, imm. */
        put(ctx, nocolor, " ");
        put(ctx, rdcolor, i->rd);
        put(ctx, nocolor, ", ");
        put(ctx, immcolor, i->imm);
        break;

    case 'J':
        /* J-type: op rd, imm (rd omitted for the `j` pseudo form). */
        put(ctx, nocolor, " ");
        if (i->rd[0] != '\0') {
            put(ctx, rdcolor, i->rd);
            put(ctx, nocolor, ", ");
            put(ctx, immcolor, i->imm);
        } else {
            put(ctx, immcolor, i->imm);
        }
        break;

    default:
        /* Unknown type or invalid opcode: op name only, no operands. */
        break;
    }
}
662 +
663 +
/* Render `instr` as assembly text into `buf`.  Returns the number of
 * visible characters written; ANSI color codes (emitted when `color` is
 * true) are not counted. */
i32 sprint_instr(instr_t instr, char *buf, bool color) {
    struct ctx ctx;

    ctx.color  = color;
    ctx.length = 0;
    ctx.cursor = buf;

    /* Renamed from `parts` to avoid shadowing the parts() helper. */
    struct instrparts decoded = instr_decode(instr);

    format_instr(&decoded, &ctx);

    return ctx.length;
}
671 +
672 +
void print_instr(
673 +
    instr_t instr, FILE *stream, const char *comment, bool color, i32 indent
674 +
) {
675 +
    char buf[MAX_INSTR_STR_LEN] = { 0 };
676 +
    i32  n                      = sprint_instr(instr, buf, color);
677 +
678 +
    color = color && isatty(stream->_fileno);
679 +
680 +
    /* Indent line */
681 +
    fprintf(stream, "%*s", indent, " ");
682 +
683 +
    if (comment) {
684 +
        fprintf(stream, "%s", buf);
685 +
        fprintf(stream, "%-*s", INSTR_STR_LEN - n, "");
686 +
687 +
        if (color) {
688 +
            fputs(COLOR_GREY, stream);
689 +
        }
690 +
        fprintf(stream, "# %s\n", comment);
691 +
692 +
        if (color) {
693 +
            fputs(COLOR_RESET, stream);
694 +
        }
695 +
    } else {
696 +
        fprintf(stream, "%s\n", buf);
697 +
    }
698 +
}
699 +
700 +
void print_instrs(
701 +
    instr_t *instrs, usize count, FILE *stream, const char *comment
702 +
) {
703 +
    bool color = (bool)isatty(stream->_fileno);
704 +
705 +
    for (usize i = 0; i < count; i++) {
706 +
        char  buf[MAX_INSTR_STR_LEN];
707 +
        usize addr = i * INSTR_SIZE;
708 +
        sprint_instr(instrs[i], buf, color);
709 +
710 +
        if (comment) {
711 +
            fprintf(
712 +
                stream,
713 +
                "%04lx: %08x  %s # %s\n",
714 +
                addr,
715 +
                instrs[i].raw,
716 +
                buf,
717 +
                comment
718 +
            );
719 +
        } else {
720 +
            fprintf(stream, "%04lx: %08x  %s\n", addr, instrs[i].raw, buf);
721 +
        }
722 +
    }
723 +
}
riscv/debug.h added +60 -0
1 +
#ifndef OP_DEBUG_H
#define OP_DEBUG_H

#include <stdio.h>

#include "../riscv.h"
#include "../types.h"

/* Maximum length of a component string (op, reg, imm), including the
 * terminating NUL. */
#define MAX_PART_LEN      16
/* Maximum length of an instruction string, including colors. */
#define MAX_INSTR_STR_LEN 96
/* Length of an instruction string, without colors; used as the column
 * width when aligning trailing comments. */
#define INSTR_STR_LEN     24

/* Structure for representing a decoded RISC-V instruction's components.
 * All components are stored as strings to allow for syntax highlighting.
 * Unused components are empty strings. */
struct instrparts {
    /* Operation name (e.g., "add", "lw", "beq") */
    char op[MAX_PART_LEN];
    /* Destination register (e.g., "a0", "t0") */
    char rd[MAX_PART_LEN];
    /* Source register 1 (e.g., "a1", "t1") */
    char rs1[MAX_PART_LEN];
    /* Source register 2 (e.g., "a2", "t2") */
    char rs2[MAX_PART_LEN];
    /* Immediate value as string (e.g., "42", "-8") */
    char imm[MAX_PART_LEN];
    /* Memory offset (e.g., "8" in "lw t0, 8(sp)") */
    char off[MAX_PART_LEN];
    /* For memory operations, base register (e.g., "sp" in "lw t0, 8(sp)") */
    char base[MAX_PART_LEN];
    /* Type of the instruction (R, I, S, B, U, J; '?' when unknown) */
    char type;
    /* Whether the instruction is a pseudo-instruction (e.g., mv, li, ret) */
    bool is_pseudo;
};

/**
 * Convert an instruction to a human-readable RISC-V assembly string.
 * Writes into `str` and returns the visible length written (ANSI color
 * codes, emitted when `color` is true, are not counted).
 */
i32 sprint_instr(instr_t instr, char *str, bool color);

/**
 * Prints a human-readable RISC-V assembly representation of the instruction to
 * the given stream, indented by `indent` spaces, with an optional trailing
 * `# comment`.
 */
void print_instr(
    instr_t instr, FILE *stream, const char *comment, bool color, i32 indent
);

/**
 * Prints human-readable RISC-V assembly representations of multiple
 * instructions to the given stream, one per line, each prefixed with its
 * address and raw encoding.
 */
void print_instrs(
    instr_t *instrs, usize count, FILE *stream, const char *comment
);

#endif /* OP_DEBUG_H */
types.h added +49 -0
1 +
#ifndef TYPES_H
#define TYPES_H

/* Short primitive aliases.  Widths assume the usual ILP32/LP64 data
 * models; they are not enforced with static assertions here. */
typedef unsigned char      u8;
typedef unsigned short     u16;
typedef unsigned int       u32;
typedef unsigned long long u64;
typedef signed char        i8;
typedef short              i16;
typedef int                i32;
typedef long long          i64;
typedef float              f32;
typedef double             f64;

/* Pointer-sized integer types. */
typedef unsigned long usize;
typedef long          isize;

/* NOTE(review): clashes with the C23 `bool` keyword and with
 * <stdbool.h>; only valid in pre-C23 translation units that do not
 * include <stdbool.h>. */
typedef u8 bool;

#define true  1
#define false 0

#ifndef NULL
#define NULL ((void *)0)
#endif

#define U8_MIN  0
#define U8_MAX  255
#define U16_MIN 0
#define U16_MAX 65535
#define U32_MIN 0
#define U32_MAX 4294967295U

#define I8_MIN  (-128)
#define I8_MAX  127
#define I16_MIN (-32768)
#define I16_MAX 32767
/* Written as (-2147483647 - 1) because the literal 2147483648 does not
 * fit in an int: a plain `-2147483648` is the negation of a `long`
 * constant and therefore has type `long`, not `int`.  Parenthesized for
 * safe use in expressions, matching the other *_MIN macros. */
#define I32_MIN (-2147483647 - 1)
#define I32_MAX 2147483647

/* Use appropriate syntax for no-discard function attribute, depending
 * on C standard used.  Standard [[...]] attributes were only introduced
 * in C23 (__STDC_VERSION__ == 202311L); C17's 201710L predates them, so
 * earlier standards must fall back to the GNU attribute. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
#define __nodiscard [[nodiscard]]
#else
#define __nodiscard __attribute__((warn_unused_result))
#endif

#endif