aboutsummaryrefslogtreecommitdiff
path: root/examples/nesemu1/nesemu1.cff
diff options
context:
space:
mode:
Diffstat (limited to 'examples/nesemu1/nesemu1.cff')
-rw-r--r--examples/nesemu1/nesemu1.cff929
1 files changed, 929 insertions, 0 deletions
diff --git a/examples/nesemu1/nesemu1.cff b/examples/nesemu1/nesemu1.cff
new file mode 100644
index 0000000..f971091
--- /dev/null
+++ b/examples/nesemu1/nesemu1.cff
@@ -0,0 +1,929 @@
+import "../libc.hff";
+import "sdl.hff";
+
+// A port of https://bisqwit.iki.fi/jutut/kuvat/programming_examples/nesemu1/nesemu1.cc
+// Original program by Joel Yliluoma
+
+// Host-side I/O: the SDL window, the frame buffer and the joypad read-out state.
+struct IO {
+ s *SDL_Surface, // off-screen 256x240 frame buffer that putpixel writes into
+ win *SDL_Window,
+ screen *SDL_Surface, // window surface; receives a blit of `s` once per frame
+ curjoy [2]int, nextjoy [2]int, joypos [2]int,
+
+ // Creates the window and the off-screen frame buffer.
+ // Prints the SDL error and exits the process if either cannot be created.
+ fn init(io *IO) void {
+ io.win = SDL_CreateWindow("nesemu1", 0x1FFF0000u, 0x1FFF0000u, 256, 240, 0);
+ if io.win == #null {
+ fprintf(stderr, "SDL: %s\n", SDL_GetError());
+ exit(1);
+ }
+ io.screen = SDL_GetWindowSurface(io.win);
+ // The palette in putpixel is stored as 0xRRGGBBxx, so request exactly that
+ // channel layout. All-zero masks make SDL pick its default 32-bit layout
+ // (0x00RRGGBB), which would shift every colour channel by one byte.
+ io.s = SDL_CreateRGBSurface(0, 256, 240, 32, 0xFF000000u, 0x00FF0000u, 0x0000FF00u, 0);
+ if io.s == #null {
+ fprintf(stderr, "SDL: %s\n", SDL_GetError());
+ exit(1);
+ }
+ }
+
+ // Writes one pixel into the frame buffer at (px, py).
+ // Only the low 6 bits of `pixel` select a palette entry; `offset` is unused here.
+ fn putpixel(io *IO, px uint, py uint, pixel uint, offset int) void {
+ static palette [64]u32 = {
+ 0x666666ff, 0x002a88ff, 0x1412a7ff, 0x3b00a4ff, 0x5c007eff, 0x6e0040ff, 0x6c0600ff, 0x561d00ff,
+ 0x333500ff, 0x0b4800ff, 0x005200ff, 0x004f08ff, 0x00404dff, 0x000000ff, 0x000000ff, 0x000000ff,
+ 0xadadadff, 0x155fd9ff, 0x4240ffff, 0x7527feff, 0xa01accff, 0xb71e7bff, 0xb53120ff, 0x994e00ff,
+ 0x6b6d00ff, 0x388700ff, 0x0c9300ff, 0x008f32ff, 0x007c8dff, 0x000000ff, 0x000000ff, 0x000000ff,
+ 0xfffeffff, 0x64b0ffff, 0x9290ffff, 0xc676ffff, 0xf36affff, 0xfe6eccff, 0xfe8170ff, 0xea9e22ff,
+ 0xbcbe00ff, 0x88d800ff, 0x5ce430ff, 0x45e082ff, 0x48cddeff, 0x4f4f4fff, 0x000000ff, 0x000000ff,
+ 0xfffeffff, 0xc0dfffff, 0xd3d2ffff, 0xe8c8ffff, 0xfbc2ffff, 0xfec4eaff, 0xfeccc5ff, 0xf7d8a5ff,
+ 0xe4e594ff, 0xcfef96ff, 0xbdf4abff, 0xb3f3ccff, 0xb5ebf2ff, 0xb8b8b8ff, 0x000000ff, 0x000000ff,
+ };
+ // Store the RGB color into the frame buffer (256 pixels per row).
+ ((as(*u32)io.s.pixels))[(py * 256) + px] = palette[pixel%64];
+ }
+
+ // Called at the end of every scanline. After the last visible line (239) the
+ // frame buffer is blitted to the window and pending SDL events are drained;
+ // a quit event terminates the process.
+ fn flush_scanline(io *IO, py uint) void {
+ if py == 239 {
+ SDL_UpperBlit(io.s, #null, io.screen, #null);
+ SDL_UpdateWindowSurface(io.win);
+ let evt SDL_Event #?;
+ while SDL_PollEvent(&evt) {
+ if evt.t == :quit { exit(0); }
+ }
+ // SDL_Delay(100);
+ }
+ }
+
+ // When `v` is set, latches nextjoy into curjoy for both pads and rewinds
+ // their read positions.
+ fn joy_strobe(io *IO, v bool) void {
+ if v {
+ io.curjoy[0] = io.nextjoy[0]; io.joypos[0] = 0;
+ io.curjoy[1] = io.nextjoy[1]; io.joypos[1] = 0;
+ }
+ }
+ // Returns the next button bit (0 or 1) of pad `idx` and advances its read
+ // position; `masks` maps the read position (mod 8) to a bit of curjoy.
+ fn joy_read(io *IO, idx uint) u8 {
+ static const masks [8]const u8 = {0x20,0x10,0x40,0x80,0x04,0x08,0x02,0x01};
+ return (io.curjoy[idx] & masks[io.joypos[idx]++ & 7]) != 0 ? 1 : 0;
+ }
+}
+static g_io IO = {};
+
+// Bank granularities and the number of bank slots used by GamePak:
+// VROM_PAGES slots of VROM_GRANULARITY bytes cover 0x2000 bytes,
+// ROM_PAGES slots of ROM_GRANULARITY bytes cover 0x10000 bytes.
+def VROM_GRANULARITY = 0x400,
+ VROM_PAGES = 0x2000 / VROM_GRANULARITY,
+ ROM_GRANULARITY = 0x2000,
+ ROM_PAGES = 0x10000 / ROM_GRANULARITY;
+
+// Cartridge state: ROM/VRAM images, the mapper number and the current bank tables.
+struct GamePak {
+ rom [#]u8, // program ROM image; bank pointers into it are set by setrom
+ vram [#]u8, // video ROM/RAM image; bank pointers into it are set by setvrom
+ mapperno uint, // selects which register-write handling `access` applies
+ nram [0x1000]u8, // backing store for the nta pointers
+ pram [0x2000]u8, // returned by `access` when addr >> 13 == 3
+ banks [ROM_PAGES]*u8, // one pointer into rom per ROM_GRANULARITY-sized slot
+ vbanks [VROM_PAGES]*u8, // one pointer into vram per VROM_GRANULARITY-sized slot
+ nta [4]*u8, // four pointers into nram (read through PPU.mmap)
+
+ // Points the bank slots covering [baseaddr, baseaddr + size) at ROM bank
+ // `index`, where banks are counted in units of `size` bytes. Offsets wrap
+ // modulo the ROM length; slots at or beyond ROM_PAGES are left untouched.
+ // NOTE(review): the ROM length is used as a modulus, so this presumably
+ // requires a non-empty rom — confirm callers guarantee that.
+ fn setrom(this *GamePak, size uint, baseaddr uint, index uint) void {
+ let r = this.rom;
+ def granu = ROM_GRANULARITY;
+ // v: byte offset into rom (biased by the ROM length), p: bank slot index
+ for let v = r.#len + (index * size), p = baseaddr / granu;
+ p < (baseaddr + size) / granu and p < ROM_PAGES;
+ (do ++p; v += granu;)
+ {
+ this.banks[p] = &r[v % r.#len];
+ }
+ }
+
+ // Same as setrom, but for the video banks: points the vbanks slots covering
+ // [baseaddr, baseaddr + size) at VRAM bank `index` (units of `size` bytes),
+ // wrapping modulo the VRAM length and ignoring slots beyond VROM_PAGES.
+ fn setvrom(this *GamePak, size uint, baseaddr uint, index uint) void {
+ let r = &this.vram;
+ def granu = VROM_GRANULARITY;
+ // v: byte offset into vram (biased by its length), p: bank slot index
+ for let v = (*r).#len + (index * size), p = baseaddr / granu;
+ p < (baseaddr + size) / granu and p < VROM_PAGES;
+ (do ++p; v += granu;)
+ {
+ this.vbanks[p] = &r.[v % (*r).#len];
+ }
+ }
+
+ // Cartridge bus access. Writes at or above 0x8000 are first interpreted as
+ // mapper register writes (mappers 7, 2, 3 and 1). Afterwards the byte at
+ // `addr` is returned in every case: from pram when addr >> 13 == 3,
+ // otherwise from the currently selected ROM bank.
+ fn access(this *GamePak, addr u16, val u8, write bool) u8 {
+ switch {
+ case write and addr >= 0x8000 and this.mapperno == 7; // e.g. Rare games
+ this->setrom(0x8000, 0x8000, val & 7);
+ // All four nametable slots point at the same 0x400-byte half of nram.
+ // (Index runs 0..3; a post-incremented loop test would skip slot 0 and
+ // write one element past the end of nta.)
+ for let i = 0; i < 4; ++i {
+ this.nta[i] = &this.nram[0x400 * ((val >> 4) & 1)];
+ }
+ case write and addr >= 0x8000 and this.mapperno == 2; // e.g. Rockman, Castlevania
+ this->setrom(0x4000, 0x8000, val);
+
+ case write and addr >= 0x8000 and this.mapperno == 3; // e.g. Kage, Solomon's Key
+ val &= this->access(addr, 0, #f); // Bus conflict
+ this->setvrom(0x2000, 0, val & 3);
+
+ case write and addr >= 0x8000 and this.mapperno == 1; // e.g. Rockman 2, Simon's Quest
+ // Serial register interface: one bit is shifted into `cache` per write and
+ // every fifth write commits it to regs[(addr >> 13) & 3]. A write with
+ // bit 7 set resets regs[0] and reconfigures immediately.
+ static regs [4]u8 = {0xC, 0, 0, 0},
+ counter = 0,
+ cache = 0;
+ let configure = #f;
+ if val & 0x80 != 0 {
+ regs[0] = 0xC;
+ configure = #t;
+ } else {
+ cache |= (val & 1) << counter;
+ }
+ if configure or ++counter == 5 {
+ if !configure { regs[(addr >> 13) & 3] = (val = cache); }
+ cache = (counter = 0);
+ static sel [4][4]u8 = { {0,0,0,0}, {1,1,1,1}, {0,1,0,1}, {0,0,1,1} };
+ for let m = 0; m < 4; ++m {
+ this.nta[m] = &this.nram[0x400 * sel[regs[0] & 3][m]];
+ }
+ // regs[1]/regs[2] select the two 0x1000-byte video banks, so they must go
+ // through setvrom. With setrom these calls either did nothing or
+ // overwrote program bank slot 0.
+ this->setvrom(0x1000, 0x0000, ((regs[0]&16 != 0) ? regs[1] : ((regs[1]&~1)+0)));
+ this->setvrom(0x1000, 0x1000, ((regs[0]&16 != 0) ? regs[2] : ((regs[1]&~1)+1)));
+ switch (regs[0] >> 2) & 3 {
+ case 0, 1;
+ this->setrom(0x8000, 0x8000, (regs[3] & 0xE) / 2);
+ case 2;
+ this->setrom(0x4000, 0x8000, 0);
+ this->setrom(0x4000, 0xC000, regs[3] & 0xF);
+ case 3;
+ this->setrom(0x4000, 0x8000, regs[3] & 0xF);
+ this->setrom(0x4000, 0xC000, ~0);
+ }
+ }
+ }
+ if addr >> 13 == 3 {
+ return this.pram[addr & 0x1FFF];
+ }
+ // printf("read addr %.4X\n",addr);
+ return this.banks[(addr / ROM_GRANULARITY) % ROM_PAGES][addr % ROM_GRANULARITY];
+ }
+ // Sets the power-on mapping: nta alternates between the two lower 0x400-byte
+ // blocks of nram, video bank 0 is mapped at 0x0000, and each 0x4000-byte
+ // program window gets bank 0 except the last one, which gets bank index ~0.
+ fn init(this *GamePak) void {
+ this.nta = { &this.nram[0], &this.nram[0x400], &this.nram[0], &this.nram[0x400] };
+ this->setvrom(0x2000, 0x0000, 0);
+ for let v = 0; v < 4; ++v { this->setrom(0x4000, v * 0x4000, v == 3 ? ~0 : 0); }
+ }
+}
+
+// The single cartridge instance used by the CPU and PPU code below.
+static g_pak GamePak = {};
+
+// Forward declarations: the CPU struct's RB/WB helpers and the instruction
+// table call these before their definitions appear further down.
+struct CPU;
+fn cpu_access(*CPU, addr u16, val u8, write bool) u8;
+fn cpu_tick() void;
+struct CPU { //CPU: Ricoh RP2A03 (based on MOS6502, almost the same as in Commodore 64)
+ ram [0x800]u8,
+ reset bool, // while set, cpu_access turns writes into reads; cleared by cpu_op
+ nmi bool, // driven by PPU.tick
+ nmi_edge bool, // set by cpu_op when an NMI is taken, cleared once nmi is low
+ intr bool, // IRQ request; set by the APU, cleared by APU.read
+ pc u16, a u8, x u8, y u8, s u8, // registers
+ // Each flag is declared twice over the same bit: once as a bool and once
+ // (i-prefixed) as an integer view for use in arithmetic.
+ p bitfield : u8 { // status flags
+ c (0, 1) bool, ic (0, 1), // carry
+ z (1, 1) bool, iz (1, 1), // zero
+ i (2, 1) bool, ii (2, 1), // interrupt
+ d (3, 1) bool, id (3, 1), // decimal
+ v (6, 1) bool, iv (6, 1), // overflow
+ n (7, 1) bool, in (7, 1), // negative
+ },
+
+ // Power-on state: reset is pending and the interrupt flag is set.
+ fn init(cpu *CPU) void {
+ cpu.reset = #t;
+ cpu.p.i = #t;
+ }
+ // Read byte / write byte through cpu_access (each access also advances the clock).
+ fn RB(cpu *CPU, addr u16) u8 { return cpu_access(cpu, addr, 0, #f); }
+ fn WB(cpu *CPU, addr u16, val u8) u8 { return cpu_access(cpu, addr, val, #t); }
+}
+
+static g_cpu CPU = {};
+
+// One sprite as collected during sprite evaluation (PPU.oam2) and as prepared
+// for rendering (PPU.oam3). `pattern` holds the interleaved pattern bits that
+// rendering_tick stores once the sprite's graphics have been fetched.
+struct PPUSpr {
+ sprindex u8, y u8, index u8, attr u8, x u8,
+ pattern u16
+}
+// Scroll / VRAM address register. All fields are overlapping views of the same
+// u32; note that the 16-bit VRAM address (`raw`) starts at bit 3, above the
+// three fine-x bits.
+bitfield PPUScrolltype : u32 {
+ raw (3,16), // raw VRAM address (16-bit)
+ xscroll (0, 8), // low 8 bits of first write to 2005
+ xfine (0, 3), // low 3 bits of first write to 2005
+ xcoarse (3, 5), // high 5 bits of first write to 2005
+ ycoarse (8, 5), // high 5 bits of second write to 2005
+ basenta (13,2), // nametable index (copied from 2000)
+ basenta_h (13,1), // horizontal nametable index
+ basenta_v (14,1), // vertical nametable index
+ yfine (15,3), // low 3 bits of second write to 2005
+ vaddrhi (11,8), // first write to 2006 (with high 2 bits set to zero)
+ vaddrlo (3, 8) // second write to 2006
+}
+struct PPU {
+ // PPU register file packed into one u32: byte 0 is reg 0 (sysctrl), byte 1 is
+ // reg 1 (dispctrl), byte 2 is reg 2 (status) and byte 3 is reg 3 (oamaddr).
+ // The remaining fields are views of individual bits of those bytes.
+ // - emprgb is the top three bits of dispctrl (bits 13..15); it must not
+ //   overlap showsp at bit 12.
+ // - oamindex is the upper six bits of oamaddr (the sprite number,
+ //   oamaddr / 4), complementing the two-bit oamdata view.
+ reg bitfield : u32 {
+ // reg 0 (w) // reg 1 (w) // reg 2 (r)
+ sysctrl (0, 8), dispctrl (8, 8), status (16, 8),
+ basenta (0, 2), grayscale (8, 1) bool, spoverflow (21, 1) bool,
+ inc (2, 1), showbg8 (9, 1) bool, sp0hit (22, 1) bool,
+ spaddr (3, 1), showsp8 (10,1) bool, invblank (23, 1) bool,
+ bgaddr (4, 1), showbg (11,1) bool, // reg 3 (w)
+ spsize (5, 1) bool, showsp (12,1) bool, oamaddr (24, 8),
+ slaveflag (6, 1) bool, showbgsp (11,2), oamdata (24, 2),
+ nmienable (7, 1) bool, emprgb (13, 3), oamindex (26, 6),
+ },
+ palette [32]u8, // palette RAM, reached through mmap for addresses >= 0x3F00
+ // oam: sprite memory written through port 4; oam2: sprites found for the
+ // next scanline; oam3: copies of oam2 entries with fetched pattern data.
+ oam [256]u8, oam2 [8]PPUSpr, oam3 [8]PPUSpr,
+ scroll PPUScrolltype, vaddr PPUScrolltype,
+ pat_addr uint, sprinpos uint, sproutpos uint, sprrenpos uint, sprtmp uint,
+ tileattr u16, tilepat u16, ioaddr u16,
+ bg_shift_pat u32, bg_shift_attr u32,
+ // scanline/x: current beam position; scanline_end: length of this scanline;
+ // vblankstate: countdown used by tick to set/clear the vblank flag;
+ // ncycles: counts 0..2 and is passed to putpixel.
+ scanline int, x int, scanline_end int, vblankstate int, ncycles int,
+ readbuffer int, openbus int, openbus_decaytimer int,
+ parity bool, offset_toggle bool,
+
+ // Starts the PPU at scanline 241 with a full-length (341 column) scanline;
+ // all other state keeps its zero initial value.
+ fn init(ppu *PPU) void {
+ ppu.scanline = 241;
+ ppu.scanline_end = 341;
+ }
+
+ // Memory mapping: Convert PPU memory address into a reference to relevant data
+ // The address is first reduced to 14 bits, then routed to:
+ //   >= 0x3F00        palette RAM (every fourth entry is folded into the low 16)
+ //   <  0x2000        the cartridge's video banks (g_pak.vbanks)
+ //   everything else  one of the four nametable pointers (g_pak.nta)
+ fn mmap(ppu *PPU, i int) *u8 {
+ i &= 0x3FFF;
+ if i >= 0x3F00 {
+ if i % 4 == 0 { i &= 0x0F; }
+ return &ppu.palette[i & 31];
+ }
+ if i < 0x2000 {
+ return &g_pak.vbanks[(i / VROM_GRANULARITY) % VROM_PAGES][i % VROM_GRANULARITY];
+ }
+ return &g_pak.nta[(i >> 10) & 3][i & 0x3FF];
+ }
+
+ // external I/O: read or write
+ // `index` is the PPU port number (0..7), `v` the byte being written.
+ // Returns the value the CPU sees on the bus. Any write refreshes the open-bus
+ // latch with `v`; reads of ports 2, 4 and 7 compute their own result.
+ fn access(ppu *PPU, index uint, v u8, write bool) u8 {
+ defmacro refreshopenbus(v) [(do ppu.openbus_decaytimer = 77777; ppu.openbus = v; )]
+ let res u8 = ppu.openbus;
+ if write { refreshopenbus(v); }
+ switch index {
+ case 0; if write { ppu.reg.sysctrl = v; ppu.scroll.basenta = ppu.reg.basenta; }
+ case 1; if write { ppu.reg.dispctrl = v; }
+ case 2;
+ if !write {
+ res = ppu.reg.status | (ppu.openbus & 0x1F);
+ ppu.reg.invblank = #f; // Reading $2002 clears the vblank flag.
+ ppu.offset_toggle = #f; // Also resets the toggle for address updates.
+ if ppu.vblankstate != -5 {
+ ppu.vblankstate = 0; // This also may cancel the setting of InVBlank.
+ }
+ }
+ case 3; if write { ppu.reg.oamaddr = v; } // Index into OAM
+ case 4; if write { ppu.oam[ppu.reg.oamaddr++] = v; } // Write/read the OAM
+ else { res = refreshopenbus(ppu.oam[ppu.reg.oamaddr] & (ppu.reg.oamdata == 2 ? 0xE3 : 0xFF)); }
+ case 5; // set background scrolling offset
+ if write {
+ if ppu.offset_toggle { ppu.scroll.yfine = v & 7; ppu.scroll.ycoarse = v >> 3;
+ } else { ppu.scroll.xscroll = v; }
+ ppu.offset_toggle = !ppu.offset_toggle;
+ }
+ case 6; // set video memory position for access
+ if write {
+ if ppu.offset_toggle { ppu.scroll.vaddrlo = v; ppu.vaddr = ppu.scroll;
+ } else { ppu.scroll.vaddrhi = v & 0x3F; }
+ ppu.offset_toggle = !ppu.offset_toggle;
+ }
+ case 7;
+ // The VRAM address is the `raw` field (bits 3..18 of the scroll word),
+ // not the whole underlying integer, whose low three bits are fine-x.
+ res = ppu.readbuffer;
+ let t = mmap(ppu, ppu.vaddr.raw); // access video memory
+ if write { res = (*t = v); }
+ else {
+ if ppu.vaddr.raw & 0x3F00 == 0x3F00 { // palette?
+ res = (ppu.readbuffer = (ppu.openbus & 0xC0) | (*t & 0x3F));
+ }
+ ppu.readbuffer = *t;
+ }
+ refreshopenbus(res);
+ // The step is selected by the increment bit of reg 0 (inc), not by the
+ // vblank flag.
+ ppu.vaddr.raw += (ppu.reg.inc != 0 ? 32 : 1); // update address
+ }
+ return res;
+ }
+
+ // Performs the memory fetches and sprite evaluation for the current column
+ // ppu.x. Background/sprite fetches follow an 8-column cycle (switch on
+ // x % 8); sprite evaluation for the next scanline runs on odd columns
+ // 65..255 and cycles through the four bytes of each OAM entry.
+ fn rendering_tick(ppu *PPU) void {
+ let tile_decode_mode = as(bool)(0x10FFFF & (1u << (ppu.x / 16))); // when x is 0..255, 320..335
+
+ // Each action happens in two steps: 1) select memory address; 2) receive data and react on it.
+ switch ppu.x % 8 {
+ case 0, 2; // point to nametable / attribute table
+ if ppu.x % 8 == 2 {
+ ppu.ioaddr = 0x23C0 + (0x400*ppu.vaddr.basenta) + (8*(ppu.vaddr.ycoarse/4)) + (ppu.vaddr.xcoarse/4);
+ }
+ if ppu.x % 8 == 0 or !tile_decode_mode {
+ // Use the 16-bit address field, not the whole scroll word (whose low
+ // three bits are fine-x).
+ ppu.ioaddr = 0x2000 + (ppu.vaddr.raw & 0xFFF);
+ // reset sprite data
+ if ppu.x == 0 {
+ ppu.sprinpos = (ppu.sproutpos = 0);
+ if ppu.reg.showsp { ppu.reg.oamaddr = 0; }
+ }
+ if ppu.reg.showbg {
+ // reset scrolling (vertical once, horizontal each scanline)
+ if ppu.x == 304 and ppu.scanline == -1 { ppu.vaddr = ppu.scroll; }
+ if ppu.x == 256 {
+ ppu.vaddr.xcoarse = ppu.scroll.xcoarse;
+ ppu.vaddr.basenta_h = ppu.scroll.basenta_h;
+ ppu.sprrenpos = 0;
+ }
+ }
+ }
+ case 1;
+ if ppu.x == 337 and ppu.scanline == -1 and ppu.parity and ppu.reg.showbg {
+ ppu.scanline_end = 340;
+ }
+ // name table access
+ ppu.pat_addr = (0x1000*ppu.reg.bgaddr) + (16 * *mmap(ppu, ppu.ioaddr)) + ppu.vaddr.yfine;
+ if tile_decode_mode {
+ // push current tile into shift regs
+ // the bitmap pattern is 16 bits, while the attribute is 2 bits, repeated 8 times
+ ppu.bg_shift_pat = (ppu.bg_shift_pat >> 16) + (0x00010000 * ppu.tilepat);
+ ppu.bg_shift_attr = (ppu.bg_shift_attr >> 16) + (0x55550000 * ppu.tileattr);
+ }
+ case 3;
+ // attribute table access
+ if tile_decode_mode {
+ ppu.tileattr = (*mmap(ppu, ppu.ioaddr) >> ((ppu.vaddr.xcoarse&2) + (2*(ppu.vaddr.ycoarse&2)))) & 3;
+ // go to the next tile horizontally (and switch nametable if it wraps)
+ if ++ppu.vaddr.xcoarse == 0 { ppu.vaddr.basenta_h = 1 - ppu.vaddr.basenta_h; }
+ // at the edge of the screen do the same but vertically
+ if ppu.x == 251 and ++ppu.vaddr.yfine == 0 and ++ppu.vaddr.ycoarse == 30 {
+ ppu.vaddr.ycoarse = 0; ppu.vaddr.basenta_v = 1 - ppu.vaddr.basenta_v;
+ }
+ } else if ppu.sprrenpos < ppu.sproutpos {
+ // select sprite pattern instead of background pattern
+ let o = &ppu.oam3[ppu.sprrenpos]; // sprite to render on next scanline
+ // `o` is a pointer, so the copy length must be the size of the sprite
+ // it points to, not the size of the pointer itself.
+ memcpy(o, &ppu.oam2[ppu.sprrenpos], sizeof(*o));
+ let y uint = ppu.scanline - o.y;
+ if o.attr & 0x80 != 0 { y ^= ppu.reg.spsize ? 15 : 7; }
+ ppu.pat_addr = 0x1000 * (ppu.reg.spsize ? (o.index & 0x01) : ppu.reg.spaddr);
+ ppu.pat_addr += 0x10 * (ppu.reg.spsize ? (o.index & 0xFE) : (o.index & 0xFF));
+ ppu.pat_addr += (y&7) + ((y&8)*2);
+ }
+ case 5; // pattern table bytes
+ ppu.tilepat = *mmap(ppu, ppu.pat_addr);
+ case 7; // interleave bits of the two pattern bytes
+ let p = ppu.tilepat | (*mmap(ppu, ppu.pat_addr|8) << 8);
+ p = (p&0xF00F) | ((p&0x0F00)>>4) | ((p&0x00F0)<<4);
+ p = (p&0xC3C3) | ((p&0x3030)>>2) | ((p&0x0C0C)<<2);
+ p = (p&0x9999) | ((p&0x4444)>>1) | ((p&0x2222)<<1);
+ ppu.tilepat = p;
+ // When decoding sprites, save the sprite graphics and move to next sprite
+ if !tile_decode_mode and ppu.sprrenpos < ppu.sproutpos {
+ ppu.oam3[ppu.sprrenpos++].pattern = ppu.tilepat;
+ }
+ }
+ // find which sprites are visible on next scanline (TODO: implement crazy 9-sprite malfunction)
+ // Odd columns process the byte that the preceding even column read into
+ // sprtmp; every other column takes the `else` case and only reads OAM.
+ switch ppu.x >= 64 and ppu.x < 256 and ppu.x%2 != 0 ? (ppu.reg.oamaddr++ & 3) : 4 {
+ case else
+ // access oam
+ ppu.sprtmp = ppu.oam[ppu.reg.oamaddr];
+ case 0;
+ if ppu.sprinpos >= 64 { ppu.reg.oamaddr = 0; }
+ else {
+ ++ppu.sprinpos; // next sprite
+ if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].y = ppu.sprtmp; }
+ if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].sprindex = ppu.reg.oamindex; }
+ // The sprite covers scanlines [y1, y2). Both bounds are signed so the
+ // comparison also behaves on the pre-render line (scanline == -1).
+ let y1 int = ppu.sprtmp, y2 int = y1 + (ppu.reg.spsize ? 16 : 8);
+ if !(ppu.scanline >= y1 and ppu.scanline < y2) {
+ // not on this scanline: skip the remaining three bytes of the entry
+ ppu.reg.oamaddr = ppu.sprinpos != 2 ? ppu.reg.oamaddr + 3 : 8;
+ }
+ }
+ case 1;
+ if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].index = ppu.sprtmp; }
+ case 2;
+ if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].attr = ppu.sprtmp; }
+ case 3;
+ if ppu.sproutpos < 8 {
+ ppu.oam2[ppu.sproutpos].x = ppu.sprtmp;
+ ++ppu.sproutpos;
+ } else { ppu.reg.spoverflow = #t; }
+ if ppu.sprinpos == 2 { ppu.reg.oamaddr = 8; }
+ }
+ }
+
+ // Produces the pixel at (ppu.x, ppu.scanline): picks the background pixel
+ // from the shift registers, overlays the first opaque sprite pixel, looks the
+ // result up in palette RAM and hands it to g_io.putpixel.
+ fn render_pixel(ppu *PPU) void {
+ let edge = as(u8)(ppu.x + 8) < 16; // 0..7, 248..255
+ let showbg = ppu.reg.showbg and (!edge or ppu.reg.showbg8);
+ let showsp = ppu.reg.showsp and (!edge or ppu.reg.showsp8);
+
+ //render the background
+ let fx = ppu.scroll.xfine, xpos = 15u - (((ppu.x&7) + fx + (ppu.x&7!=0 ? 8 : 0)) & 15);
+ let pixel = 0u, attr = 0u;
+ if showbg { // pick a pixel from shift registers
+ pixel = (ppu.bg_shift_pat >> (xpos*2)) & 3;
+ attr = (ppu.bg_shift_attr >> (xpos*2)) & (pixel != 0 ? 3 : 0);
+ // With rendering fully disabled and the VRAM address pointing into palette
+ // RAM, that address selects the colour. The address is the `raw` field
+ // (bits 3..18), not the whole scroll word.
+ } else if ppu.vaddr.raw & 0x3F00 == 0x3F00 and ppu.reg.showbgsp == 0 {
+ pixel = ppu.vaddr.raw;
+ }
+
+ // overlay the sprites
+ if showsp {
+ for let sno = 0u; sno < ppu.sprrenpos; ++sno {
+ let s = &ppu.oam3[sno];
+ // check if the sprite is horizontally in range
+ // (xdiff is unsigned, so columns left of the sprite also fail the test)
+ let xdiff uint = ppu.x - s.x;
+ if xdiff >= 8 { continue; }
+ // determine which pixel to display; skip transparent ones
+ if s.attr & 0x40 == 0 { xdiff = 7 - xdiff; }
+ let spritepixel u8 = (s.pattern >> (xdiff*2)) & 3;
+ if spritepixel == 0 { continue; }
+ // check sprite-0 hit
+ if ppu.x < 255 and pixel != 0 and s.sprindex == 0 { ppu.reg.sp0hit = #t; }
+ // render pixel unless behind-background placement wanted
+ if s.attr & 0x20 == 0 or pixel == 0 {
+ attr = (s.attr & 3) + 4;
+ pixel = spritepixel;
+ }
+ // only process first non-transparent sprite pixel
+ break;
+ }
+ }
+ pixel = ppu.palette[((attr*4) + pixel) & 0x1F] & (ppu.reg.grayscale ? 0x30 : 0x3F);
+ g_io->putpixel(ppu.x, ppu.scanline, pixel | (ppu.reg.emprgb << 6), ppu.ncycles);
+ }
+
+ // PPU:tick() -- This function is called 3 times per each CPU cycle.
+ // Each call iterates through one pixel of the screen.
+ // The screen is divided into 262 scanlines, each having 341 columns, as such:
+ //
+ // x=0 x=256 x=340
+ // ___|____________________|__________|
+ // y=-1 | pre-render scanline| prepare | >
+ // ___|____________________| sprites _| > Graphics
+ // y=0 | visible area | for the | > processing
+ // | - this is rendered | next | > scanlines
+ // y=239 | on the screen. | scanline | >
+ // ___|____________________|______
+ // y=240 | idle
+ // ___|_______________________________
+ // y=241 | vertical blanking (idle)
+ // | 20 scanlines long
+ // y=260___|____________________|__________|
+ //
+ // On actual PPU, the scanline begins actually before x=0, with
+ // sync/colorburst/black/background color being rendered, and
+ // ends after x=256 with background/black being rendered first,
+ // but in this emulator we only care about the visible area.
+ //
+ // When background rendering is enabled, scanline -1 is
+ // 340 or 341 pixels long, alternating each frame.
+ // In all other situations the scanline is 341 pixels long.
+ // Thus, it takes 89341 or 89342 PPU::tick() calls to render 1 frame.
+ // Advances the PPU by one column: updates the vblank/NMI state machine and
+ // the open-bus decay, runs fetches and pixel output on scanlines below 240,
+ // then handles the end of the scanline.
+ fn tick(ppu *PPU) void {
+ // set/clear vblank where needed
+ switch ppu.vblankstate {
+ case -5; ppu.reg.status = 0;
+ case 2; ppu.reg.invblank = #t;
+ case 0; g_cpu.nmi = ppu.reg.invblank and ppu.reg.nmienable;
+ }
+ // vblankstate counts towards zero from either side
+ if ppu.vblankstate != 0 { ppu.vblankstate += (ppu.vblankstate < 0 ? 1 : -1); }
+ if ppu.openbus_decaytimer > 0 {
+ if --ppu.openbus_decaytimer == 0 { ppu.openbus = 0; }
+ }
+ // graphics processing scanline?
+ if ppu.scanline < 240 {
+ // process graphics for this cycle
+ if ppu.reg.showbgsp != 0 { ppu->rendering_tick(); }
+ if ppu.scanline >= 0 and ppu.x < 256 { ppu->render_pixel(); }
+ }
+ // done with cycle. check for end of scanline
+ if ++ppu.ncycles == 3 { ppu.ncycles = 0; #{ for NTSC rendering } }
+ if ++ppu.x >= ppu.scanline_end {
+ // begin new scanline
+ g_io->flush_scanline(ppu.scanline);
+ ppu.scanline_end = 341;
+ ppu.x = 0;
+ switch ppu.scanline += 1 {
+ case 261; // begin rendering
+ ppu.scanline = -1; // pre render line
+ ppu.parity = !ppu.parity;
+ // clear vblank
+ ppu.vblankstate = -5;
+ case 241; // begin of vblank
+ // Controller input is replayed from "input.fmv" if that file can be
+ // opened: one byte per enabled pad is read each frame, and a pad reads
+ // as 0 once the file is exhausted. The open is retried every frame
+ // while fp is still null.
+ static fp *FILE = {};
+ if fp == #null { fp = fopen("input.fmv", "rb"); };
+ if(fp)
+ {
+ static ctrlmask = 0u;
+ // first use: read the controller mask from the header, then skip to the data
+ if(ftell(fp) == 0)
+ {
+ fseek(fp, 0x05, SEEK_SET);
+ ctrlmask = fgetc(fp);
+ fseek(fp, 0x90, SEEK_SET); // Famtasia Movie format.
+ }
+ if(ctrlmask & 0x80!=0) { g_io.nextjoy[0] = fgetc(fp); if feof(fp){g_io.nextjoy[0] = 0;} }
+ if(ctrlmask & 0x40!=0) { g_io.nextjoy[1] = fgetc(fp); if feof(fp){g_io.nextjoy[1] = 0;} }
+ }
+ // set vblank flag
+ ppu.vblankstate = 2;
+ }
+ }
+ }
+}
+
+// The single PPU instance; ticked from cpu_tick and accessed through cpu_access.
+static g_ppu PPU = {};
+
+struct APU { // Audio Processing Unit
+ fivecycledivider bool, irqdisable bool, channelsenabled [5]bool,
+ periodicirq bool, dmc_irq bool,
+ // Five channels; as handled in tick_channel: 0 and 1 are square waves,
+ // 2 is the triangle, 3 is noise and 4 is the delta modulation channel.
+ channels [5]struct {
+ length_counter int, linear_counter int, address int, envelope int,
+ sweep_delay int, env_delay int, wave_counter int, hold int, phase int, level int,
+ // reg0..reg3 are the four register bytes of a channel; the other fields
+ // are overlapping views of bits within them.
+ reg bitfield : u32 { // per channel register file
+ // 4000, 4004, 400C, 4012: // 4001, 4005, 4013: // 4002, 4006, 400A, 400E:
+ reg0 (0,8), reg1 (8, 8), reg2 (16, 8),
+ dutycycle (6,2), sweepshift (8, 3), noisefreq (16, 4),
+ envdecaydisable (4,1) bool, sweepdecrease (11,1), noisetype (23, 1) bool,
+ envdecayrate (0,4), sweeprate (12,3), wavelength (16,11),
+ envdecayloopenable (5,1) bool, sweepenable (15,1) bool, // 4003, 4007, 400b, 400f, 4010:
+ fixedvolume (0,4), pcmlength (8, 8), reg3 (24, 8),
+ lengthcounterdisable (5,1) bool, lengthcounterinit (27, 5),
+ linearcounterinit (0,7), loopenabled (30, 1) bool,
+ linearcounterdisable (7,1) bool, irqenable (31, 1) bool,
+ }
+ },
+ // Frame sequencer state: lo accumulates clock ticks, hi is the step number.
+ hz240counter struct { lo i16, hi i16 },
+
+ // Down-counter helper: decrements *v; when it drops below zero it is reloaded
+ // with `reset` and #t is returned, otherwise #f.
+ fn count(v *int, reset int) bool {
+ if --*v < 0 { *v = reset; return #t; }
+ return #f;
+ }
+
+ typedef Channel typeof((APU{}).channels[0]);
+
+ // Advances the wave generator of channel `ch` by one CPU cycle and returns
+ // its current output level. `c` selects both the waveform type and the
+ // channelsenabled entry that is consulted. The level only changes when the
+ // channel's wave counter expires; in between, the previous level is returned.
+ fn tick_channel(apu *APU, ch *Channel, c uint) int {
+ if !apu.channelsenabled[c] { return c == 4 ? 64 : 8; }
+ let wl = (ch.reg.wavelength + 1) * (c >= 2 ? 1 : 2);
+ static const NoisePeriods [16]const u16 = { 2,4,8,16,32,48,64,80,101,127,190,254,381,508,1017,2034 };
+ if c == 3 { wl = NoisePeriods[ch.reg.noisefreq]; }
+ let volume = ch.length_counter > 0 ? (ch.reg.envdecaydisable ? ch.reg.fixedvolume : ch.envelope) : 0;
+ let S = &ch.level;
+ // Sample may change only at wavelength intervals: keep the old level until
+ // the counter expires. (count returns #t on expiry.)
+ if !count(&ch.wave_counter, wl) { return *S; }
+ switch c {
+ case else // square wave. with 4 different 8-step binary waveforms (32 bits of data total)
+ if wl < 8 { return *S = 8; }
+ return *S = (0xF33C0C04u & (1u << ((++ch.phase % 8) + (ch.reg.dutycycle * 8)))) != 0 ? volume : 0;
+
+ case 2; // triangle wave
+ if ch.length_counter > 0 and ch.linear_counter > 0 and wl >= 3 { ++ch.phase; }
+ return *S = (ch.phase & 15) ^ ((ch.phase & 16) != 0 ? 15 : 0);
+
+ case 3; // noise: LFSR
+ if ch.hold == 0 { ch.hold = 1; }
+ ch.hold = (ch.hold >> 1)
+ | (((ch.hold ^ (ch.hold >> (ch.reg.noisetype ? 6 : 1))) & 1) << 14);
+ return *S = (ch.hold & 1) != 0 ? 0 : volume;
+
+ case 4; // delta modulation channel (DMC)
+ // hold = 8 bit value, phase = number of bits buffered
+ if ch.phase == 0 { // nothing in sample buffer?
+ if ch.length_counter == 0 and ch.reg.loopenabled { // Loop?
+ ch.length_counter = (ch.reg.pcmlength * 16) + 1;
+ ch.address = (ch.reg.reg0 | 0x300) << 6;
+ }
+ if ch.length_counter > 0 { // load next 8 bits
+ // Note: Re-entrant! But not recursive, because even
+ // the shortest wave length is greater than the read time.
+ // TODO: proper clock
+ if ch.reg.wavelength > 20 {
+ for let t=0; t<3; ++t { g_cpu->RB(as(u16)(ch.address) | 0x8000); } // timing
+ }
+ ch.hold = g_cpu->RB(as(u16)(ch.address++) | 0x8000); // fetch byte
+ ch.phase = 8;
+ --ch.length_counter;
+ } else { // disable channel or issue irq
+ apu.channelsenabled[4] = ch.reg.irqenable and (g_cpu.intr = (apu.dmc_irq = #t));
+ }
+ }
+ if ch.phase != 0 { // update the signal if sample buffer non empty
+ let v = ch.linear_counter;
+ // Test the next buffered bit of the sample byte: the mask must be
+ // ANDed with hold, not merely checked for being non-zero.
+ if (ch.hold & (0x80 >> --ch.phase)) != 0 { v += 2; } else { v -= 2; }
+ if v >= 0 and v <= 0x7F { ch.linear_counter = v; }
+ }
+ return *S = ch.linear_counter;
+ }
+ }
+
+ // Power-on state: the periodic IRQ starts out disabled.
+ fn init(apu *APU) void {
+ apu.irqdisable = #t;
+ }
+
+ // Handles a CPU write to APU register `index` (the address with only its low
+ // five bits kept). Indices below 0x10 address one of the four register bytes
+ // of channel index/4; the remaining indices are handled individually.
+ fn write(apu *APU, index u8, value u8) void {
+ let ch = &apu.channels[(index/4)%5];
+ switch index < 0x10 ? index%4 : index {
+ // reg0 is always stored; only the linear counter reload depends on the
+ // (previously written) linearcounterdisable bit.
+ case 0; if ch.reg.linearcounterdisable { ch.linear_counter = value&0x7F; } ch.reg.reg0 = value;
+ case 1; ch.reg.reg1 = value; ch.sweep_delay = ch.reg.sweeprate;
+ case 2; ch.reg.reg2 = value;
+ case 3;
+ ch.reg.reg3 = value;
+ if apu.channelsenabled[index/4] {
+ static const LengthCounters[32]const u8 = { 10,254,20, 2,40, 4,80, 6,160, 8,60,10,14,12,26,14,
+ 12, 16,24,18,48,20,96,22,192,24,72,26,16,28,32,30 };
+ ch.length_counter = LengthCounters[ch.reg.lengthcounterinit];
+ }
+ ch.linear_counter = ch.reg.linearcounterinit;
+ ch.env_delay = ch.reg.envdecayrate;
+ ch.envelope = 15;
+ if index < 8 { ch.phase = 0; }
+ case 0x10;
+ static const DMCperiods[16]const u16 = { 428,380,340,320,286,254,226,214,190,160,142,128,106,84,72,54 };
+ ch.reg.reg3 = value; ch.reg.wavelength = DMCperiods[value&0xF];
+ case 0x12; ch.reg.reg0 = value; ch.address = (ch.reg.reg0 | 0x300) << 6;
+ case 0x13; ch.reg.reg1 = value; ch.length_counter = (ch.reg.pcmlength*16) + 1; // sample length
+ case 0x11; ch.linear_counter = value & 0x7F; // dac value
+ case 0x15;
+ // channel enable bits: one bit per channel
+ for let c = 0; c<5; ++c {
+ apu.channelsenabled[c] = value & (1 << c) != 0;
+ }
+ for let c = 0; c<5; ++c {
+ if !apu.channelsenabled[c] {
+ apu.channels[c].length_counter = 0;
+ } else if c == 4 and apu.channels[c].length_counter == 0 {
+ apu.channels[c].length_counter = (ch.reg.pcmlength*16) + 1;
+ }
+ }
+ case 0x17;
+ apu.irqdisable = value & 0x40 != 0;
+ apu.fivecycledivider = value & 0x80 != 0;
+ apu.hz240counter = { 0, 0 };
+ if apu.irqdisable { apu.periodicirq = (apu.dmc_irq = #f); }
+ }
+ }
+
+ // Status read: bits 0..4 report which channels still have a non-zero length
+ // counter, bit 6 the periodic IRQ and bit 7 the DMC IRQ. Reading clears both
+ // IRQ flags and the CPU's interrupt request line.
+ fn read(apu *APU) u8 {
+ let res u8 = 0;
+ for let c=0; c<5; ++c { res |= (apu.channels[c].length_counter > 0 ? 1 << c : 0); }
+ if apu.periodicirq { res |= 0x40; apu.periodicirq = #f; }
+ if apu.dmc_irq { res |= 0x80; apu.dmc_irq = #f; }
+ g_cpu.intr = #f;
+ return res;
+ }
+
+ // Advances the APU by one CPU cycle: runs the frame sequencer (length, sweep,
+ // linear and envelope counters) whenever the 240 Hz divider expires, then
+ // ticks all five wave generators and mixes their outputs into one sample.
+ // The sample is currently computed but not sent anywhere.
+ fn tick(apu *APU) void { // invoked at cpu's rate
+ // Divide CPU clock by 7457.5 to get a 240 Hz, which controls certain events.
+ if (apu.hz240counter.lo += 2) >= 14915 {
+ apu.hz240counter.lo -= 14915;
+ if ++apu.hz240counter.hi >= 4+as(int)apu.fivecycledivider { apu.hz240counter.hi = 0; }
+
+ // 60 Hz interval: IRQ. IRQ is not invoked in five-cycle mode (48 Hz).
+ if !apu.irqdisable and !apu.fivecycledivider and apu.hz240counter.hi == 0 {
+ g_cpu.intr = (apu.periodicirq = #t);
+ }
+
+ // Some events are invoked at 96 Hz or 120 Hz rate. Others, 192 Hz or 240 Hz.
+ let halftick = (apu.hz240counter.hi & 5) == 1, fulltick = apu.hz240counter.hi < 4;
+ for let c = 0; c < 4; ++c {
+ let ch = &apu.channels[c];
+ let wl = ch.reg.wavelength;
+
+ // Length tick (all channels except DMC, but different disable bit for triangle wave)
+ if halftick and ch.length_counter > 0
+ and !(c == 2 ? ch.reg.linearcounterdisable : ch.reg.lengthcounterdisable) {
+ ch.length_counter -= 1;
+ }
+
+ // Sweep tick (square waves only)
+ if halftick and c < 2 and count(&ch.sweep_delay, ch.reg.sweeprate) {
+ // Periods below 8 are not swept; this is the same cut-off that
+ // tick_channel uses for the square channels (wl < 8).
+ if wl >= 8 and ch.reg.sweepenable and ch.reg.sweepshift != 0 {
+ // d is indexed by (sweepdecrease*2 + channel): the two square
+ // channels negate differently (~s versus -s).
+ let s = wl >> ch.reg.sweepshift, d [4]int = {s,s,~s,-s};
+ wl += d[(ch.reg.sweepdecrease*2)+ c];
+ if wl < 0x800 { ch.reg.wavelength = wl; }
+ }
+ }
+
+ // Linear tick (triangle wave only)
+ if fulltick and c == 2 {
+ ch.linear_counter =
+ ch.reg.linearcounterdisable ? ch.reg.linearcounterinit
+ : (ch.linear_counter > 0 ? ch.linear_counter - 1 : 0);
+ }
+
+ // envelope tick (square and noise channels)
+ if fulltick and c != 2 and count(&ch.env_delay, ch.reg.envdecayrate) {
+ if ch.envelope > 0 or ch.reg.envdecayloopenable {
+ ch.envelope = (ch.envelope - 1) & 15;
+ }
+ }
+ }
+ }
+ // mix the audio: get the momentary sample from each channel and mix them
+ // (channel 1 is ticked with c == 0, i.e. it consults channelsenabled[0])
+ defmacro s(c) [ (apu->tick_channel(&apu.channels[c], c == 1 ? 0 : c)) ]
+ // v: m / n, or the fallback d when n is zero
+ fn v(m f32, n f32, d f32) f32 { return n != 0.f ? m/n : d; }
+ let sample i16 = 30000 *
+ (v(95.88f, (100.f + v(8128.f, s(0) + s(1), -100.f)), 0.f)
+ + v(159.79f, (100.f + v(1.0, s(2)/8227.f + s(3)/12241.f + s(4)/22638.f, -100.f)), 0.f)
+ + -0.5f);
+ // SDL_QueueAudio(g_io.auddev, &sample, 2);
+ }
+}
+// The single APU instance.
+static g_apu APU = {};
+
+// Advances the rest of the machine by one CPU cycle: three PPU ticks and one
+// APU tick.
+fn cpu_tick() void {
+ for let n = 0; n < 3; ++n { g_ppu->tick(); }
+ for let n = 0; n < 1; ++n { g_apu->tick(); }
+}
+// Performs one CPU bus access (read, or write of `v`) at `addr` and advances
+// the clock by one CPU cycle. Returns the byte read; write paths that do not
+// return earlier fall through to the final `return 0`.
+fn cpu_access(cpu *CPU, addr u16, v u8, write bool) u8 {
+ // memory writes are turned into reads while reset is being signalled
+ if cpu.reset and write { return cpu_access(cpu, addr, 0, #f); }
+ cpu_tick();
+ //map the memory from cpu's viewpoint
+ switch {
+ // internal RAM: 0x800 bytes, selected by the low 11 address bits
+ case addr < 0x2000; let r = &cpu.ram[addr & 0x7FF];
+ if !write { return *r; }
+ *r = v;
+ // PPU ports: selected by the low 3 address bits
+ case addr < 0x4000; return g_ppu->access(addr & 7, v, write);
+ case addr < 0x4018;
+ switch addr & 0x1F {
+ case 0x14; // OAM DMA
+ // copies 256 bytes from page (v & 7) to PPU port 4 via regular bus accesses
+ if write { for let b = 0; b<256; ++b { cpu->WB(0x2004, cpu->RB(((v&7)*0x100)+b)); } }
+ case 0x15; if !write { return g_apu->read(); }
+ g_apu->write(0x15,v);
+ case 0x16; if !write { return g_io->joy_read(0); }
+ g_io->joy_strobe(v);
+ case 0x17; if !write { return g_io->joy_read(1); }
+ g_apu->write(addr & 0x1F, v);
+ case else if write { g_apu->write(addr&0x1F, v); }
+ }
+ // everything else belongs to the cartridge
+ case else
+ return g_pak->access(addr, v, write);
+ }
+ return 0;
+}
+
+// Combines the high byte of `oldaddr` with the low byte of `newaddr`, i.e. the
+// address obtained when no carry propagates into the page number.
+fn Cwrap(oldaddr u16, newaddr u16) u16 { return (oldaddr & 0xFF00) + as(u8)newaddr; }
+// Issues an extra read at the page-wrapped address when `addr` lies in a
+// different page than `old` (the wrapped address then differs from `addr`).
+fn Cmisfire(cpu *CPU, old u16, addr u16) void { let q = Cwrap(old, addr); if q != addr { cpu->RB(q); }}
+// Pops one byte: pre-increments the stack register and reads from page 0x100.
+fn Cpop(cpu *CPU) u8 { return cpu->RB(0x100 | as(u8)++cpu.s); }
+// Pushes one byte: writes `v` to page 0x100 at the stack register, then
+// post-decrements it. Returns the result of the bus write so that the declared
+// u8 return type is honoured on every path.
+fn Cpush(cpu *CPU, v u8) u8 { return cpu->WB(0x100 | as(u8)cpu.s--, v); }
+// One instantiation per opcode: CIns<op>:ins executes opcode `op` by running,
+// in the order listed, every micro-operation whose matrix string has the bit
+// for `op` set. The order of the O(...) lines is therefore significant.
+// NOTE(review): each matrix string is indexed with op/8, and op goes up to
+// 0x107, so every string presumably needs 33 characters. Several strings below
+// are shorter, which suggests runs of spaces were collapsed when this listing
+// was extracted — verify against the original nesemu1.cc before use.
+struct CIns<op u16> { // Execute a single CPU instruction, defined by opcode "op".
+ fn ins(cpu *CPU) void { // With template magic, the compiler will literally synthesize >256 different functions.
+ // Note: op 0x100 means "NMI", 0x101 means "Reset", 0x102 means "IRQ". They are implemented in terms of "BRK".
+ // User is responsible for ensuring that WB() will not store into memory while Reset is being processed.
+ let addr=0u, d=0u, t=0xFFu, c=0u, sb=0u, pbits = op<0x100 ? 0x30u : 0x20u;
+
+ // Define the opcode decoding matrix, which decides which micro-operations constitute
+ // any particular opcode. (Note: The PLA of 6502 works on a slightly different principle.)
+ def const o8 int = op/8;
+ def const o8m int = 1 << (op%8);
+ // Fetch op'th item from a bitstring encoded in a data-specific variant of base64,
+ // where each character transmits 8 bits of information rather than 6.
+ // This peculiar encoding was chosen to reduce the source code size.
+ defmacro O(s,code) [
+ {
+ def const i int = o8m & (s[o8]>90 ? (130+" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[s[o8]-94])
+ : (s[o8]-" (("[s[o8]/39]));
+ if i!=0 { code; }
+ }
+ ]
+ // Short aliases so the micro-operations below read like the register names.
+ def X = cpu.x, A = cpu.a, Y = cpu.y, PC = cpu.pc, S = cpu.s, P = cpu.p;
+ defmacro RB(a) [ cpu->RB(a) ]
+ defmacro WB(a,x) [ cpu->WB(a,x) ]
+ defmacro Misfire(...args) [Cmisfire(cpu, args)]
+ defmacro Pop() [Cpop(cpu)]
+ defmacro Push(x) [Cpush(cpu,x)]
+ def wrap = Cwrap;
+
+ // Decode address operand
+ O(" !", addr = 0xFFFA) // NMI vector location
+ O(" *", addr = 0xFFFC) // Reset vector location
+ O("! ,", addr = 0xFFFE) // Interrupt vector location
+ O("zy}z{y}zzy}zzy}zzy}zzy}zzy}zzy}z ", addr = RB(PC++))
+ O("2 yy2 yy2 yy2 yy2 XX2 XX2 yy2 yy ", d = X) // register index
+ O(" 62 62 62 62 om om 62 62 ", d = Y)
+ O("2 y 2 y 2 y 2 y 2 y 2 y 2 y 2 y ", addr=as(u8)(addr+d); d=0; cpu_tick()) // add zeropage-index
+ O(" y z!y z y z y z y z y z y z y z ", addr=as(u8)(addr); addr+=256*RB(PC++)) // absolute address
+ O("3 6 2 6 2 6 286 2 6 2 6 2 6 2 6 /", addr=RB(c=addr); addr+=256*RB(wrap(c,c+1)))// indirect w/ page wrap
+ O(" *Z *Z *Z *Z 6z *Z *Z ", Misfire(addr, addr+d)) // abs. load: extra misread when cross-page
+ O(" 4k 4k 4k 4k 6z 4k 4k ", RB(wrap(addr, addr+d)))// abs. store: always issue a misread
+ // Load source operand
+ O("aa__ff__ab__,4 ____ - ____ ", t &= A) // Many operations take A or X as operand. Some try in
+ O(" knnn 4 99 ", t &= X) // error to take both; the outcome is an AND operation.
+ O(" 9989 99 ", t &= Y) // sty,dey,iny,tya,cpy
+ O(" 4 ", t &= S) // tsx, las
+ O("!!!! !! !! !! ! !! !! !!/", t &= P.#raw|pbits; c = t)// php, flag test/set/clear, interrupts
+ O("_^__dc___^__ ed__98 ", c = t; t = 0xFF) // save as second operand
+ O("vuwvzywvvuwvvuwv zy|zzywvzywv ", t &= RB(addr+d)) // memory operand
+ O(",2 ,2 ,2 ,2 -2 -2 -2 -2 ", t &= RB(PC++)) // immediate operand
+ // Operations that mogrify memory operands directly
+ O(" 88 ", P.v = 0!= t & 0x40; P.n = 0!= t & 0x80) // bit
+ O(" nink nnnk ", sb = P.ic) // rol,rla, ror,rra,arr
+ O("nnnknnnk 0 ", P.c = 0!= t & 0x80) // rol,rla, asl,slo,[arr,anc]
+ O(" nnnknink ", P.c = 0!= t & 0x01) // lsr,sre, ror,rra,asr
+ O("ninknink ", t = (t << 1) | (sb * 0x01))
+ O(" nnnknnnk ", t = (t >> 1) | (sb * 0x80))
+ O(" ! kink ", t = as(u8)(t - 1)) // dec,dex,dey,dcp
+ O(" ! khnk ", t = as(u8)(t + 1)) // inc,inx,iny,isb
+ // Store modified value (memory)
+ O("kgnkkgnkkgnkkgnkzy|J kgnkkgnk ", WB(addr+d, t))
+ O(" q ", WB(wrap(addr, addr+d), t &= ((addr+d) >> 8))) // [shx,shy,shs,sha?]
+ // Some operations used up one clock cycle that we did not account for yet
+ O("rpstljstqjstrjst - - - -kjstkjst/", cpu_tick()) // nop,flag ops,inc,dec,shifts,stack,transregister,interrupts
+ // Stack operations and unconditional jumps
+ O(" ! ! ! ", cpu_tick(); t = Pop()) // pla,plp,rti
+ O(" ! ! ", RB(PC++); PC = Pop(); PC |= (Pop() << 8)) // rti,rts
+ O(" ! ", RB(PC++)) // rts
+ O("! ! /", d=PC+(op!=0?-1:1); Push(d>>8); Push(d)) // jsr, interrupts
+ O("! ! 8 8 /", PC = addr) // jmp, jsr, interrupts
+ O("!! ! /", Push(t)) // pha, php, interrupts
+ // Bitmasks
+ O("! !! !! !! !! ! !! !! !!/", t = 1)
+ O(" ! ! !! !! ", t <<= 1)
+ O("! ! ! !! !! ! ! !/", t <<= 2)
+ O(" ! ! ! ! ! ", t <<= 4)
+ O(" ! ! ! !____ ", t = as(u8)(~t)) // sbc, isb, clear flag
+ O("`^__ ! ! !/", t = c | t) // ora, slo, set flag
+ O(" !!dc`_ !! ! ! !! !! ! ", t = c & t) // and, bit, rla, clear/test flag
+ O(" _^__ ", t = c ^ t) // eor, sre
+ // Conditional branches
+ O(" ! ! ! ! ", if 0!=t { cpu_tick(); Misfire(PC, addr = as(i8)(addr) + PC); PC=addr; })
+ O(" ! ! ! ! ", if 0==t { cpu_tick(); Misfire(PC, addr = as(i8)(addr) + PC); PC=addr; })
+ // Addition and subtraction
+ O(" _^__ ____ ", c = t; t += A + P.ic; P.v = 0!= (c^t) & (A^t) & 0x80; P.c = 0!= t & 0x100)
+ O(" ed__98 ", t = c - t; P.c = 0!= ~t & 0x100) // cmp,cpx,cpy, dcp, sbx
+ // Store modified value (register)
+ O("aa__aa__aa__ab__ 4 !____ ____ ", A = t)
+ O(" nnnn 4 ! ", X = t) // ldx, dex, tax, inx, tsx,lax,las,sbx
+ O(" ! 9988 ! ", Y = t) // ldy, dey, tay, iny
+ O(" 4 0 ", S = t) // txs, las, shs
+ O("! ! ! !! ! ! ! ! !/", P.#raw = t & ~0x30) // plp, rti, flag set/clear
+ // Generic status flag updates
+ O("wwwvwwwvwwwvwxwv 5 !}}||{}wv{{wv ", P.n = 0!= t & 0x80)
+ O("wwwv||wvwwwvwxwv 5 !}}||{}wv{{wv ", P.z = as(u8)(t) == 0)
+ O(" 0 ", P.v = 0!= (((t >> 5)+1)&2)) // [arr]
+ // All implemented opcodes are cycle-accurate and memory-access-accurate.
+ // [] means that this particular separate rule exists only to provide the indicated unofficial opcode(s).
+ }
+}
+
+// Executes one instruction. The opcode byte is always fetched at pc, but it is
+// replaced by a pseudo-opcode when a reset (0x101), a newly raised NMI (0x100)
+// or an unmasked IRQ (0x102) is pending, in that order of priority.
+fn cpu_op(cpu *CPU) void {
+ let nmi_now = cpu.nmi; // check nmi
+ let op int = cpu->RB(cpu.pc++);
+ switch {
+ case cpu.reset; op = 0x101;
+ case nmi_now and !cpu.nmi_edge; op = 0x100; cpu.nmi_edge = #t;
+ case cpu.intr and !cpu.p.i; op = 0x102;
+ }
+ // re-arm the NMI edge detector once the line is low again
+ if !nmi_now { cpu.nmi_edge = #f; }
+ // Dispatch table with one CIns<n>:ins per opcode 0x00..0x107; I(n) expands
+ // to the eight entries n..n+7.
+ defmacro I(n) [&CIns<n+0>:ins,&CIns<n+1>:ins,&CIns<n+2>:ins,&CIns<n+3>:ins,
+ &CIns<n+4>:ins,&CIns<n+5>:ins,&CIns<n+6>:ins,&CIns<n+7>:ins,]
+ static const i [0x108]const *fn(*CPU)void = {
+ I(0x00)I(0x08)I(0x10)I(0x18)I(0x20)I(0x28)I(0x30)I(0x38)
+ I(0x40)I(0x48)I(0x50)I(0x58)I(0x60)I(0x68)I(0x70)I(0x78)
+ I(0x80)I(0x88)I(0x90)I(0x98)I(0xA0)I(0xA8)I(0xB0)I(0xB8)
+ I(0xC0)I(0xC8)I(0xD0)I(0xD8)I(0xE0)I(0xE8)I(0xF0)I(0xF8) I(0x100)
+ };
+ i[op](cpu);
+ cpu.reset = #f;
+}
+
+// Entry point: loads the ROM image named by argv[1], initialises all
+// components and then runs the CPU forever. Returns 1 on any start-up error;
+// the emulation loop itself only ends when the process exits.
+extern fn main(argc int, argv **u8) int {
+ if SDL_Init() != 0 {
+ fprintf(stderr, "SDL: %s\n", SDL_GetError());
+ return 1;
+ }
+ let fp *FILE #?;
+ if argc < 2 {
+ fprintf(stderr, "ROM path?\n");
+ return 1;
+ }
+ fp = fopen(argv[1], "rb");
+ if fp == #null {
+ fprintf(stderr, "error opening rom\n");
+ return 1;
+ }
+
+ // read rom file header
+ if !(fgetc(fp) == 'N' and fgetc(fp) == 'E' and fgetc(fp) == 'S' and fgetc(fp) == 0x1A) {
+ fprintf(stderr, "bad rom\n");
+ return 1;
+ }
+
+
+ let rom16count u8 = fgetc(fp),
+ vrom8count u8 = fgetc(fp),
+ ctrlbyte u8 = fgetc(fp),
+ mappernum u8 = fgetc(fp) | (ctrlbyte>>4);
+ // skip the remaining eight header bytes
+ fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);
+ if mappernum >= 0x40 { mappernum &= 15; }
+
+ // GamePak.setrom maps banks modulo the program ROM length, so an image that
+ // declares no program ROM cannot be run (it would divide by zero).
+ if rom16count == 0 {
+ fprintf(stderr, "bad rom\n");
+ return 1;
+ }
+
+ // Read the ROM data
+ // An image without video ROM gets 0x2000 bytes of video RAM instead.
+ let rombuf = as(*u8)malloc(rom16count * 0x4000);
+ let vrambuf *u8 #?;
+ if vrom8count > 0 { vrambuf = as(*u8)malloc(vrom8count * 0x2000); }
+ else { vrambuf = as(*u8)malloc(0x2000); }
+ if rombuf == #null or vrambuf == #null {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ g_pak.rom = rombuf[0::rom16count*0x4000];
+ if vrom8count > 0 { g_pak.vram = vrambuf[0::vrom8count*0x2000]; }
+ else { g_pak.vram = vrambuf[0::0x2000]; }
+ fread(&g_pak.rom[0], rom16count, 0x4000, fp);
+ fread(&g_pak.vram[0], vrom8count, 0x2000, fp);
+
+ fclose(fp);
+ printf("%u * 16kB ROM, %u * 8kB VROM, mapper %u, ctrlbyte %02X\n", rom16count, vrom8count, mappernum, ctrlbyte);
+
+ g_io->init();
+ g_pak->init();
+ g_pak.mapperno = mappernum;
+ g_cpu->init();
+ g_ppu->init();
+ g_apu->init();
+
+ // Pre-initialize RAM the same way as FCEUX does, to improve TAS sync.
+ for let a=0; a<0x800; ++a {
+ g_cpu.ram[a] = (a&4)!=0 ? 0xFF : 0x00;
+ }
+
+ for ;; {
+ cpu_op(&g_cpu);
+ }
+}