import "../libc.hff";
import "sdl.hff";

// A port of https://bisqwit.iki.fi/jutut/kuvat/programming_examples/nesemu1/nesemu1.cc
// Original program by Joel Yliluoma

// Host-side I/O: the SDL window and frame buffer, plus the joypad shift registers.
struct IO {
    s *SDL_Surface,      // off-screen 256x240 frame buffer filled by putpixel
    win *SDL_Window,
    screen *SDL_Surface, // surface of `win`; blit target in flush_scanline
    curjoy [2]int,       // button bits latched by joy_strobe
    nextjoy [2]int,      // button bits that the next strobe will latch
    joypos [2]int,       // read position per pad, advanced by joy_read

    // Create the window and the 32-bit frame buffer; exits the process on SDL failure.
    fn init(io *IO) void {
        // 0x1FFF0000 is SDL_WINDOWPOS_UNDEFINED.
        io.win = SDL_CreateWindow("nesemu1", 0x1FFF0000u, 0x1FFF0000u, 256, 240, 0);
        if io.win == #null {
            fprintf(stderr, "SDL: %s\n", SDL_GetError());
            exit(1);
        }
        io.screen = SDL_GetWindowSurface(io.win);
        // All-zero masks: SDL picks its default 32-bpp layout (0x00RRGGBB).
        io.s = SDL_CreateRGBSurface(0, 256, 240, 32, 0,0,0,0);
        if io.s == #null {
            fprintf(stderr, "SDL: %s\n", SDL_GetError());
            exit(1);
        }
    }

    // Store one pixel at (px, py). Only the low 6 bits of `pixel` select a palette
    // entry; `offset` is accepted for interface compatibility and is not used.
    fn putpixel(io *IO, px uint, py uint, pixel uint, offset int) void {
        // Palette entries are written as 0xRRGGBBAA.
        static palette [64]u32 = {
            0x666666ff, 0x002a88ff, 0x1412a7ff, 0x3b00a4ff, 0x5c007eff, 0x6e0040ff, 0x6c0600ff, 0x561d00ff,
            0x333500ff, 0x0b4800ff, 0x005200ff, 0x004f08ff, 0x00404dff, 0x000000ff, 0x000000ff, 0x000000ff,
            0xadadadff, 0x155fd9ff, 0x4240ffff, 0x7527feff, 0xa01accff, 0xb71e7bff, 0xb53120ff, 0x994e00ff,
            0x6b6d00ff, 0x388700ff, 0x0c9300ff, 0x008f32ff, 0x007c8dff, 0x000000ff, 0x000000ff, 0x000000ff,
            0xfffeffff, 0x64b0ffff, 0x9290ffff, 0xc676ffff, 0xf36affff, 0xfe6eccff, 0xfe8170ff, 0xea9e22ff,
            0xbcbe00ff, 0x88d800ff, 0x5ce430ff, 0x45e082ff, 0x48cddeff, 0x4f4f4fff, 0x000000ff, 0x000000ff,
            0xfffeffff, 0xc0dfffff, 0xd3d2ffff, 0xe8c8ffff, 0xfbc2ffff, 0xfec4eaff, 0xfeccc5ff, 0xf7d8a5ff,
            0xe4e594ff, 0xcfef96ff, 0xbdf4abff, 0xb3f3ccff, 0xb5ebf2ff, 0xb8b8b8ff, 0x000000ff, 0x000000ff,
        };
        // Store the RGB color into the frame buffer.
        // The surface uses SDL's default 32-bpp layout (0x00RRGGBB), so drop the
        // trailing alpha byte of the 0xRRGGBBAA palette entry before storing it.
        ((as(*u32)io.s.pixels))[(py * 256) + px] = palette[pixel%64] >> 8;
    }

    // Called at the end of every scanline; after the last visible one (239) the
    // frame is presented and pending SDL events are drained (quit exits the process).
    fn flush_scanline(io *IO, py uint) void {
        if py == 239 {
            SDL_UpperBlit(io.s, #null, io.screen, #null);
            SDL_UpdateWindowSurface(io.win);
            let evt SDL_Event #?;
            while SDL_PollEvent(&evt) {
                if evt.t == :quit {
                    exit(0);
                }
            }
            // SDL_Delay(100);
        }
    }

    // When `v` is set, latch the pending button state of both pads and rewind
    // their read positions.
    fn joy_strobe(io *IO, v bool) void {
        if v {
            io.curjoy[0] = io.nextjoy[0];
            io.joypos[0] = 0;
            io.curjoy[1] = io.nextjoy[1];
            io.joypos[1] = 0;
        }
    }

    // Return the next button bit (0 or 1) of pad `idx` and advance its read position.
    fn joy_read(io *IO, idx uint) u8 {
        // Bit of curjoy that is reported on each successive read.
        static const masks [8]const u8 = {0x20,0x10,0x40,0x80,0x04,0x08,0x02,0x01};
        return (io.curjoy[idx] & masks[io.joypos[idx]++ & 7]) != 0 ? 1 : 0;
    }
}

static g_io IO = {};

def VROM_GRANULARITY = 0x400,
    VROM_PAGES = 0x2000 / VROM_GRANULARITY,
    ROM_GRANULARITY = 0x2000,
    ROM_PAGES = 0x10000 / ROM_GRANULARITY;

// Cartridge: PRG/CHR storage, the bank tables that map it into the CPU and PPU
// address spaces, and the mapper write handlers (mappers 1, 2, 3 and 7).
struct GamePak {
    rom [#]u8,
    vram [#]u8,
    mapperno uint,
    nram [0x1000]u8,
    pram [0x2000]u8,
    banks [ROM_PAGES]*u8,   // one pointer per ROM_GRANULARITY page of CPU address space
    vbanks [VROM_PAGES]*u8, // one pointer per VROM_GRANULARITY page of PPU pattern space
    nta [4]*u8,             // the four nametable pointers into nram

    // Point the bank-table pages covering [baseaddr, baseaddr+size) at bank `index`
    // (in units of `size`) of rom. Offsets wrap modulo the rom length, so the
    // length must be non-zero. Sizes below ROM_GRANULARITY may cover no page at all.
    fn setrom(this *GamePak, size uint, baseaddr uint, index uint) void {
        let r = this.rom;
        def granu = ROM_GRANULARITY;
        // Starting at r.#len + index*size keeps `v` non-negative for index == ~0.
        for let v = r.#len + (index * size), p = baseaddr / granu;
            p < (baseaddr + size) / granu and p < ROM_PAGES;
            (do ++p; v += granu;) {
            this.banks[p] = &r[v % r.#len];
        }
    }

    // Same as setrom, but for the vram storage and the vbanks table.
    fn setvrom(this *GamePak, size uint, baseaddr uint, index uint) void {
        let r = &this.vram;
        def granu = VROM_GRANULARITY;
        for let v = (*r).#len + (index * size), p = baseaddr / granu;
            p < (baseaddr + size) / granu and p < VROM_PAGES;
            (do ++p; v += granu;) {
            this.vbanks[p] = &r.[v % (*r).#len];
        }
    }

    // CPU-side cartridge access. Writes at or above 0x8000 drive the mapper
    // registers. The return value is always the byte read from pram (when
    // addr>>13 == 3) or from the currently mapped ROM page.
    fn access(this *GamePak, addr u16, val u8, write bool) u8 {
        switch {
        case write and addr >= 0x8000 and this.mapperno == 7; // e.g. Rare games
            this->setrom(0x8000, 0x8000, val & 7);
            // All four nametables share the 1 KiB page selected by bit 4.
            for let i = 0; i < 4; ++i {
                this.nta[i] = &this.nram[0x400 * ((val >> 4) & 1)];
            }
        case write and addr >= 0x8000 and this.mapperno == 2; // e.g. Rockman, Castlevania
            this->setrom(0x4000, 0x8000, val);
        case write and addr >= 0x8000 and this.mapperno == 3; // e.g. Kage, Solomon's Key
            val &= this->access(addr, 0, #f); // Bus conflict
            this->setvrom(0x2000, 0, val & 3);
        case write and addr >= 0x8000 and this.mapperno == 1; // e.g. Rockman 2, Simon's Quest
            // Serial port: five writes shift one bit each into `cache`; the fifth
            // commits it to the register selected by address bits 13-14.
            static regs [4]u8 = {0xC, 0, 0, 0}, counter = 0, cache = 0;
            let configure = #f;
            if val & 0x80 != 0 {
                // Bit 7 resets the control register and forces a reconfigure.
                regs[0] = 0xC;
                configure = #t;
            } else {
                cache |= (val & 1) << counter;
            }
            if configure or ++counter == 5 {
                if !configure {
                    regs[(addr >> 13) & 3] = (val = cache);
                }
                cache = (counter = 0);
                static sel [4][4]u8 = { {0,0,0,0}, {1,1,1,1}, {0,1,0,1}, {0,0,1,1} };
                for let m = 0; m < 4; ++m {
                    this.nta[m] = &this.nram[0x400 * sel[regs[0] & 3][m]];
                }
                // regs[1]/regs[2] select the two 4 KiB pattern-table (vrom) banks.
                this->setvrom(0x1000, 0x0000, ((regs[0]&16 != 0) ? regs[1] : ((regs[1]&~1)+0)));
                this->setvrom(0x1000, 0x1000, ((regs[0]&16 != 0) ? regs[2] : ((regs[1]&~1)+1)));
                switch (regs[0] >> 2) & 3 {
                case 0, 1;
                    this->setrom(0x8000, 0x8000, (regs[3] & 0xE) / 2);
                case 2;
                    this->setrom(0x4000, 0x8000, 0);
                    this->setrom(0x4000, 0xC000, regs[3] & 0xF);
                case 3;
                    this->setrom(0x4000, 0x8000, regs[3] & 0xF);
                    this->setrom(0x4000, 0xC000, ~0);
                }
            }
        }
        if addr >> 13 == 3 {
            return this.pram[addr & 0x1FFF];
        }
        // printf("read addr %.4X\n",addr);
        return this.banks[(addr / ROM_GRANULARITY) % ROM_PAGES][addr % ROM_GRANULARITY];
    }

    // Install the power-on mapping: default nametable layout, vrom bank 0, and rom
    // bank 0 everywhere except the top 16 KiB, which maps the last bank (index ~0).
    fn init(this *GamePak) void {
        this.nta = { &this.nram[0], &this.nram[0x400], &this.nram[0], &this.nram[0x400] };
        this->setvrom(0x2000, 0x0000, 0);
        for let v = 0; v < 4; ++v {
            this->setrom(0x4000, v * 0x4000, v == 3 ? ~0 : 0);
        }
    }
}

static g_pak GamePak = {};

struct CPU;
fn cpu_access(*CPU, addr u16, val u8, write bool) u8;
fn cpu_tick() void;

struct CPU {
    //CPU: Ricoh RP2A03 (based on MOS6502, almost the same as in Commodore 64)
    ram [0x800]u8,
    reset bool,
    nmi bool,
    nmi_edge bool,
    intr bool,
    pc u16,
    a u8, x u8, y u8, s u8, // registers
    p bitfield : u8 { // status flags
        c (0, 1) bool, ic (0, 1), // carry
        z (1, 1) bool, iz (1, 1), // zero
        i (2, 1) bool, ii (2, 1), // interrupt
        d (3, 1) bool, id (3, 1), // decimal
        v (6, 1) bool, iv (6, 1), // overflow
        n (7, 1) bool, in (7, 1), // negative
    },

    // Assert reset and set the interrupt flag.
    fn init(cpu *CPU) void {
        cpu.reset = #t;
        cpu.p.i = #t;
    }

    // Read one byte through cpu_access (which also advances the clock).
    fn RB(cpu *CPU, addr u16) u8 {
        return cpu_access(cpu, addr, 0, #f);
    }

    // Write one byte through cpu_access; returns whatever cpu_access returns.
    fn WB(cpu *CPU, addr u16, val u8) u8 {
        return cpu_access(cpu, addr, val, #t);
    }
}

static g_cpu CPU = {};

// One sprite as buffered for rendering.
struct PPUSpr {
    sprindex u8,
    y u8,
    index u8,
    attr u8,
    x u8,
    pattern u16
}

bitfield PPUScrolltype : u32 {
    raw (3,16),      // raw VRAM address (16-bit)
    xscroll (0, 8),  // low 8 bits of first write to 2005
    xfine (0, 3),    // low 3 bits of first write to 2005
    xcoarse (3, 5),  // high 5 bits of first write to 2005
    ycoarse (8, 5),  // high 5 bits of second write to 2005
    basenta (13,2),  // nametable index (copied from 2000)
    basenta_h (13,1), // horizontal nametable index
    basenta_v (14,1), // vertical nametable index
    yfine (15,3),    // low 3 bits of second write to 2005
    vaddrhi (11,8),  // first write to 2006 (with high 2 bits set to zero)
    vaddrlo (3, 8)   // second write to 2006
}

// Picture processing unit: register file, palette/OAM memory and the per-pixel
// rendering state machine.
struct PPU {
    reg bitfield : u32 {
        // Three columns: reg 0 (w) | reg 1 (w) | reg 2 (r), then reg 3 (w).
        sysctrl (0, 8),          dispctrl (8, 8),        status (16, 8),
        basenta (0, 2),          grayscale (8, 1) bool,  spoverflow (21, 1) bool,
        inc (2, 1),              showbg8 (9, 1) bool,    sp0hit (22, 1) bool,
        spaddr (3, 1),           showsp8 (10,1) bool,    invblank (23, 1) bool,
        bgaddr (4, 1),           showbg (11,1) bool,     // reg 3 (w)
        spsize (5, 1) bool,      showsp (12,1) bool,     oamaddr (24, 8),
        slaveflag (6, 1) bool,   showbgsp (11,2),        oamdata (24, 2),
        // emprgb is bits 5-7 of reg 1 (13..15 here); oamindex is oamaddr >> 2 (6 bits).
        nmienable (7, 1) bool,   emprgb (13, 3),         oamindex (26, 6),
    },
    palette [32]u8,
    oam [256]u8,
    oam2 [8]PPUSpr, // sprites collected for the next scanline
    oam3 [8]PPUSpr, // sprites being rendered, with decoded pattern
    scroll PPUScrolltype,
    vaddr PPUScrolltype,
    pat_addr uint,
    sprinpos uint,
    sproutpos uint,
    sprrenpos uint,
    sprtmp uint,
    tileattr u16,
    tilepat u16,
    ioaddr u16,
    bg_shift_pat u32,
    bg_shift_attr u32,
    scanline int,
    x int,
    scanline_end int,
    vblankstate int,
    ncycles int,
    readbuffer int,
    openbus int,
    openbus_decaytimer int,
    parity bool,
    offset_toggle bool,

    // Start at scanline 241 with full-length (341 column) scanlines.
    fn init(ppu *PPU) void {
        ppu.scanline = 241;
        ppu.scanline_end = 341;
    }

    // Memory mapping: Convert PPU memory address into a reference to relevant data
    fn mmap(ppu *PPU, i int) *u8 {
        i &= 0x3FFF;
        if i >= 0x3F00 {
            // Every fourth palette entry is shared between the two palette halves.
            if i % 4 == 0 {
                i &= 0x0F;
            }
            return &ppu.palette[i & 31];
        }
        if i < 0x2000 {
            return &g_pak.vbanks[(i / VROM_GRANULARITY) % VROM_PAGES][i % VROM_GRANULARITY];
        }
        return &g_pak.nta[(i >> 10) & 3][i & 0x3FF];
    }

    // external I/O: read or write
    // `index` is the register number 0..7; returns the value seen on the bus.
    fn access(ppu *PPU, index uint, v u8, write bool) u8 {
        defmacro refreshopenbus(v) [(do ppu.openbus_decaytimer = 77777; ppu.openbus = v; )]
        let res u8 = ppu.openbus;
        if write {
            refreshopenbus(v);
        }
        switch index {
        case 0;
            if write {
                ppu.reg.sysctrl = v;
                ppu.scroll.basenta = ppu.reg.basenta;
            }
        case 1;
            if write {
                ppu.reg.dispctrl = v;
            }
        case 2;
            if !write {
                res = ppu.reg.status | (ppu.openbus & 0x1F);
                ppu.reg.invblank = #f; // Reading $2002 clears the vblank flag.
                ppu.offset_toggle = #f; // Also resets the toggle for address updates.
                if ppu.vblankstate != -5 {
                    ppu.vblankstate = 0; // This also may cancel the setting of InVBlank.
                }
            }
        case 3;
            if write {
                ppu.reg.oamaddr = v;
            } // Index into OAM
        case 4;
            if write {
                ppu.oam[ppu.reg.oamaddr++] = v;
            } // Write/read the OAM
            else {
                res = refreshopenbus(ppu.oam[ppu.reg.oamaddr] & (ppu.reg.oamdata == 2 ? 0xE3 : 0xFF));
            }
        case 5; // set background scrolling offset
            if write {
                if ppu.offset_toggle {
                    ppu.scroll.yfine = v & 7;
                    ppu.scroll.ycoarse = v >> 3;
                } else {
                    ppu.scroll.xscroll = v;
                }
                ppu.offset_toggle = !ppu.offset_toggle;
            }
        case 6; // set video memory position for access
            if write {
                if ppu.offset_toggle {
                    ppu.scroll.vaddrlo = v;
                    ppu.vaddr = ppu.scroll;
                } else {
                    ppu.scroll.vaddrhi = v & 0x3F;
                }
                ppu.offset_toggle = !ppu.offset_toggle;
            }
        case 7;
            res = ppu.readbuffer;
            // Use the declared `raw` field (bits 3..18), not the whole backing word.
            let t = mmap(ppu, ppu.vaddr.raw); // access video memory
            if write {
                res = (*t = v);
            } else {
                if ppu.vaddr.raw & 0x3F00 == 0x3F00 { // palette?
                    res = (ppu.readbuffer = (ppu.openbus & 0xC0) | (*t & 0x3F));
                }
                ppu.readbuffer = *t;
            }
            refreshopenbus(res);
            // The step is chosen by the `inc` control bit (bit 2 of reg 0).
            ppu.vaddr.raw += (ppu.reg.inc != 0 ? 32 : 1); // update address
        }
        return res;
    }

    // One cycle of background/sprite fetching; tick calls it while rendering is
    // enabled on scanlines below 240.
    fn rendering_tick(ppu *PPU) void {
        let tile_decode_mode = as(bool)(0x10FFFF & (1u << (ppu.x / 16))); // when x is 0..255, 320..335
        // Each action happens in two steps: 1) select memory address; 2) receive data and react on it.
        switch ppu.x % 8 {
        case 0, 2; // point to nametable / attribute table
            if ppu.x % 8 == 2 {
                ppu.ioaddr = 0x23C0 + (0x400*ppu.vaddr.basenta) + (8*(ppu.vaddr.ycoarse/4)) + (ppu.vaddr.xcoarse/4);
            }
            if ppu.x % 8 == 0 or !tile_decode_mode {
                ppu.ioaddr = 0x2000 + (ppu.vaddr.raw & 0xFFF);
                // reset sprite data
                if ppu.x == 0 {
                    ppu.sprinpos = (ppu.sproutpos = 0);
                    if ppu.reg.showsp {
                        ppu.reg.oamaddr = 0;
                    }
                }
                if ppu.reg.showbg {
                    // reset scrolling (vertical once, horizontal each scanline)
                    if ppu.x == 304 and ppu.scanline == -1 {
                        ppu.vaddr = ppu.scroll;
                    }
                    if ppu.x == 256 {
                        ppu.vaddr.xcoarse = ppu.scroll.xcoarse;
                        ppu.vaddr.basenta_h = ppu.scroll.basenta_h;
                        ppu.sprrenpos = 0;
                    }
                }
            }
        case 1;
            if ppu.x == 337 and ppu.scanline == -1 and ppu.parity and ppu.reg.showbg {
                ppu.scanline_end = 340;
            }
            // name table access
            ppu.pat_addr = (0x1000*ppu.reg.bgaddr) + (16 * *mmap(ppu, ppu.ioaddr)) + ppu.vaddr.yfine;
            if tile_decode_mode {
                // push current tile into shift regs
                // the bitmap pattern is 16 bits, while the attribute is 2 bits, repeated 8 times
                ppu.bg_shift_pat = (ppu.bg_shift_pat >> 16) + (0x00010000 * ppu.tilepat);
                ppu.bg_shift_attr = (ppu.bg_shift_attr >> 16) + (0x55550000 * ppu.tileattr);
            }
        case 3;
            // attribute table access
            if tile_decode_mode {
                ppu.tileattr = (*mmap(ppu, ppu.ioaddr) >> ((ppu.vaddr.xcoarse&2) + (2*(ppu.vaddr.ycoarse&2)))) & 3;
                // go to the next tile horizontally (and switch nametable if it wraps)
                if ++ppu.vaddr.xcoarse == 0 {
                    ppu.vaddr.basenta_h = 1 - ppu.vaddr.basenta_h;
                }
                // at the edge of the screen do the same but vertically
                if ppu.x == 251 and ++ppu.vaddr.yfine == 0 and ++ppu.vaddr.ycoarse == 30 {
                    ppu.vaddr.ycoarse = 0;
                    ppu.vaddr.basenta_v = 1 - ppu.vaddr.basenta_v;
                }
            } else if ppu.sprrenpos < ppu.sproutpos {
                // select sprite pattern instead of background pattern
                let o = &ppu.oam3[ppu.sprrenpos]; // sprite to render on next scanline
                // Copy the whole sprite record; `o` is a pointer, so measure *o.
                memcpy(o, &ppu.oam2[ppu.sprrenpos], sizeof(*o));
                let y uint = ppu.scanline - o.y;
                if o.attr & 0x80 != 0 {
                    y ^= ppu.reg.spsize ? 15 : 7;
                }
                ppu.pat_addr = 0x1000 * (ppu.reg.spsize ? (o.index & 0x01) : ppu.reg.spaddr);
                ppu.pat_addr += 0x10 * (ppu.reg.spsize ? (o.index & 0xFE) : (o.index & 0xFF));
                ppu.pat_addr += (y&7) + ((y&8)*2);
            }
        case 5; // pattern table bytes
            ppu.tilepat = *mmap(ppu, ppu.pat_addr);
        case 7; // interleave bits of the two pattern bytes
            let p = ppu.tilepat | (*mmap(ppu, ppu.pat_addr|8) << 8);
            p = (p&0xF00F) | ((p&0x0F00)>>4) | ((p&0x00F0)<<4);
            p = (p&0xC3C3) | ((p&0x3030)>>2) | ((p&0x0C0C)<<2);
            p = (p&0x9999) | ((p&0x4444)>>1) | ((p&0x2222)<<1);
            ppu.tilepat = p;
            // When decoding sprites, save the sprite graphics and move to next sprite
            if !tile_decode_mode and ppu.sprrenpos < ppu.sproutpos {
                ppu.oam3[ppu.sprrenpos++].pattern = ppu.tilepat;
            }
        }
        // find which sprites are visible on next scanline (TODO: implement crazy 9-sprite malfunction)
        // NOTE(review): the upstream C++ appears to run the evaluation step on odd x
        // and the OAM fetch on even x; here the parity is the other way round. Confirm.
        switch ppu.x >= 64 and ppu.x < 256 and ppu.x%2 == 0 ? (ppu.reg.oamaddr++ & 3) : 4 {
        case else
            // access oam
            ppu.sprtmp = ppu.oam[ppu.reg.oamaddr];
        case 0;
            if ppu.sprinpos >= 64 {
                ppu.reg.oamaddr = 0;
            } else {
                ++ppu.sprinpos; // next sprite
                if ppu.sproutpos < 8 {
                    ppu.oam2[ppu.sproutpos].y = ppu.sprtmp;
                }
                if ppu.sproutpos < 8 {
                    ppu.oam2[ppu.sproutpos].sprindex = ppu.reg.oamindex;
                }
                // Sprite covers scanlines [y1, y2); skip its remaining bytes when
                // the current scanline is outside that range.
                let y1 int = ppu.sprtmp, y2 int = y1 + (ppu.reg.spsize ? 16 : 8);
                if !(ppu.scanline >= y1 and ppu.scanline < y2) {
                    ppu.reg.oamaddr = ppu.sprinpos != 2 ? ppu.reg.oamaddr + 3 : 8;
                }
            }
        case 1;
            if ppu.sproutpos < 8 {
                ppu.oam2[ppu.sproutpos].index = ppu.sprtmp;
            }
        case 2;
            if ppu.sproutpos < 8 {
                ppu.oam2[ppu.sproutpos].attr = ppu.sprtmp;
            }
        case 3;
            if ppu.sproutpos < 8 {
                ppu.oam2[ppu.sproutpos].x = ppu.sprtmp;
                ++ppu.sproutpos;
            } else {
                ppu.reg.spoverflow = #t;
            }
            if ppu.sprinpos == 2 {
                ppu.reg.oamaddr = 8;
            }
        }
    }

    // Compose background and sprite pixels for (x, scanline) and hand the
    // resulting palette index (plus emphasis bits) to g_io.putpixel.
    fn render_pixel(ppu *PPU) void {
        let edge = as(u8)(ppu.x + 8) < 16; // 0..7, 248..255
        let showbg = ppu.reg.showbg and (!edge or ppu.reg.showbg8);
        let showsp = ppu.reg.showsp and (!edge or ppu.reg.showsp8);
        //render the background
        let fx = ppu.scroll.xfine,
            xpos = 15u - (((ppu.x&7) + fx + (ppu.x&7!=0 ? 8 : 0)) & 15);
        let pixel = 0u, attr = 0u;
        if showbg {
            // pick a pixel from shift registers
            pixel = (ppu.bg_shift_pat >> (xpos*2)) & 3;
            attr = (ppu.bg_shift_attr >> (xpos*2)) & (pixel != 0 ? 3 : 0);
        } else if ppu.vaddr.raw & 0x3F00 == 0x3F00 and ppu.reg.showbgsp == 0 {
            pixel = ppu.vaddr.raw;
        }
        // overlay the sprites
        if showsp {
            for let sno = 0u; sno < ppu.sprrenpos; ++sno {
                let s = &ppu.oam3[sno];
                //check if sprite is horizontally in range
                let xdiff uint = ppu.x - s.x;
                if xdiff >= 8 {
                    continue; // unsigned compare also rejects sprites right of x
                }
                // determine which pixel to display; skip transparent ones
                if s.attr & 0x40 == 0 {
                    xdiff = 7 - xdiff;
                }
                let spritepixel u8 = (s.pattern >> (xdiff*2)) & 3;
                if spritepixel == 0 {
                    continue;
                }
                // check sprite-0 hit
                if ppu.x < 255 and pixel != 0 and s.sprindex == 0 {
                    ppu.reg.sp0hit = #t;
                }
                // render pixel unless behind-background placement wanted
                if s.attr & 0x20 == 0 or pixel == 0 {
                    attr = (s.attr & 3) + 4;
                    pixel = spritepixel;
                }
                // only process first non-transparent sprite pixel
                break;
            }
        }
        pixel = ppu.palette[((attr*4) + pixel) & 0x1F] & (ppu.reg.grayscale ? 0x30 : 0x3F);
        g_io->putpixel(ppu.x, ppu.scanline, pixel | (ppu.reg.emprgb << 6), ppu.ncycles);
    }

    // PPU:tick() -- This function is called 3 times per each CPU cycle.
    // Each call iterates through one pixel of the screen.
    // The screen is divided into 262 scanlines, each having 341 columns, as such:
    //
    //           x=0                 x=256      x=340
    //       ___|____________________|__________|
    //  y=-1    | pre-render scanline| prepare  | >
    //       ___|____________________| sprites _| > Graphics
    //  y=0     | visible area       | for the  | > processing
    //          | - this is rendered | next     | > scanlines
    //  y=239   |   on the screen.   | scanline | >
    //       ___|____________________|______
    //  y=240   | idle
    //       ___|_______________________________
    //  y=241   | vertical blanking (idle)
    //          | 20 scanlines long
    //  y=260___|____________________|__________|
    //
    // On actual PPU, the scanline begins actually before x=0, with
    // sync/colorburst/black/background color being rendered, and
    // ends after x=256 with background/black being rendered first,
    // but in this emulator we only care about the visible area.
    //
    // When background rendering is enabled, scanline -1 is
    // 340 or 341 pixels long, alternating each frame.
    // In all other situations the scanline is 341 pixels long.
    // Thus, it takes 89341 or 89342 PPU::tick() calls to render 1 frame.
    fn tick(ppu *PPU) void {
        // set/clear vblank where needed
        switch ppu.vblankstate {
        case -5;
            ppu.reg.status = 0;
        case 2;
            ppu.reg.invblank = #t;
        case 0;
            g_cpu.nmi = ppu.reg.invblank and ppu.reg.nmienable;
        }
        // Count vblankstate one step towards zero.
        if ppu.vblankstate != 0 {
            ppu.vblankstate += (ppu.vblankstate < 0 ? 1 : -1);
        }
        if ppu.openbus_decaytimer > 0 {
            if --ppu.openbus_decaytimer == 0 {
                ppu.openbus = 0;
            }
        }
        // graphics processing scanline?
        if ppu.scanline < 240 {
            // process graphics for this cycle
            if ppu.reg.showbgsp != 0 {
                ppu->rendering_tick();
            }
            if ppu.scanline >= 0 and ppu.x < 256 {
                ppu->render_pixel();
            }
        }
        // done with cycle. check for end of scanline
        if ++ppu.ncycles == 3 {
            ppu.ncycles = 0; #{ for NTSC rendering }
        }
        if ++ppu.x >= ppu.scanline_end {
            // begin new scanline
            g_io->flush_scanline(ppu.scanline);
            ppu.scanline_end = 341;
            ppu.x = 0;
            switch ppu.scanline += 1 {
            case 261; // begin rendering
                ppu.scanline = -1; // pre render line
                ppu.parity = !ppu.parity;
                // clear vblank
                ppu.vblankstate = -5;
            case 241; // begin of vblank
                // Joypad input is read from a movie file, one byte per enabled
                // pad per frame, rather than from a live input device.
                static fp *FILE = {};
                if fp == #null {
                    fp = fopen("input.fmv", "rb");
                };
                if(fp) {
                    static ctrlmask = 0u;
                    if(ftell(fp) == 0) {
                        // First use: read the pad mask at 0x05, then skip to the data at 0x90.
                        fseek(fp, 0x05, SEEK_SET);
                        ctrlmask = fgetc(fp);
                        fseek(fp, 0x90, SEEK_SET); // Famtasia Movie format.
                    }
                    if(ctrlmask & 0x80!=0) {
                        g_io.nextjoy[0] = fgetc(fp);
                        if feof(fp){g_io.nextjoy[0] = 0;}
                    }
                    if(ctrlmask & 0x40!=0) {
                        g_io.nextjoy[1] = fgetc(fp);
                        if feof(fp){g_io.nextjoy[1] = 0;}
                    }
                }
                // set vblank flag
                ppu.vblankstate = 2;
            }
        }
    }
}

static g_ppu PPU = {};

struct APU { // Audio Processing Unit
    fivecycledivider bool,
    irqdisable bool,
    channelsenabled [5]bool,
    periodicirq bool,
    dmc_irq bool,
    channels [5]struct {
        length_counter int,
        linear_counter int,
        address int,
        envelope int,
        sweep_delay int,
        env_delay int,
        wave_counter int,
        hold int,
        phase int,
        level int,
        reg bitfield : u32 { // per channel register file
            // 4000, 4004, 400C, 4012:     // 4001, 4005, 4013:     // 4002, 4006, 400A, 400E:
            reg0 (0,8),                    reg1 (8, 8),             reg2 (16, 8),
            dutycycle (6,2),               sweepshift (8, 3),       noisefreq (16, 4),
            envdecaydisable (4,1) bool,    sweepdecrease (11,1),    noisetype (23, 1) bool,
            envdecayrate (0,4),            sweeprate (12,3),        wavelength (16,11),
            envdecayloopenable (5,1) bool, sweepenable (15,1) bool, // 4003, 4007, 400b, 400f, 4010:
            fixedvolume (0,4),             pcmlength (8, 8),        reg3 (24, 8),
            lengthcounterdisable (5,1) bool,                        lengthcounterinit (27, 5),
            linearcounterinit (0,7),                                loopenabled (30, 1) bool,
            linearcounterdisable (7,1) bool,                        irqenable (31, 1) bool,
        }
    },
    hz240counter struct { lo i16, hi i16 },

    // Decrement *v; when it drops below zero, reload it with `reset` and return true.
    fn count(v *int, reset int) bool {
        if --*v < 0 {
            *v = reset;
            return #t;
        }
        return #f;
    }

    typedef Channel typeof((APU{}).channels[0]);

    // Advance channel `ch` by one CPU cycle and return its current output level.
    // `c` selects the waveform generator (0/1 square, 2 triangle, 3 noise, 4 DMC)
    // and the channelsenabled entry that is consulted.
    fn tick_channel(apu *APU, ch *Channel, c uint) int {
        if !apu.channelsenabled[c] {
            return c == 4 ? 64 : 8;
        }
        let wl = (ch.reg.wavelength + 1) * (c >= 2 ? 1 : 2);
        static const NoisePeriods [16]const u16 = {
            2,4,8,16,32,48,64,80,101,127,190,254,381,508,1017,2034
        };
        if c == 3 {
            wl = NoisePeriods[ch.reg.noisefreq];
        }
        let volume = ch.length_counter > 0
            ? (ch.reg.envdecaydisable ? ch.reg.fixedvolume : ch.envelope)
            : 0;
        let S = &ch.level;
        // The sample may only change when the wave counter expires; until then
        // keep returning the previous level.
        if !count(&ch.wave_counter, wl) {
            return *S;
        }
        switch c {
        case else
            // square wave. with 4 different 8-step binary waveforms (32 bits of data total)
            if wl < 8 {
                return *S = 8;
            }
            return *S = (0xF33C0C04u & (1u << ((++ch.phase % 8) + (ch.reg.dutycycle * 8)))) != 0 ? volume : 0;
        case 2; // triangle wave
            if ch.length_counter > 0 and ch.linear_counter > 0 and wl >= 3 {
                ++ch.phase;
            }
            return *S = (ch.phase & 15) ^ ((ch.phase & 16) != 0 ? 15 : 0);
        case 3; // noise: LSFR
            if ch.hold == 0 {
                ch.hold = 1;
            }
            ch.hold = (ch.hold >> 1) | (((ch.hold ^ (ch.hold >> (ch.reg.noisetype ? 6 : 1))) & 1) << 14);
            return *S = (ch.hold & 1) != 0 ? 0 : volume;
        case 4; // delta modulation channel (DMC)
            // hold = 8 bit value, phase = number of bits buffered
            if ch.phase == 0 { // nothing in sample buffer?
                if ch.length_counter == 0 and ch.reg.loopenabled { // Loop?
                    ch.length_counter = (ch.reg.pcmlength * 16) + 1;
                    ch.address = (ch.reg.reg0 | 0x300) << 6;
                }
                if ch.length_counter > 0 { // load next 8 bits
                    // Note: Re-entrant! But not recursive, because even
                    // the shortest wave length is greater than the read time.
                    // TODO: proper clock
                    if ch.reg.wavelength > 20 {
                        for let t=0; t<3; ++t {
                            g_cpu->RB(as(u16)(ch.address) | 0x8000);
                        } // timing
                    }
                    ch.hold = g_cpu->RB(as(u16)(ch.address++) | 0x8000); // fetch byte
                    ch.phase = 8;
                    --ch.length_counter;
                } else { // disable channel or issue irq
                    apu.channelsenabled[4] = ch.reg.irqenable and (g_cpu.intr = (apu.dmc_irq = #t));
                }
            }
            if ch.phase != 0 { // update the signal if sample buffer non empty
                let v = ch.linear_counter;
                // Test the next buffered bit of `hold`; phase is consumed either way.
                if ch.hold & (0x80 >> --ch.phase) != 0 {
                    v += 2;
                } else {
                    v -= 2;
                }
                if v >= 0 and v <= 0x7F {
                    ch.linear_counter = v;
                }
            }
            return *S = ch.linear_counter;
        }
    }

    // Start with the periodic IRQ disabled.
    fn init(apu *APU) void {
        apu.irqdisable = #t;
    }

    // Handle a CPU write to APU register `index` (address & 0x1F).
    fn write(apu *APU, index u8, value u8) void {
        let ch = &apu.channels[(index/4)%5];
        switch index < 0x10 ? index%4 : index {
        case 0;
            if ch.reg.linearcounterdisable {
                ch.linear_counter = value&0x7F;
            }
            // The register itself is always stored, whatever the old flag was.
            ch.reg.reg0 = value;
        case 1;
            ch.reg.reg1 = value;
            ch.sweep_delay = ch.reg.sweeprate;
        case 2;
            ch.reg.reg2 = value;
        case 3;
            ch.reg.reg3 = value;
            if apu.channelsenabled[index/4] {
                static const LengthCounters[32]const u8 = {
                    10,254,20, 2,40, 4,80, 6,160, 8,60,10,14,12,26,14,
                    12, 16,24,18,48,20,96,22,192,24,72,26,16,28,32,30
                };
                ch.length_counter = LengthCounters[ch.reg.lengthcounterinit];
            }
            ch.linear_counter = ch.reg.linearcounterinit;
            ch.env_delay = ch.reg.envdecayrate;
            ch.envelope = 15;
            if index < 8 {
                ch.phase = 0;
            }
        case 0x10;
            static const DMCperiods[16]const u16 = {
                428,380,340,320,286,254,226,214,190,160,142,128,106,84,72,54
            };
            ch.reg.reg3 = value;
            ch.reg.wavelength = DMCperiods[value&0xF];
        case 0x12;
            ch.reg.reg0 = value;
            ch.address = (ch.reg.reg0 | 0x300) << 6;
        case 0x13;
            ch.reg.reg1 = value;
            ch.length_counter = (ch.reg.pcmlength*16) + 1; // sample length
        case 0x11;
            ch.linear_counter = value & 0x7F; // dac value
        case 0x15;
            for let c = 0; c<5; ++c {
                apu.channelsenabled[c] = value & (1 << c) != 0;
            }
            for let c = 0; c<5; ++c {
                if !apu.channelsenabled[c] {
                    apu.channels[c].length_counter = 0;
                } else if c == 4 and apu.channels[c].length_counter == 0 {
                    apu.channels[c].length_counter = (ch.reg.pcmlength*16) + 1;
                }
            }
        case 0x17;
            apu.irqdisable = value & 0x40 != 0;
            apu.fivecycledivider = value & 0x80 != 0;
            apu.hz240counter = { 0, 0 };
            if apu.irqdisable {
                apu.periodicirq = (apu.dmc_irq = #f);
            }
        }
    }

    // Status read: one bit per channel with a running length counter, plus the two
    // IRQ flags. Reading clears both flags and the CPU interrupt line.
    fn read(apu *APU) u8 {
        let res u8 = 0;
        for let c=0; c<5; ++c {
            res |= (apu.channels[c].length_counter > 0 ? 1 << c : 0);
        }
        if apu.periodicirq {
            res |= 0x40;
            apu.periodicirq = #f;
        }
        if apu.dmc_irq {
            res |= 0x80;
            apu.dmc_irq = #f;
        }
        g_cpu.intr = #f;
        return res;
    }

    fn tick(apu *APU) void { // invoked at cpu's rate
        // Divide CPU clock by 7457.5 to get a 240 Hz, which controls certain events.
        if (apu.hz240counter.lo += 2) >= 14915 {
            apu.hz240counter.lo -= 14915;
            if ++apu.hz240counter.hi >= 4+as(int)apu.fivecycledivider {
                apu.hz240counter.hi = 0;
            }
            // 60 Hz interval: IRQ. IRQ is not invoked in five-cycle mode (48 Hz).
            if !apu.irqdisable and !apu.fivecycledivider and apu.hz240counter.hi == 0 {
                g_cpu.intr = (apu.periodicirq = #t);
            }
            // Some events are invoked at 96 Hz or 120 Hz rate. Others, 192 Hz or 240 Hz.
            let halftick = (apu.hz240counter.hi & 5) == 1,
                fulltick = apu.hz240counter.hi < 4;
            for let c = 0; c < 4; ++c {
                let ch = &apu.channels[c];
                let wl = ch.reg.wavelength;
                // Length tick (all channels except DMC, but different disable bit for triangle wave)
                if halftick and ch.length_counter > 0
                   and !(c == 2 ? ch.reg.linearcounterdisable : ch.reg.lengthcounterdisable) {
                    ch.length_counter -= 1;
                }
                // Sweep tick (square waves only)
                if halftick and c < 2 and count(&ch.sweep_delay, ch.reg.sweeprate) {
                    if wl >= 9 and ch.reg.sweepenable and ch.reg.sweepshift != 0 {
                        // Channel 0 subtracts s+1 (~s), channel 1 subtracts s (-s).
                        let s = wl >> ch.reg.sweepshift, d [4]int = {s,s,~s,-s};
                        wl += d[(ch.reg.sweepdecrease*2)+ c];
                        if wl < 0x800 {
                            ch.reg.wavelength = wl;
                        }
                    }
                }
                // Linear tick (triangle wave only)
                if fulltick and c == 2 {
                    ch.linear_counter = ch.reg.linearcounterdisable
                        ? ch.reg.linearcounterinit
                        : (ch.linear_counter > 0 ? ch.linear_counter - 1 : 0);
                }
                // envelope tick (square and noise channels)
                if fulltick and c != 2 and count(&ch.env_delay, ch.reg.envdecayrate) {
                    if ch.envelope > 0 or ch.reg.envdecayloopenable {
                        ch.envelope = (ch.envelope - 1) & 15;
                    }
                }
            }
        }
        // mix the audio: get the momentary sample from each channel and mix them
        defmacro s(c) [ (apu->tick_channel(&apu.channels[c], c == 1 ? 0 : c)) ]
        fn v(m f32, n f32, d f32) f32 {
            return n != 0.f ? m/n : d;
        }
        // The sample is computed (this is what advances the channels) but it is not
        // sent to an audio device yet.
        let sample i16 = 30000 *
            (v(95.88f, (100.f + v(8128.f, s(0) + s(1), -100.f)), 0.f)
             + v(159.79f, (100.f + v(1.0, s(2)/8227.f + s(3)/12241.f + s(4)/22638.f, -100.f)), 0.f)
             + -0.5f);
        // SDL_QueueAudio(g_io.auddev, &sample, 2);
        static r *FILE = {};
    }
}

static g_apu APU = {};

// Advance the rest of the machine by one CPU cycle: three PPU ticks, one APU tick.
fn cpu_tick() void {
    for let n = 0; n < 3; ++n {
        g_ppu->tick();
    }
    for let n = 0; n < 1; ++n {
        g_apu->tick();
    }
}

// Perform one CPU bus access (costing one cpu_tick) and route it to RAM, PPU,
// APU / joypads or the cartridge. Returns the byte read; 0 for most writes.
fn cpu_access(cpu *CPU, addr u16, v u8, write bool) u8 {
    // memory writes are turned into reads while reset is being signalled
    if cpu.reset and write {
        return cpu_access(cpu, addr, 0, #f);
    }
    cpu_tick();
    //map the memory from cpu's viewpoint
    switch {
    case addr < 0x2000;
        let r = &cpu.ram[addr & 0x7FF];
        if !write {
            return *r;
        }
        *r = v;
    case addr < 0x4000;
        return g_ppu->access(addr & 7, v, write);
    case addr < 0x4018;
        switch addr & 0x1F {
        case 0x14; // OAM DMA
            if write {
                for let b = 0; b<256; ++b {
                    cpu->WB(0x2004, cpu->RB(((v&7)*0x100)+b));
                }
            }
        case 0x15;
            if !write {
                return g_apu->read();
            }
            g_apu->write(0x15,v);
        case 0x16;
            if !write {
                return g_io->joy_read(0);
            }
            g_io->joy_strobe(v);
        case 0x17;
            if !write {
                return g_io->joy_read(1);
            }
            g_apu->write(addr & 0x1F, v);
        case else
            if write {
                g_apu->write(addr&0x1F, v);
            }
        }
    case else
        return g_pak->access(addr, v, write);
    }
    return 0;
}

// Combine the high byte of `oldaddr` with the low byte of `newaddr`.
fn Cwrap(oldaddr u16, newaddr u16) u16 {
    return (oldaddr & 0xFF00) + as(u8)newaddr;
}

// Issue the extra read at the page-wrapped address when `addr` is on a different
// page than `old`.
fn Cmisfire(cpu *CPU, old u16, addr u16) void {
    let q = Cwrap(old, addr);
    if q != addr {
        cpu->RB(q);
    }
}

// Pop one byte from the stack page (0x100..0x1FF).
fn Cpop(cpu *CPU) u8 {
    return cpu->RB(0x100 | as(u8)++cpu.s);
}

// Push one byte onto the stack page; returns the result of the bus write.
fn Cpush(cpu *CPU, v u8) u8 {
    return cpu->WB(0x100 | as(u8)cpu.s--, v);
}

struct CIns {
    // Execute a single CPU instruction, defined by opcode "op".
    // NOTE(review): `op` is not declared anywhere in this function (upstream it is
    // a template parameter), and cpu_op's dispatch table points every entry at this
    // one function. Per-opcode specialisation still has to be wired up.
    // NOTE(review): the O() bitstrings below look whitespace-collapsed compared to
    // the fixed-width rows the decoder indexes with op/8; verify against upstream.
    fn ins(cpu *CPU) void {
        // With template magic, the compiler will literally synthesize >256 different functions.
        // Note: op 0x100 means "NMI", 0x101 means "Reset", 0x102 means "IRQ". They are implemented in terms of "BRK".
        // User is responsible for ensuring that WB() will not store into memory while Reset is being processed.
        let addr=0u, d=0u, t=0xFFu, c=0u, sb=0u, pbits = op<0x100 ? 0x30u : 0x20u;
        // Define the opcode decoding matrix, which decides which micro-operations constitute
        // any particular opcode. (Note: The PLA of 6502 works on a slightly different principle.)
        def const o8 int = op/8;
        def const o8m int = 1 << (op%8);
        // Fetch op'th item from a bitstring encoded in a data-specific variant of base64,
        // where each character transmits 8 bits of information rather than 6.
        // This peculiar encoding was chosen to reduce the source code size.
        defmacro O(s,code) [ { def const i int = o8m & (s[o8]>90 ? (130+" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[s[o8]-94]) : (s[o8]-" (("[s[o8]/39])); if i!=0 { code; } } ]
        def X = cpu.x, A = cpu.a, Y = cpu.y, PC = cpu.pc, S = cpu.s, P = cpu.p;
        defmacro RB(a) [ cpu->RB(a) ]
        defmacro WB(a,x) [ cpu->WB(a,x) ]
        defmacro Misfire(...args) [Cmisfire(cpu, args)]
        defmacro Pop() [Cpop(cpu)]
        defmacro Push(x) [Cpush(cpu,x)]
        def wrap = Cwrap;
        // Decode address operand
        O(" !", addr = 0xFFFA) // NMI vector location
        O(" *", addr = 0xFFFC) // Reset vector location
        O("! ,", addr = 0xFFFE) // Interrupt vector location
        O("zy}z{y}zzy}zzy}zzy}zzy}zzy}zzy}z ", addr = RB(PC++))
        O("2 yy2 yy2 yy2 yy2 XX2 XX2 yy2 yy ", d = X) // register index
        O(" 62 62 62 62 om om 62 62 ", d = Y)
        O("2 y 2 y 2 y 2 y 2 y 2 y 2 y 2 y ", addr=as(u8)(addr+d); d=0; cpu_tick()) // add zeropage-index
        O(" y z!y z y z y z y z y z y z y z ", addr=as(u8)(addr); addr+=256*RB(PC++)) // absolute address
        O("3 6 2 6 2 6 286 2 6 2 6 2 6 2 6 /", addr=RB(c=addr); addr+=256*RB(wrap(c,c+1)))// indirect w/ page wrap
        O(" *Z *Z *Z *Z 6z *Z *Z ", Misfire(addr, addr+d)) // abs. load: extra misread when cross-page
        O(" 4k 4k 4k 4k 6z 4k 4k ", RB(wrap(addr, addr+d)))// abs. store: always issue a misread
        // Load source operand
        O("aa__ff__ab__,4 ____ - ____ ", t &= A) // Many operations take A or X as operand. Some try in
        O(" knnn 4 99 ", t &= X) // error to take both; the outcome is an AND operation.
        O(" 9989 99 ", t &= Y) // sty,dey,iny,tya,cpy
        O(" 4 ", t &= S) // tsx, las
        O("!!!! !! !! !! ! !! !! !!/", t &= P.#raw|pbits; c = t)// php, flag test/set/clear, interrupts
        O("_^__dc___^__ ed__98 ", c = t; t = 0xFF) // save as second operand
        O("vuwvzywvvuwvvuwv zy|zzywvzywv ", t &= RB(addr+d)) // memory operand
        O(",2 ,2 ,2 ,2 -2 -2 -2 -2 ", t &= RB(PC++)) // immediate operand
        // Operations that mogrify memory operands directly
        O(" 88 ", P.v = 0!= t & 0x40; P.n = 0!= t & 0x80) // bit
        O(" nink nnnk ", sb = P.ic) // rol,rla, ror,rra,arr
        O("nnnknnnk 0 ", P.c = 0!= t & 0x80) // rol,rla, asl,slo,[arr,anc]
        O(" nnnknink ", P.c = 0!= t & 0x01) // lsr,sre, ror,rra,asr
        O("ninknink ", t = (t << 1) | (sb * 0x01))
        O(" nnnknnnk ", t = (t >> 1) | (sb * 0x80))
        O(" ! kink ", t = as(u8)(t - 1)) // dec,dex,dey,dcp
        O(" ! khnk ", t = as(u8)(t + 1)) // inc,inx,iny,isb
        // Store modified value (memory)
        O("kgnkkgnkkgnkkgnkzy|J kgnkkgnk ", WB(addr+d, t))
        O(" q ", WB(wrap(addr, addr+d), t &= ((addr+d) >> 8))) // [shx,shy,shs,sha?]
        // Some operations used up one clock cycle that we did not account for yet
        O("rpstljstqjstrjst - - - -kjstkjst/", cpu_tick()) // nop,flag ops,inc,dec,shifts,stack,transregister,interrupts
        // Stack operations and unconditional jumps
        O(" ! ! ! ", cpu_tick(); t = Pop()) // pla,plp,rti
        O(" ! ! ", RB(PC++); PC = Pop(); PC |= (Pop() << 8)) // rti,rts
        O(" ! ", RB(PC++)) // rts
        O("! ! /", d=PC+(op!=0?-1:1); Push(d>>8); Push(d)) // jsr, interrupts
        O("! ! 8 8 /", PC = addr) // jmp, jsr, interrupts
        O("!! ! /", Push(t)) // pha, php, interrupts
        // Bitmasks
        O("! !! !! !! !! ! !! !! !!/", t = 1)
        O(" ! ! !! !! ", t <<= 1)
        O("! ! ! !! !! ! ! !/", t <<= 2)
        O(" ! ! ! ! ! ", t <<= 4)
        O(" ! ! ! !____ ", t = as(u8)(~t)) // sbc, isb, clear flag
        O("`^__ ! ! !/", t = c | t) // ora, slo, set flag
        O(" !!dc`_ !! ! ! !! !! ! ", t = c & t) // and, bit, rla, clear/test flag
        O(" _^__ ", t = c ^ t) // eor, sre
        // Conditional branches
        O(" ! ! ! ! ", if 0!=t { cpu_tick(); Misfire(PC, addr = as(i8)(addr) + PC); PC=addr; })
        O(" ! ! ! ! ", if 0==t { cpu_tick(); Misfire(PC, addr = as(i8)(addr) + PC); PC=addr; })
        // Addition and subtraction
        O(" _^__ ____ ", c = t; t += A + P.ic; P.v = 0!= (c^t) & (A^t) & 0x80; P.c = 0!= t & 0x100)
        O(" ed__98 ", t = c - t; P.c = 0!= ~t & 0x100) // cmp,cpx,cpy, dcp, sbx
        // Store modified value (register)
        O("aa__aa__aa__ab__ 4 !____ ____ ", A = t)
        O(" nnnn 4 ! ", X = t) // ldx, dex, tax, inx, tsx,lax,las,sbx
        O(" ! 9988 ! ", Y = t) // ldy, dey, tay, iny
        O(" 4 0 ", S = t) // txs, las, shs
        O("! ! ! !! ! ! ! ! !/", P.#raw = t & ~0x30) // plp, rti, flag set/clear
        // Generic status flag updates
        O("wwwvwwwvwwwvwxwv 5 !}}||{}wv{{wv ", P.n = 0!= t & 0x80)
        O("wwwv||wvwwwvwxwv 5 !}}||{}wv{{wv ", P.z = as(u8)(t) == 0)
        O(" 0 ", P.v = 0!= (((t >> 5)+1)&2)) // [arr]
        // All implemented opcodes are cycle-accurate and memory-access-accurate.
        // [] means that this particular separate rule exists only to provide the indicated unofficial opcode(s).
    }
}

// Fetch the next opcode (or substitute a pending reset/NMI/IRQ pseudo-opcode) and
// dispatch it through the handler table. Clears the reset signal afterwards.
fn cpu_op(cpu *CPU) void {
    let nmi_now = cpu.nmi; // check nmi
    let op int = cpu->RB(cpu.pc++);
    switch {
    case cpu.reset;
        op = 0x101;
    case nmi_now and !cpu.nmi_edge;
        // NMI is edge-triggered: fire once per low-to-high transition.
        op = 0x100;
        cpu.nmi_edge = #t;
    case cpu.intr and !cpu.p.i;
        op = 0x102;
    }
    if !nmi_now {
        cpu.nmi_edge = #f;
    }
    // Each I(n) expands to eight table entries; the argument is currently unused.
    defmacro I(n) [&CIns:ins,&CIns:ins,&CIns:ins,&CIns:ins, &CIns:ins,&CIns:ins,&CIns:ins,&CIns:ins,]
    static const i [0x108]const *fn(*CPU)void = {
        I(0x00)I(0x08)I(0x10)I(0x18)I(0x20)I(0x28)I(0x30)I(0x38)
        I(0x40)I(0x48)I(0x50)I(0x58)I(0x60)I(0x68)I(0x70)I(0x78)
        I(0x80)I(0x88)I(0x90)I(0x98)I(0xA0)I(0xA8)I(0xB0)I(0xB8)
        I(0xC0)I(0xC8)I(0xD0)I(0xD8)I(0xE0)I(0xE8)I(0xF0)I(0xF8)
        I(0x100)
    };
    i[op](cpu);
    cpu.reset = #f;
}

// Entry point: load the iNES ROM named by argv[1], initialise every subsystem and
// run the CPU forever. Returns 1 on a usage, file or header error.
extern fn main(argc int, argv **u8) int {
    if SDL_Init() != 0 {
        fprintf(stderr, "SDL: %s\n", SDL_GetError());
        return 1;
    }
    let fp *FILE #?;
    if argc < 2 {
        fprintf(stderr, "ROM path?\n");
        return 1;
    }
    fp = fopen(argv[1], "rb");
    if fp == #null {
        fprintf(stderr, "error opening rom\n");
        return 1;
    }
    // read rom file header
    if !(fgetc(fp) == 'N' and fgetc(fp) == 'E' and fgetc(fp) == 'S' and fgetc(fp) == 0x1A) {
        fprintf(stderr, "bad rom\n");
        fclose(fp);
        return 1;
    }
    let rom16count u8 = fgetc(fp),
        vrom8count u8 = fgetc(fp),
        ctrlbyte u8 = fgetc(fp),
        mappernum u8 = fgetc(fp) | (ctrlbyte>>4);
    // Skip the remaining eight header bytes.
    fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);
    if mappernum >= 0x40 {
        mappernum &= 15;
    }
    // A ROM without program banks cannot be mapped: the bank setup indexes the rom
    // slice modulo its length.
    if rom16count == 0 {
        fprintf(stderr, "bad rom\n");
        fclose(fp);
        return 1;
    }
    // Read the ROM data
    if rom16count > 0 {
        g_pak.rom = (as(*u8)malloc(rom16count * 0x4000))[0::rom16count*0x4000];
    }
    if vrom8count > 0 {
        g_pak.vram =(as(*u8)malloc(vrom8count * 0x2000))[0::vrom8count*0x2000];
    } else {
        // No vrom banks in the file: allocate 8 KiB for the pattern tables instead.
        g_pak.vram = (as(*u8)malloc(0x2000))[0::0x2000];
    }
    fread(&g_pak.rom[0], rom16count, 0x4000, fp);
    fread(&g_pak.vram[0], vrom8count, 0x2000, fp);
    fclose(fp);
    printf("%u * 16kB ROM, %u * 8kB VROM, mapper %u, ctrlbyte %02X\n", rom16count, vrom8count, mappernum, ctrlbyte);
    g_io->init();
    g_pak->init();
    g_pak.mapperno = mappernum;
    g_cpu->init();
    g_ppu->init();
    g_apu->init();
    // Pre-initialize RAM the same way as FCEUX does, to improve TAS sync.
    for let a=0; a<0x800; ++a {
        g_cpu.ram[a] = (a&4)!=0 ? 0xFF : 0x00;
    }
    for ;; {
        cpu_op(&g_cpu);
    }
}