diff options
Diffstat (limited to 'examples/nesemu1')
| -rw-r--r-- | examples/nesemu1/nesemu1.cc | 951 | ||||
| -rw-r--r-- | examples/nesemu1/nesemu1.cff | 929 | ||||
| -rw-r--r-- | examples/nesemu1/sdl.hff | 43 |
3 files changed, 0 insertions, 1923 deletions
diff --git a/examples/nesemu1/nesemu1.cc b/examples/nesemu1/nesemu1.cc deleted file mode 100644 index 3be2489..0000000 --- a/examples/nesemu1/nesemu1.cc +++ /dev/null @@ -1,951 +0,0 @@ -#include <stdint.h> -#include <signal.h> -#include <assert.h> -#include <cmath> - -#include <SDL/SDL.h> -#include <vector> - -/* NESEMU1 : EMULATOR FOR THE NINTENDO ENTERTAINMENT SYSTEM (R) ARCHITECTURE */ -/* Written by and copyright (C) 2011 Joel Yliluoma - http://iki.fi/bisqwit/ */ -/* Trademarks are owned by their respective owners. Lawyers love tautologies. */ - -static const char* inputfn = "input.fmv"; - -// Integer types -typedef uint_least32_t u32; -typedef uint_least16_t u16; -typedef uint_least8_t u8; -typedef int_least8_t s8; - -// Bitfield utilities -template<unsigned bitno, unsigned nbits=1, typename T=u8> -struct RegBit -{ - T data; - enum { mask = (1u << nbits) - 1u }; - template<typename T2> - RegBit& operator=(T2 val) - { - data = (data & ~(mask << bitno)) | ((nbits > 1 ? val & mask : !!val) << bitno); - return *this; - } - operator unsigned() const { return (data >> bitno) & mask; } - RegBit& operator++ () { return *this = *this + 1; } - unsigned operator++ (int) { unsigned r = *this; ++*this; return r; } -}; - -namespace IO -{ - SDL_Surface *s; - void Init() - { - SDL_Init(SDL_INIT_VIDEO); - SDL_InitSubSystem(SDL_INIT_VIDEO); - s = SDL_SetVideoMode(256, 240, 32,0); - signal(SIGINT, SIG_DFL); - } - - void PutPixel(unsigned px,unsigned py, unsigned pixel, int offset) - { - // The input value is a NES color index (with de-emphasis bits). - // We need RGB values. To produce a RGB value, we emulate the NTSC circuitry. - // For most part, this process is described at: - // http://wiki.nesdev.com/w/index.php/NTSC_video - // Incidentally, this code is shorter than a table of 64*8 RGB values. - static unsigned palette[3][64][512] = {}, prev=~0u; - // Caching the generated colors - if(prev == ~0u) - for(int o=0; o<3; ++o) - for(int u=0; u<3; ++u) - for(int p0=0; p0<512; ++p0) - for(int p1=0; p1<64; ++p1) - { - // Calculate the luma and chroma by emulating the relevant circuits: - auto s = "\372\273\32\305\35\311I\330D\357\175\13D!}N"; - int y=0, i=0, q=0; - for(int p=0; p<12; ++p) // 12 samples of NTSC signal constitute a color. - { - // Sample either the previous or the current pixel. - int r = (p+o*4)%12, pixel = r < 8-u*2 ? p0 : p1; // Use pixel=p0 to disable artifacts. - // Decode the color index. - int c = pixel%16, l = c<0xE ? pixel/4 & 12 : 4, e=p0/64; - // NES NTSC modulator (square wave between up to four voltage levels): - int b = 40 + s[(c > 12*((c+8+p)%12 < 6)) + 2*!(0451326 >> p/2*3 & e) + l]; - // Ideal TV NTSC demodulator: - y += b; - i += b * int(std::cos(M_PI * p / 6) * 5909); - q += b * int(std::sin(M_PI * p / 6) * 5909); - } - // Convert the YIQ color into RGB - auto gammafix = [=](float f) { return f <= 0.f ? 0.f : std::pow(f, 2.2f / 1.8f); }; - auto clamp = [](int v) { return v>255 ? 255 : v; }; - // Store color at subpixel precision - if(u==2) palette[o][p1][p0] += 0x10000*clamp(255 * gammafix(y/1980.f + i* 0.947f/9e6f + q* 0.624f/9e6f)); - if(u==1) palette[o][p1][p0] += 0x00100*clamp(255 * gammafix(y/1980.f + i*-0.275f/9e6f + q*-0.636f/9e6f)); - if(u==0) palette[o][p1][p0] += 0x00001*clamp(255 * gammafix(y/1980.f + i*-1.109f/9e6f + q* 1.709f/9e6f)); - } - // Store the RGB color into the frame buffer. - ((u32*) s->pixels) [py * 256 + px] = palette[offset][prev%64][pixel]; - prev = pixel; - } - void FlushScanline(unsigned py) - { - if(py == 239) { - SDL_Flip(s); - } - } - - int joy_current[2]={0,0}, joy_next[2]={0,0}, joypos[2]={0,0}; - void JoyStrobe(unsigned v) - { - if(v) { joy_current[0] = joy_next[0]; joypos[0]=0; } - if(v) { joy_current[1] = joy_next[1]; joypos[1]=0; } - } - u8 JoyRead(unsigned idx) - { - static const u8 masks[8] = {0x20,0x10,0x40,0x80,0x04,0x08,0x02,0x01}; - return ((joy_current[idx] & masks[joypos[idx]++ & 7]) ? 1 : 0); - } -} - -namespace GamePak -{ - std::vector<u8> ROM, VRAM(0x2000); - unsigned mappernum; - const unsigned VROM_Granularity = 0x0400, VROM_Pages = 0x2000 / VROM_Granularity; - const unsigned ROM_Granularity = 0x2000, ROM_Pages = 0x10000 / ROM_Granularity; - unsigned char NRAM[0x1000], PRAM[0x2000]; - unsigned char* banks[ROM_Pages] = {}; - unsigned char* Vbanks[VROM_Pages] = {}; - unsigned char *Nta[4] = { NRAM+0x0000, NRAM+0x0400, NRAM+0x0000, NRAM+0x0400 }; - - template<unsigned npages,unsigned char*(&b)[npages], std::vector<u8>& r, unsigned granu> - static void SetPages(unsigned size, unsigned baseaddr, unsigned index) - { - for(unsigned v = r.size() + index * size, - p = baseaddr / granu; - p < (baseaddr + size) / granu && p < npages; - ++p, v += granu) { - b[p] = &r[v % r.size()]; - } - } - auto& SetROM = SetPages< ROM_Pages, banks, ROM, ROM_Granularity>; - auto& SetVROM = SetPages<VROM_Pages,Vbanks,VRAM,VROM_Granularity>; - - u8 Access(unsigned addr, u8 value, bool write) - { - if(write && addr >= 0x8000 && mappernum == 7) // e.g. Rare games - { - SetROM(0x8000, 0x8000, (value&7)); - Nta[0] = Nta[1] = Nta[2] = Nta[3] = &NRAM[0x400 * ((value>>4)&1)]; - } - if(write && addr >= 0x8000 && mappernum == 2) // e.g. Rockman, Castlevania - { - SetROM(0x4000, 0x8000, value); - } - if(write && addr >= 0x8000 && mappernum == 3) // e.g. Kage, Solomon's Key - { - value &= Access(addr,0,false); // Simulate bus conflict - SetVROM(0x2000, 0x0000, (value&3)); - } - if(write && addr >= 0x8000 && mappernum == 1) // e.g. Rockman 2, Simon's Quest - { - static u8 regs[4]={0x0C,0,0,0}, counter=0, cache=0; - if(value & 0x80) { regs[0]=0x0C; goto configure; } - cache |= (value&1) << counter; - if(++counter == 5) - { - regs[ (addr>>13) & 3 ] = value = cache; - configure: - cache = counter = 0; - static const u8 sel[4][4] = { {0,0,0,0}, {1,1,1,1}, {0,1,0,1}, {0,0,1,1} }; - for(unsigned m=0; m<4; ++m) Nta[m] = &NRAM[0x400 * sel[regs[0]&3][m]]; - SetVROM(0x1000, 0x0000, ((regs[0]&16) ? regs[1] : ((regs[1]&~1)+0))); - SetVROM(0x1000, 0x1000, ((regs[0]&16) ? regs[2] : ((regs[1]&~1)+1))); - switch( (regs[0]>>2)&3 ) - { - case 0: case 1: - SetROM(0x8000, 0x8000, (regs[3] & 0xE) / 2); - break; - case 2: - SetROM(0x4000, 0x8000, 0); - SetROM(0x4000, 0xC000, (regs[3] & 0xF)); - break; - case 3: - SetROM(0x4000, 0x8000, (regs[3] & 0xF)); - SetROM(0x4000, 0xC000, ~0); - break; - } - } - } - if( (addr >> 13) == 3 ) return PRAM[addr & 0x1FFF ]; - return banks[ (addr / ROM_Granularity) % ROM_Pages] [addr % ROM_Granularity]; - } - void Init() - { - SetVROM(0x2000, 0x0000, 0); - for(unsigned v=0; v<4; ++v) SetROM(0x4000, v*0x4000, v==3 ? -1 : 0); - } -} - -namespace CPU /* CPU: Ricoh RP2A03 (based on MOS6502, almost the same as in Commodore 64) */ -{ - u8 RAM[0x800]; - bool reset=true, nmi=false, nmi_edge_detected=false, intr=false; - - template<bool write> u8 MemAccess(u16 addr, u8 v=0); - u8 RB(u16 addr) { printf("READ from %.4X\n", addr); return MemAccess<0>(addr); } - u8 WB(u16 addr,u8 v) { return MemAccess<1>(addr, v); } - void tick(); -} - -namespace PPU /* Picture Processing Unit */ -{ - union regtype // PPU register file - { - u32 value; - // Reg0 (write) // Reg1 (write) // Reg2 (read) - RegBit<0,8,u32> sysctrl; RegBit< 8,8,u32> dispctrl; RegBit<16,8,u32> status; - RegBit<0,2,u32> BaseNTA; RegBit< 8,1,u32> Grayscale; RegBit<21,1,u32> SPoverflow; - RegBit<2,1,u32> Inc; RegBit< 9,1,u32> ShowBG8; RegBit<22,1,u32> SP0hit; - RegBit<3,1,u32> SPaddr; RegBit<10,1,u32> ShowSP8; RegBit<23,1,u32> InVBlank; - RegBit<4,1,u32> BGaddr; RegBit<11,1,u32> ShowBG; // Reg3 (write) - RegBit<5,1,u32> SPsize; RegBit<12,1,u32> ShowSP; RegBit<24,8,u32> OAMaddr; - RegBit<6,1,u32> SlaveFlag; RegBit<11,2,u32> ShowBGSP; RegBit<24,2,u32> OAMdata; - RegBit<7,1,u32> NMIenabled; RegBit<13,3,u32> EmpRGB; RegBit<26,6,u32> OAMindex; - } reg; - // Raw memory data as read&written by the game - u8 palette[32], OAM[256]; - // Decoded sprite information, used & changed during each scanline - struct { u8 sprindex, y, index, attr, x; u16 pattern; } OAM2[8], OAM3[8]; - - union scrolltype - { - RegBit<3,16,u32> raw; // raw VRAM address (16-bit) - RegBit<0, 8,u32> xscroll; // low 8 bits of first write to 2005 - RegBit<0, 3,u32> xfine; // low 3 bits of first write to 2005 - RegBit<3, 5,u32> xcoarse; // high 5 bits of first write to 2005 - RegBit<8, 5,u32> ycoarse; // high 5 bits of second write to 2005 - RegBit<13,2,u32> basenta; // nametable index (copied from 2000) - RegBit<13,1,u32> basenta_h; // horizontal nametable index - RegBit<14,1,u32> basenta_v; // vertical nametable index - RegBit<15,3,u32> yfine; // low 3 bits of second write to 2005 - RegBit<11,8,u32> vaddrhi; // first write to 2006 (with high 2 bits set to zero) - RegBit<3, 8,u32> vaddrlo; // second write to 2006 - } scroll, vaddr; - - unsigned pat_addr, sprinpos, sproutpos, sprrenpos, sprtmp; - u16 tileattr, tilepat, ioaddr; - u32 bg_shift_pat, bg_shift_attr; - - int scanline=241, x=0, scanline_end=341, VBlankState=0, cycle_counter=0; - int read_buffer=0, open_bus=0, open_bus_decay_timer=0; - bool even_odd_toggle=false, offset_toggle=false; - - /* Memory mapping: Convert PPU memory address into a reference to relevant data */ - u8& mmap(int i) - { - i &= 0x3FFF; - if(i >= 0x3F00) { if(i%4==0) i &= 0x0F; return palette[i & 0x1F]; } - if(i < 0x2000) return GamePak::Vbanks[(i / GamePak::VROM_Granularity) % GamePak::VROM_Pages] - [ i % GamePak::VROM_Granularity]; - return GamePak::Nta[ (i>>10)&3][i&0x3FF]; - } - // External I/O: read or write - u8 Access(u16 index, u8 v, bool write) - { - auto RefreshOpenBus = [&](u8 v) { return open_bus_decay_timer = 77777, open_bus = v; }; - u8 res = open_bus; - if(write) RefreshOpenBus(v); - switch(index) // Which port from $200x? - { - case 0: if(write) { reg.sysctrl = v; scroll.basenta = reg.BaseNTA; } break; - case 1: if(write) { reg.dispctrl = v; } break; - case 2: if(write) break; - res = reg.status | (open_bus & 0x1F); - reg.InVBlank = false; // Reading $2002 clears the vblank flag. - offset_toggle = false; // Also resets the toggle for address updates. - if(VBlankState != -5) - VBlankState = 0; // This also may cancel the setting of InVBlank. - break; - case 3: if(write) reg.OAMaddr = v; break; // Index into Object Attribute Memory - case 4: if(write) OAM[reg.OAMaddr++] = v; // Write or read the OAM (sprites). - else res = RefreshOpenBus(OAM[reg.OAMaddr] & (reg.OAMdata==2 ? 0xE3 : 0xFF)); - break; - case 5: if(!write) break; // Set background scrolling offset - if(offset_toggle) { scroll.yfine = v & 7; scroll.ycoarse = v >> 3; } - else { scroll.xscroll = v; } - offset_toggle = !offset_toggle; - break; - case 6: if(!write) break; // Set video memory position for reads/writes - if(offset_toggle) { scroll.vaddrlo = v; vaddr.raw = (unsigned) scroll.raw; } - else { scroll.vaddrhi = v & 0x3F; } - offset_toggle = !offset_toggle; - break; - case 7: - res = read_buffer; - u8& t = mmap(vaddr.raw); // Access the video memory. - if(write) res = t = v; - else { if((vaddr.raw & 0x3F00) == 0x3F00) // palette? - res = read_buffer = (open_bus & 0xC0) | (t & 0x3F); - read_buffer = t; } - RefreshOpenBus(res); - vaddr.raw = vaddr.raw + (reg.Inc ? 32 : 1); // The address is automatically updated. - break; - } - return res; - } - void rendering_tick() - { - bool tile_decode_mode = 0x10FFFF & (1u << (x/16)); // When x is 0..255, 320..335 - - // Each action happens in two steps: 1) select memory address; 2) receive data and react on it. - switch(x % 8) - { - case 2: // Point to attribute table - ioaddr = 0x23C0 + 0x400*vaddr.basenta + 8*(vaddr.ycoarse/4) + (vaddr.xcoarse/4); - if(tile_decode_mode) break; // Or nametable, with sprites. - case 0: // Point to nametable - ioaddr = 0x2000 + (vaddr.raw & 0xFFF); - // Reset sprite data - if(x == 0) { sprinpos = sproutpos = 0; if(reg.ShowSP) reg.OAMaddr = 0; } - if(!reg.ShowBG) break; - // Reset scrolling (vertical once, horizontal each scanline) - if(x == 304 && scanline == -1) vaddr.raw = (unsigned) scroll.raw; - if(x == 256) { vaddr.xcoarse = (unsigned)scroll.xcoarse; - vaddr.basenta_h = (unsigned)scroll.basenta_h; - sprrenpos = 0; } - break; - case 1: - if(x == 337 && scanline == -1 && even_odd_toggle && reg.ShowBG) scanline_end = 340; - // Name table access - pat_addr = 0x1000*reg.BGaddr + 16*mmap(ioaddr) + vaddr.yfine; - if(!tile_decode_mode) break; - // Push the current tile into shift registers. - // The bitmap pattern is 16 bits, while the attribute is 2 bits, repeated 8 times. - bg_shift_pat = (bg_shift_pat >> 16) + 0x00010000 * tilepat; - bg_shift_attr = (bg_shift_attr >> 16) + 0x55550000 * tileattr; - break; - case 3: - // Attribute table access - if(tile_decode_mode) - { - tileattr = (mmap(ioaddr) >> ((vaddr.xcoarse&2) + 2*(vaddr.ycoarse&2))) & 3; - // Go to the next tile horizontally (and switch nametable if it wraps) - if(!++vaddr.xcoarse) { vaddr.basenta_h = 1-vaddr.basenta_h; } - // At the edge of the screen, do the same but vertically - if(x==251 && !++vaddr.yfine && ++vaddr.ycoarse == 30) - { vaddr.ycoarse = 0; vaddr.basenta_v = 1-vaddr.basenta_v; } - } - else if(sprrenpos < sproutpos) - { - // Select sprite pattern instead of background pattern - auto& o = OAM3[sprrenpos]; // Sprite to render on next scanline - memcpy(&o, &OAM2[sprrenpos], sizeof(o)); - unsigned y = (scanline) - o.y; - if(o.attr & 0x80) y ^= (reg.SPsize ? 15 : 7); - pat_addr = 0x1000 * (reg.SPsize ? (o.index & 0x01) : reg.SPaddr); - pat_addr += 0x10 * (reg.SPsize ? (o.index & 0xFE) : (o.index & 0xFF)); - pat_addr += (y&7) + (y&8)*2; - } - break; - // Pattern table bytes - case 5: - tilepat = mmap(pat_addr|0); - break; - case 7: // Interleave the bits of the two pattern bytes - unsigned p = tilepat | (mmap(pat_addr|8) << 8); - p = (p&0xF00F) | ((p&0x0F00)>>4) | ((p&0x00F0)<<4); - p = (p&0xC3C3) | ((p&0x3030)>>2) | ((p&0x0C0C)<<2); - p = (p&0x9999) | ((p&0x4444)>>1) | ((p&0x2222)<<1); - tilepat = p; - // When decoding sprites, save the sprite graphics and move to next sprite - if(!tile_decode_mode && sprrenpos < sproutpos) - OAM3[sprrenpos++].pattern = tilepat; - break; - } - // Find which sprites are visible on next scanline (TODO: implement crazy 9-sprite malfunction) - switch(x>=64 && x<256 && x%2 ? (reg.OAMaddr++ & 3) : 4) - { - default: - // Access OAM (object attribute memory) - sprtmp = OAM[reg.OAMaddr]; - break; - case 0: - if(sprinpos >= 64) { reg.OAMaddr=0; break; } - ++sprinpos; // next sprite - if(sproutpos<8) OAM2[sproutpos].y = sprtmp; - if(sproutpos<8) OAM2[sproutpos].sprindex = reg.OAMindex; - {int y1 = sprtmp, y2 = sprtmp + (reg.SPsize?16:8); - if(!( scanline >= y1 && scanline < y2 )) - reg.OAMaddr = sprinpos != 2 ? reg.OAMaddr+3 : 8;} - break; - case 1: - if(sproutpos<8) OAM2[sproutpos].index = sprtmp; - break; - case 2: - if(sproutpos<8) OAM2[sproutpos].attr = sprtmp; - break; - case 3: - if(sproutpos<8) OAM2[sproutpos].x = sprtmp; - if(sproutpos<8) ++sproutpos; else reg.SPoverflow = true; - if(sprinpos == 2) reg.OAMaddr = 8; - break; - } - } - void render_pixel() - { - bool edge = u8(x+8) < 16; // 0..7, 248..255 - bool showbg = reg.ShowBG && (!edge || reg.ShowBG8); - bool showsp = reg.ShowSP && (!edge || reg.ShowSP8); - - // Render the background - unsigned fx = scroll.xfine, xpos = 15 - (( (x&7) + fx + 8*!!(x&7) ) & 15); - - unsigned pixel = 0, attr = 0; - if(showbg) // Pick a pixel from the shift registers - { - pixel = (bg_shift_pat >> (xpos*2)) & 3; - attr = (bg_shift_attr >> (xpos*2)) & (pixel ? 3 : 0); - } - else if( (vaddr.raw & 0x3F00) == 0x3F00 && !reg.ShowBGSP ) - pixel = vaddr.raw; - - // Overlay the sprites - if(showsp) - for(unsigned sno=0; sno<sprrenpos; ++sno) - { - auto& s = OAM3[sno]; - // Check if this sprite is horizontally in range - unsigned xdiff = x - s.x; - if(xdiff >= 8) continue; // Also matches negative values - // Determine which pixel to display; skip transparent pixels - if(!(s.attr & 0x40)) xdiff = 7-xdiff; - u8 spritepixel = (s.pattern >> (xdiff*2)) & 3; - if(!spritepixel) continue; - // Register sprite-0 hit if applicable - if(x < 255 && pixel && s.sprindex == 0) reg.SP0hit = true; - // Render the pixel unless behind-background placement wanted - if(!(s.attr & 0x20) || !pixel) - { - attr = (s.attr & 3) + 4; - pixel = spritepixel; - } - // Only process the first non-transparent sprite pixel. - break; - } - pixel = palette[ (attr*4 + pixel) & 0x1F ] & (reg.Grayscale ? 0x30 : 0x3F); - IO::PutPixel(x, scanline, pixel | (reg.EmpRGB << 6), cycle_counter); - } - - // PPU::tick() -- This function is called 3 times per each CPU cycle. - // Each call iterates through one pixel of the screen. - // The screen is divided into 262 scanlines, each having 341 columns, as such: - // - // x=0 x=256 x=340 - // ___|____________________|__________| - // y=-1 | pre-render scanline| prepare | > - // ___|____________________| sprites _| > Graphics - // y=0 | visible area | for the | > processing - // | - this is rendered | next | > scanlines - // y=239 | on the screen. | scanline | > - // ___|____________________|______ - // y=240 | idle - // ___|_______________________________ - // y=241 | vertical blanking (idle) - // | 20 scanlines long - // y=260___|____________________|__________| - // - // On actual PPU, the scanline begins actually before x=0, with - // sync/colorburst/black/background color being rendered, and - // ends after x=256 with background/black being rendered first, - // but in this emulator we only care about the visible area. - // - // When background rendering is enabled, scanline -1 is - // 340 or 341 pixels long, alternating each frame. - // In all other situations the scanline is 341 pixels long. - // Thus, it takes 89341 or 89342 PPU::tick() calls to render 1 frame. - void tick() - { - // Set/clear vblank where needed - switch(VBlankState) - { - case -5: reg.status = 0; break; - case 2: reg.InVBlank = true; break; - case 0: CPU::nmi = reg.InVBlank && reg.NMIenabled; break; - } - if(VBlankState != 0) VBlankState += (VBlankState < 0 ? 1 : -1); - if(open_bus_decay_timer) if(!--open_bus_decay_timer) open_bus = 0; - - // Graphics processing scanline? - if(scanline < 240) - { - /* Process graphics for this cycle */ - if(reg.ShowBGSP) rendering_tick(); - if(scanline >= 0 && x < 256) render_pixel(); - } - - // Done with the cycle. Check for end of scanline. - if(++cycle_counter == 3) cycle_counter = 0; // For NTSC pixel shifting - if(++x >= scanline_end) - { - // Begin new scanline - IO::FlushScanline(scanline); - scanline_end = 341; - x = 0; - // Does something special happen on the new scanline? - switch(scanline += 1) - { - case 261: // Begin of rendering - scanline = -1; // pre-render line - even_odd_toggle = !even_odd_toggle; - // Clear vblank flag - VBlankState = -5; - break; - case 241: // Begin of vertical blanking - // I cheat here: I did not bother to learn how to use SDL events, - // so I simply read button presses from a movie file, which happens - // to be a TAS, rather than from the keyboard or from a joystick. - static FILE* fp = fopen(inputfn, "rb"); - if(fp) - { - static unsigned ctrlmask = 0; - if(!ftell(fp)) - { - fseek(fp, 0x05, SEEK_SET); - ctrlmask = fgetc(fp); - fseek(fp, 0x90, SEEK_SET); // Famtasia Movie format. - } - if(ctrlmask & 0x80) { IO::joy_next[0] = fgetc(fp); if(feof(fp)) IO::joy_next[0] = 0; } - if(ctrlmask & 0x40) { IO::joy_next[1] = fgetc(fp); if(feof(fp)) IO::joy_next[1] = 0; } - } - // Set vblank flag - VBlankState = 2; - } - } - } -} - -namespace APU /* Audio Processing Unit */ -{ - static const u8 LengthCounters[32] = { 10,254,20, 2,40, 4,80, 6,160, 8,60,10,14,12,26,14, - 12, 16,24,18,48,20,96,22,192,24,72,26,16,28,32,30 }; - static const u16 NoisePeriods[16] = { 2,4,8,16,32,48,64,80,101,127,190,254,381,508,1017,2034 }; - static const u16 DMCperiods[16] = { 428,380,340,320,286,254,226,214,190,160,142,128,106,84,72,54 }; - - bool FiveCycleDivider = false, IRQdisable = true, ChannelsEnabled[5] = { false }; - bool PeriodicIRQ = false, DMC_IRQ = false; - bool count(int& v, int reset) { return --v < 0 ? (v=reset),true : false; } - - struct channel - { - int length_counter, linear_counter, address, envelope; - int sweep_delay, env_delay, wave_counter, hold, phase, level; - union // Per-channel register file - { - // 4000, 4004, 400C, 4012: // 4001, 4005, 4013: // 4002, 4006, 400A, 400E: - RegBit<0,8,u32> reg0; RegBit< 8,8,u32> reg1; RegBit<16,8,u32> reg2; - RegBit<6,2,u32> DutyCycle; RegBit< 8,3,u32> SweepShift; RegBit<16,4,u32> NoiseFreq; - RegBit<4,1,u32> EnvDecayDisable; RegBit<11,1,u32> SweepDecrease; RegBit<23,1,u32> NoiseType; - RegBit<0,4,u32> EnvDecayRate; RegBit<12,3,u32> SweepRate; RegBit<16,11,u32> WaveLength; - RegBit<5,1,u32> EnvDecayLoopEnable; RegBit<15,1,u32> SweepEnable; // 4003, 4007, 400B, 400F, 4010: - RegBit<0,4,u32> FixedVolume; RegBit< 8,8,u32> PCMlength; RegBit<24,8,u32> reg3; - RegBit<5,1,u32> LengthCounterDisable; RegBit<27,5,u32> LengthCounterInit; - RegBit<0,7,u32> LinearCounterInit; RegBit<30,1,u32> LoopEnabled; - RegBit<7,1,u32> LinearCounterDisable; RegBit<31,1,u32> IRQenable; - } reg; - - // Function for updating the wave generators and taking the sample for each channel. - template<unsigned c> - int tick() - { - channel& ch = *this; - if(!ChannelsEnabled[c]) return c==4 ? 64 : 8; - int wl = (ch.reg.WaveLength+1) * (c >= 2 ? 1 : 2); - if(c == 3) wl = NoisePeriods[ ch.reg.NoiseFreq ]; - int volume = ch.length_counter ? ch.reg.EnvDecayDisable ? ch.reg.FixedVolume : ch.envelope : 0; - // Sample may change at wavelen intervals. - auto& S = ch.level; - if(!count(ch.wave_counter, wl)) return S; - switch(c) - { - default:// Square wave. With four different 8-step binary waveforms (32 bits of data total). - if(wl < 8) return S = 8; - return S = (0xF33C0C04u & (1u << (++ch.phase % 8 + ch.reg.DutyCycle * 8))) ? volume : 0; - - case 2: // Triangle wave - if(ch.length_counter && ch.linear_counter && wl >= 3) ++ch.phase; - return S = (ch.phase & 15) ^ ((ch.phase & 16) ? 15 : 0); - - case 3: // Noise: Linear feedback shift register - if(!ch.hold) ch.hold = 1; - ch.hold = (ch.hold >> 1) - | (((ch.hold ^ (ch.hold >> (ch.reg.NoiseType ? 6 : 1))) & 1) << 14); - return S = (ch.hold & 1) ? 0 : volume; - - case 4: // Delta modulation channel (DMC) - // hold = 8 bit value, phase = number of bits buffered - if(ch.phase == 0) // Nothing in sample buffer? - { - if(!ch.length_counter && ch.reg.LoopEnabled) // Loop? - { - ch.length_counter = ch.reg.PCMlength*16 + 1; - ch.address = (ch.reg.reg0 | 0x300) << 6; - } - if(ch.length_counter > 0) // Load next 8 bits if available - { - // Note: Re-entrant! But not recursive, because even - // the shortest wave length is greater than the read time. - // TODO: proper clock - if(ch.reg.WaveLength>20) - for(unsigned t=0; t<3; ++t) CPU::RB(u16(ch.address) | 0x8000); // timing - ch.hold = CPU::RB(u16(ch.address++) | 0x8000); // Fetch byte - ch.phase = 8; - --ch.length_counter; - } - else // Otherwise, disable channel or issue IRQ - ChannelsEnabled[4] = ch.reg.IRQenable && (CPU::intr = DMC_IRQ = true); - } - if(ch.phase != 0) // Update the signal if sample buffer nonempty - { - int v = ch.linear_counter; - if(ch.hold & (0x80 >> --ch.phase)) v += 2; else v -= 2; - if(v >= 0 && v <= 0x7F) ch.linear_counter = v; - } - return S = ch.linear_counter; - } - } - } channels[5] = { }; - - struct { short lo, hi; } hz240counter = { 0,0 }; - - void Write(u8 index, u8 value) - { - channel& ch = channels[(index/4) % 5]; - switch(index<0x10 ? index%4 : index) - { - case 0: if(ch.reg.LinearCounterDisable) ch.linear_counter=value&0x7F; ch.reg.reg0 = value; break; - case 1: ch.reg.reg1 = value; ch.sweep_delay = ch.reg.SweepRate; break; - case 2: ch.reg.reg2 = value; break; - case 3: - ch.reg.reg3 = value; - if(ChannelsEnabled[index/4]) - ch.length_counter = LengthCounters[ch.reg.LengthCounterInit]; - ch.linear_counter = ch.reg.LinearCounterInit; - ch.env_delay = ch.reg.EnvDecayRate; - ch.envelope = 15; - if(index < 8) ch.phase = 0; - break; - case 0x10: ch.reg.reg3 = value; ch.reg.WaveLength = DMCperiods[value&0x0F]; break; - case 0x12: ch.reg.reg0 = value; ch.address = (ch.reg.reg0 | 0x300) << 6; break; - case 0x13: ch.reg.reg1 = value; ch.length_counter = ch.reg.PCMlength*16 + 1; break; // sample length - case 0x11: ch.linear_counter = value & 0x7F; break; // dac value - case 0x15: - for(unsigned c=0; c<5; ++c) - ChannelsEnabled[c] = value & (1 << c); - for(unsigned c=0; c<5; ++c) - if(!ChannelsEnabled[c]) - channels[c].length_counter = 0; - else if(c == 4 && channels[c].length_counter == 0) - channels[c].length_counter = ch.reg.PCMlength*16 + 1; - break; - case 0x17: - IRQdisable = value & 0x40; - FiveCycleDivider = value & 0x80; - hz240counter = { 0,0 }; - if(IRQdisable) PeriodicIRQ = DMC_IRQ = false; - } - } - u8 Read() - { - u8 res = 0; - for(unsigned c=0; c<5; ++c) res |= (channels[c].length_counter ? 1 << c : 0); - if(PeriodicIRQ) res |= 0x40; PeriodicIRQ = false; - if(DMC_IRQ) res |= 0x80; DMC_IRQ = false; - CPU::intr = false; - return res; - } - - void tick() // Invoked at CPU's rate. - { - // Divide CPU clock by 7457.5 to get a 240 Hz, which controls certain events. - if((hz240counter.lo += 2) >= 14915) - { - hz240counter.lo -= 14915; - if(++hz240counter.hi >= 4+FiveCycleDivider) hz240counter.hi = 0; - - // 60 Hz interval: IRQ. IRQ is not invoked in five-cycle mode (48 Hz). - if(!IRQdisable && !FiveCycleDivider && hz240counter.hi==0) - CPU::intr = PeriodicIRQ = true; - - // Some events are invoked at 96 Hz or 120 Hz rate. Others, 192 Hz or 240 Hz. - bool HalfTick = (hz240counter.hi&5)==1, FullTick = hz240counter.hi < 4; - for(unsigned c=0; c<4; ++c) - { - channel& ch = channels[c]; - int wl = ch.reg.WaveLength; - - // Length tick (all channels except DMC, but different disable bit for triangle wave) - if(HalfTick && ch.length_counter - && !(c==2 ? ch.reg.LinearCounterDisable : ch.reg.LengthCounterDisable)) - ch.length_counter -= 1; - - // Sweep tick (square waves only) - if(HalfTick && c < 2 && count(ch.sweep_delay, ch.reg.SweepRate)) - if(wl >= 8 && ch.reg.SweepEnable && ch.reg.SweepShift) - { - int s = wl >> ch.reg.SweepShift, d[4] = {s, s, ~s, -s}; - wl += d[ch.reg.SweepDecrease*2 + c]; - if(wl < 0x800) ch.reg.WaveLength = wl; - } - - // Linear tick (triangle wave only) - if(FullTick && c == 2) - ch.linear_counter = ch.reg.LinearCounterDisable - ? ch.reg.LinearCounterInit - : (ch.linear_counter > 0 ? ch.linear_counter - 1 : 0); - - // Envelope tick (square and noise channels) - if(FullTick && c != 2 && count(ch.env_delay, ch.reg.EnvDecayRate)) - if(ch.envelope > 0 || ch.reg.EnvDecayLoopEnable) - ch.envelope = (ch.envelope-1) & 15; - } - } - - // Mix the audio: Get the momentary sample from each channel and mix them. - #define s(c) channels[c].tick<c==1 ? 0 : c>() - auto v = [](float m,float n, float d) { return n!=0.f ? m/n : d; }; - short sample = 30000 * - (v(95.88f, (100.f + v(8128.f, s(0) + s(1), -100.f)), 0.f) - + v(159.79f, (100.f + v(1.0, s(2)/8227.f + s(3)/12241.f + s(4)/22638.f, -100.f)), 0.f) - - 0.5f - ); - #undef s - // I cheat here: I did not bother to learn how to use SDL mixer, let alone use it in <5 lines of code, - // so I simply use a combination of external programs for outputting the audio. - // Hooray for Unix principles! A/V sync will be ensured in post-process. - return; // Disable sound because already device is in use - static FILE* fp = popen("resample mr1789800 r48000 | aplay -fdat 2>/dev/null", "w"); - fputc(sample, fp); - fputc(sample/256, fp); - } -} - -namespace CPU -{ - void tick() - { - // PPU clock: 3 times the CPU rate - for(unsigned n=0; n<3; ++n) PPU::tick(); - // APU clock: 1 times the CPU rate - for(unsigned n=0; n<1; ++n) APU::tick(); - } - - template<bool write> u8 MemAccess(u16 addr, u8 v) - { - // Memory writes are turned into reads while reset is being signalled - if(reset && write) return MemAccess<0>(addr); - - tick(); - // Map the memory from CPU's viewpoint. - /**/ if(addr < 0x2000) { u8& r = RAM[addr & 0x7FF]; if(!write)return r; r=v; } - else if(addr < 0x4000) return PPU::Access(addr&7, v, write); - else if(addr < 0x4018) - switch(addr & 0x1F) - { - case 0x14: // OAM DMA: Copy 256 bytes from RAM into PPU's sprite memory - if(write) for(unsigned b=0; b<256; ++b) WB(0x2004, RB((v&7)*0x0100+b)); - return 0; - case 0x15: if(!write) return APU::Read(); APU::Write(0x15,v); break; - case 0x16: if(!write) return IO::JoyRead(0); IO::JoyStrobe(v); break; - case 0x17: if(!write) return IO::JoyRead(1); // write:passthru - default: if(!write) break; - APU::Write(addr&0x1F, v); - } - else return GamePak::Access(addr, v, write); - return 0; - } - - // CPU registers: - u16 PC=0xC000; - u8 A=0,X=0,Y=0,S=0; - union /* Status flags: */ - { - u8 raw; - RegBit<0> C; // carry - RegBit<1> Z; // zero - RegBit<2> I; // interrupt enable/disable - RegBit<3> D; // decimal mode (unsupported on NES, but flag exists) - // 4,5 (0x10,0x20) don't exist - RegBit<6> V; // overflow - RegBit<7> N; // negative - } P; - - u16 wrap(u16 oldaddr, u16 newaddr) { return (oldaddr & 0xFF00) + u8(newaddr); } - void Misfire(u16 old, u16 addr) { u16 q = wrap(old, addr); if(q != addr) RB(q); } - u8 Pop() { return RB(0x100 | u8(++S)); } - void Push(u8 v) { WB(0x100 | u8(S--), v); } - - template<u16 op> // Execute a single CPU instruction, defined by opcode "op". - void Ins() // With template magic, the compiler will literally synthesize >256 different functions. - { - // Note: op 0x100 means "NMI", 0x101 means "Reset", 0x102 means "IRQ". They are implemented in terms of "BRK". - // User is responsible for ensuring that WB() will not store into memory while Reset is being processed. - unsigned addr=0, d=0, t=0xFF, c=0, sb=0, pbits = op<0x100 ? 0x30 : 0x20; - - // Define the opcode decoding matrix, which decides which micro-operations constitute - // any particular opcode. (Note: The PLA of 6502 works on a slightly different principle.) - enum { o8 = op/8, o8m = 1 << (op%8) }; - // Fetch op'th item from a bitstring encoded in a data-specific variant of base64, - // where each character transmits 8 bits of information rather than 6. - // This peculiar encoding was chosen to reduce the source code size. - // Enum temporaries are used in order to ensure compile-time evaluation. - #define t(s,code) { enum { \ - i=o8m & (s[o8]>90 ? (130+" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[s[o8]-94]) \ - : (s[o8]-" (("[s[o8]/39])) }; if(i) { code; } } - - // Decode address operand - t(" !", addr = 0xFFFA) // NMI vector location - t(" *", addr = 0xFFFC) // Reset vector location - t("! ,", addr = 0xFFFE) // Interrupt vector location - t("zy}z{y}zzy}zzy}zzy}zzy}zzy}zzy}z ", addr = RB(PC++)) - t("2 yy2 yy2 yy2 yy2 XX2 XX2 yy2 yy ", d = X) // register index - t(" 62 62 62 62 om om 62 62 ", d = Y) - t("2 y 2 y 2 y 2 y 2 y 2 y 2 y 2 y ", addr=u8(addr+d); d=0; tick()) // add zeropage-index - t(" y z!y z y z y z y z y z y z y z ", addr=u8(addr); addr+=256*RB(PC++)) // absolute address - t("3 6 2 6 2 6 286 2 6 2 6 2 6 2 6 /", addr=RB(c=addr); addr+=256*RB(wrap(c,c+1)))// indirect w/ page wrap - t(" *Z *Z *Z *Z 6z *Z *Z ", Misfire(addr, addr+d)) // abs. load: extra misread when cross-page - t(" 4k 4k 4k 4k 6z 4k 4k ", RB(wrap(addr, addr+d)))// abs. store: always issue a misread - // Load source operand - t("aa__ff__ab__,4 ____ - ____ ", t &= A) // Many operations take A or X as operand. Some try in - t(" knnn 4 99 ", t &= X) // error to take both; the outcome is an AND operation. - t(" 9989 99 ", t &= Y) // sty,dey,iny,tya,cpy - t(" 4 ", t &= S) // tsx, las - t("!!!! !! !! !! ! !! !! !!/", t &= P.raw|pbits; c = t)// php, flag test/set/clear, interrupts - t("_^__dc___^__ ed__98 ", c = t; t = 0xFF) // save as second operand - t("vuwvzywvvuwvvuwv zy|zzywvzywv ", t &= RB(addr+d)) // memory operand - t(",2 ,2 ,2 ,2 -2 -2 -2 -2 ", t &= RB(PC++)) // immediate operand - // Operations that mogrify memory operands directly - t(" 88 ", P.V = t & 0x40; P.N = t & 0x80) // bit - t(" nink nnnk ", sb = P.C) // rol,rla, ror,rra,arr - t("nnnknnnk 0 ", P.C = t & 0x80) // rol,rla, asl,slo,[arr,anc] - t(" nnnknink ", P.C = t & 0x01) // lsr,sre, ror,rra,asr - t("ninknink ", t = (t << 1) | (sb * 0x01)) - t(" nnnknnnk ", t = (t >> 1) | (sb * 0x80)) - t(" ! kink ", t = u8(t - 1)) // dec,dex,dey,dcp - t(" ! khnk ", t = u8(t + 1)) // inc,inx,iny,isb - // Store modified value (memory) - t("kgnkkgnkkgnkkgnkzy|J kgnkkgnk ", WB(addr+d, t)) - t(" q ", WB(wrap(addr, addr+d), t &= ((addr+d) >> 8))) // [shx,shy,shs,sha?] - // Some operations used up one clock cycle that we did not account for yet - t("rpstljstqjstrjst - - - -kjstkjst/", tick()) // nop,flag ops,inc,dec,shifts,stack,transregister,interrupts - // Stack operations and unconditional jumps - t(" ! ! ! ", tick(); t = Pop()) // pla,plp,rti - t(" ! ! ", RB(PC++); PC = Pop(); PC |= (Pop() << 8)) // rti,rts - t(" ! ", RB(PC++)) // rts - t("! ! /", d=PC+(op?-1:1); Push(d>>8); Push(d)) // jsr, interrupts - t("! ! 8 8 /", PC = addr) // jmp, jsr, interrupts - t("!! ! /", Push(t)) // pha, php, interrupts - // Bitmasks - t("! !! !! !! !! ! !! !! !!/", t = 1) - t(" ! ! !! !! ", t <<= 1) - t("! ! ! !! !! ! ! !/", t <<= 2) - t(" ! ! ! ! ! ", t <<= 4) - t(" ! ! ! !____ ", t = u8(~t)) // sbc, isb, clear flag - t("`^__ ! ! !/", t = c | t) // ora, slo, set flag - t(" !!dc`_ !! ! ! !! !! ! ", t = c & t) // and, bit, rla, clear/test flag - t(" _^__ ", t = c ^ t) // eor, sre - // Conditional branches - t(" ! ! ! ! ", if(t) { tick(); Misfire(PC, addr = s8(addr) + PC); PC=addr; }) - t(" ! ! ! ! ", if(!t) { tick(); Misfire(PC, addr = s8(addr) + PC); PC=addr; }) - // Addition and subtraction - t(" _^__ ____ ", c = t; t += A + P.C; P.V = (c^t) & (A^t) & 0x80; P.C = t & 0x100) - t(" ed__98 ", t = c - t; P.C = ~t & 0x100) // cmp,cpx,cpy, dcp, sbx - // Store modified value (register) - t("aa__aa__aa__ab__ 4 !____ ____ ", A = t) - t(" nnnn 4 ! ", X = t) // ldx, dex, tax, inx, tsx,lax,las,sbx - t(" ! 9988 ! ", Y = t) // ldy, dey, tay, iny - t(" 4 0 ", S = t) // txs, las, shs - t("! ! ! !! ! ! ! ! !/", P.raw = t & ~0x30) // plp, rti, flag set/clear - // Generic status flag updates - t("wwwvwwwvwwwvwxwv 5 !}}||{}wv{{wv ", P.N = t & 0x80) - t("wwwv||wvwwwvwxwv 5 !}}||{}wv{{wv ", P.Z = u8(t) == 0) - t(" 0 ", P.V = (((t >> 5)+1)&2)) // [arr] - /* All implemented opcodes are cycle-accurate and memory-access-accurate. - * [] means that this particular separate rule exists only to provide the indicated unofficial opcode(s). - */ - } - - void Op() - { - /* Check the state of NMI flag */ - bool nmi_now = nmi; - - unsigned op = RB(PC++); - - if(reset) { op=0x101; } - else if(nmi_now && !nmi_edge_detected) { op=0x100; nmi_edge_detected = true; } - else if(intr && !P.I) { op=0x102; } - if(!nmi_now) nmi_edge_detected=false; - - // Define function pointers for each opcode (00..FF) and each interrupt (100,101,102) - #define c(n) Ins<0x##n>,Ins<0x##n+1>, - #define o(n) c(n)c(n+2)c(n+4)c(n+6) - static void(*const i[0x108])() = - { - o(00)o(08)o(10)o(18)o(20)o(28)o(30)o(38) - o(40)o(48)o(50)o(58)o(60)o(68)o(70)o(78) - o(80)o(88)o(90)o(98)o(A0)o(A8)o(B0)o(B8) - o(C0)o(C8)o(D0)o(D8)o(E0)o(E8)o(F0)o(F8) o(100) - }; - #undef o - #undef c - i[op](); - printf("o %.2X A %.2X X %.2X Y %.2X PC %.4X P %.2X S %.2X\n", op, A, X, Y, PC, P.raw, S); - - reset = false; - } -} - -int main(int/*argc*/, char** argv) -{ - // Open the ROM file specified on commandline - FILE* fp = fopen(argv[1], "rb"); - inputfn = argv[2]; - - // Read the ROM file header - assert(fgetc(fp)=='N' && fgetc(fp)=='E' && fgetc(fp)=='S' && fgetc(fp)=='\32'); - u8 rom16count = fgetc(fp); - u8 vrom8count = fgetc(fp); - u8 ctrlbyte = fgetc(fp); - u8 mappernum = fgetc(fp) | (ctrlbyte>>4); - fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp); - if(mappernum >= 0x40) mappernum &= 15; - GamePak::mappernum = mappernum; - - // Read the ROM data - if(rom16count) GamePak::ROM.resize(rom16count * 0x4000); - if(vrom8count) GamePak::VRAM.resize(vrom8count * 0x2000); - fread(&GamePak::ROM[0], rom16count, 0x4000, fp); - fread(&GamePak::VRAM[0], vrom8count, 0x2000, fp); - - fclose(fp); - printf("%u * 16kB ROM, %u * 8kB VROM, mapper %u, ctrlbyte %02X\n", rom16count, vrom8count, mappernum, ctrlbyte); - - // Start emulation - GamePak::Init(); - IO::Init(); - PPU::reg.value = 0; - - // Pre-initialize RAM the same way as FCEUX does, to improve TAS sync. - for(unsigned a=0; a<0x800; ++a) - CPU::RAM[a] = (a&4) ? 0xFF : 0x00; - - // Run the CPU until the program is killed. - for(;;) CPU::Op(); -} - - - - diff --git a/examples/nesemu1/nesemu1.cff b/examples/nesemu1/nesemu1.cff deleted file mode 100644 index bcfa3ca..0000000 --- a/examples/nesemu1/nesemu1.cff +++ /dev/null @@ -1,929 +0,0 @@ -import "../libc.hff"; -import "sdl.hff"; - -// A port of https://bisqwit.iki.fi/jutut/kuvat/programming_examples/nesemu1/nesemu1.cc -// Original program by Joel Yliluoma - -struct IO { - s *SDL_Surface, - win *SDL_Window, - screen *SDL_Surface, - curjoy [2]int, nextjoy [2]int, joypos [2]int, - - fn init(io *IO) void { - io.win = SDL_CreateWindow("nesemu1", 0x1FFF0000u, 0x1FFF0000u, 256, 240, 0); - if io.win == #null { - fprintf(stderr, "SDL: %s", SDL_GetError()); - exit(1); - } - io.screen = SDL_GetWindowSurface(io.win); - io.s = SDL_CreateRGBSurface(0, 256, 240, 32, 0,0,0,0); - if io.s == #null { - fprintf(stderr, "SDL: %s\n", SDL_GetError()); - exit(1); - } - } - - fn putpixel(io *IO, px uint, py uint, pixel uint, offset int) void { - static palette [64]u32 = { - 0x666666ff, 0x002a88ff, 0x1412a7ff, 0x3b00a4ff, 0x5c007eff, 0x6e0040ff, 0x6c0600ff, 0x561d00ff, - 0x333500ff, 0x0b4800ff, 0x005200ff, 0x004f08ff, 0x00404dff, 0x000000ff, 0x000000ff, 0x000000ff, - 0xadadadff, 0x155fd9ff, 0x4240ffff, 0x7527feff, 0xa01accff, 0xb71e7bff, 0xb53120ff, 0x994e00ff, - 0x6b6d00ff, 0x388700ff, 0x0c9300ff, 0x008f32ff, 0x007c8dff, 0x000000ff, 0x000000ff, 0x000000ff, - 0xfffeffff, 0x64b0ffff, 0x9290ffff, 0xc676ffff, 0xf36affff, 0xfe6eccff, 0xfe8170ff, 0xea9e22ff, - 0xbcbe00ff, 0x88d800ff, 0x5ce430ff, 0x45e082ff, 0x48cddeff, 0x4f4f4fff, 0x000000ff, 0x000000ff, - 0xfffeffff, 0xc0dfffff, 0xd3d2ffff, 0xe8c8ffff, 0xfbc2ffff, 0xfec4eaff, 0xfeccc5ff, 0xf7d8a5ff, - 0xe4e594ff, 0xcfef96ff, 0xbdf4abff, 0xb3f3ccff, 0xb5ebf2ff, 0xb8b8b8ff, 0x000000ff, 0x000000ff, - }; - // Store the RGB color into the frame buffer. - ((as(*u32)io.s.pixels))[(py * 256) + px] = palette[pixel%64]; - } - - fn flush_scanline(io *IO, py uint) void { - if py == 239 { - SDL_UpperBlit(io.s, #null, io.screen, #null); - SDL_UpdateWindowSurface(io.win); - let evt SDL_Event #?; - while SDL_PollEvent(&evt) { - if evt.t == :quit { exit(0); } - } - // SDL_Delay(100); - } - } - - fn joy_strobe(io *IO, v bool) void { - if v { - io.curjoy[0] = io.nextjoy[0]; io.joypos[0] = 0; - io.curjoy[1] = io.nextjoy[1]; io.joypos[1] = 0; - } - } - fn joy_read(io *IO, idx uint) u8 { - static const masks [8]const u8 = {0x20,0x10,0x40,0x80,0x04,0x08,0x02,0x01}; - return (io.curjoy[idx] & masks[io.joypos[idx]++ & 7]) != 0 ? 1 : 0; - } -} -static g_io IO = {}; - -def VROM_GRANULARITY = 0x400, - VROM_PAGES = 0x2000 / VROM_GRANULARITY, - ROM_GRANULARITY = 0x2000, - ROM_PAGES = 0x10000 / ROM_GRANULARITY; - -struct GamePak { - rom [#]u8, - vram [#]u8, - mapperno uint, - nram [0x1000]u8, - pram [0x2000]u8, - banks [ROM_PAGES]*u8, - vbanks [VROM_PAGES]*u8, - nta [4]*u8, - - fn setrom(this *GamePak, size uint, baseaddr uint, index uint) void { - let r = this.rom; - def granu = ROM_GRANULARITY; - for let v = r.#len + (index * size), p = baseaddr / granu; - p < (baseaddr + size) / granu and p < ROM_PAGES; - (do ++p; v += granu;) - { - this.banks[p] = &r[v % r.#len]; - } - } - - fn setvrom(this *GamePak, size uint, baseaddr uint, index uint) void { - let r = &this.vram; - def granu = VROM_GRANULARITY; - for let v = (*r).#len + (index * size), p = baseaddr / granu; - p < (baseaddr + size) / granu and p < VROM_PAGES; - (do ++p; v += granu;) - { - this.vbanks[p] = &r.[v % (*r).#len]; - } - } - - fn access(this *GamePak, addr u16, val u8, write bool) u8 { - switch { - case write and addr >= 0x8000 and this.mapperno == 7; // e.g. Rare games - this->setrom(0x8000, 0x8000, val & 7); - for let i = 0; i < 4; i++ { - this.nta[i] = &this.nram[0x400 * ((val >> 4) & 1)]; - } - case write and addr >= 0x8000 and this.mapperno == 2; // e.g. Rockman, Castlevania - this->setrom(0x4000, 0x8000, val); - - case write and addr >= 0x8000 and this.mapperno == 3; // e.g. Kage, Solomon's Key - val &= this->access(addr, 0, #f); // Bus conflict - this->setvrom(0x2000, 0, val & 3); - - case write and addr >= 0x8000 and this.mapperno == 1; // e.g. Rockman 2, Simon's Quest - static regs [4]u8 = {0xC, 0, 0, 0}, - counter = 0, - cache = 0; - let configure = #f; - if val & 0x80 != 0 { - regs[0] = 0xC; - configure = #t; - } else { - cache |= (val & 1) << counter; - } - if configure or ++counter == 5 { - if !configure { regs[(addr >> 13) & 3] = (val = cache); } - cache = (counter = 0); - static sel [4][4]u8 = { {0,0,0,0}, {1,1,1,1}, {0,1,0,1}, {0,0,1,1} }; - for let m = 0; m < 4; ++m { - this.nta[m] = &this.nram[0x400 * sel[regs[0] & 3][m]]; - } - this->setrom(0x1000, 0x0000, ((regs[0]&16 != 0) ? regs[1] : ((regs[1]&~1)+0))); - this->setrom(0x1000, 0x1000, ((regs[0]&16 != 0) ? regs[2] : ((regs[1]&~1)+1))); - switch (regs[0] >> 2) & 3 { - case 0, 1; - this->setrom(0x8000, 0x8000, (regs[3] & 0xE) / 2); - case 2; - this->setrom(0x4000, 0x8000, 0); - this->setrom(0x4000, 0xC000, regs[3] & 0xF); - case 3; - this->setrom(0x4000, 0x8000, regs[3] & 0xF); - this->setrom(0x4000, 0xC000, ~0); - } - } - } - if addr >> 13 == 3 { - return this.pram[addr & 0x1FFF]; - } - // printf("read addr %.4X\n",addr); - return this.banks[(addr / ROM_GRANULARITY) % ROM_PAGES][addr % ROM_GRANULARITY]; - } - fn init(this *GamePak) void { - this.nta = { &this.nram[0], &this.nram[0x400], &this.nram[0], &this.nram[0x400] }; - this->setvrom(0x2000, 0x0000, 0); - for let v = 0; v < 4; ++v { this->setrom(0x4000, v * 0x4000, v == 3 ? ~0 : 0); } - } -} - -static g_pak GamePak = {}; - -struct CPU; -fn cpu_access(*CPU, addr u16, val u8, write bool) u8; -fn cpu_tick() void; -struct CPU { //CPU: Ricoh RP2A03 (based on MOS6502, almost the same as in Commodore 64) - ram [0x800]u8, - reset bool, - nmi bool, - nmi_edge bool, - intr bool, - pc u16, a u8, x u8, y u8, s u8, // registers - p bitfield : u8 { // status flags - c (0, 1) bool, ic (0, 1), // carry - z (1, 1) bool, iz (1, 1), // zero - i (2, 1) bool, ii (2, 1), // interrupt - d (3, 1) bool, id (3, 1), // decimal - v (6, 1) bool, iv (6, 1), // overflow - n (7, 1) bool, in (7, 1), // negative - }, - - fn init(cpu *CPU) void { - cpu.reset = #t; - cpu.p.i = #t; - } - fn RB(cpu *CPU, addr u16) u8 { return cpu_access(cpu, addr, 0, #f); } - fn WB(cpu *CPU, addr u16, val u8) u8 { return cpu_access(cpu, addr, val, #t); } -} - -static g_cpu CPU = {}; - -struct PPUSpr { - sprindex u8, y u8, index u8, attr u8, x u8, - pattern u16 -} -bitfield PPUScrolltype : u32 { - raw (3,16), // raw VRAM address (16-bit) - xscroll (0, 8), // low 8 bits of first write to 2005 - xfine (0, 3), // low 3 bits of first write to 2005 - xcoarse (3, 5), // high 5 bits of first write to 2005 - ycoarse (8, 5), // high 5 bits of second write to 2005 - basenta (13,2), // nametable index (copied from 2000) - basenta_h (13,1), // horizontal nametable index - basenta_v (14,1), // vertical nametable index - yfine (15,3), // low 3 bits of second write to 2005 - vaddrhi (11,8), // first write to 2006 (with high 2 bits set to zero) - vaddrlo (3, 8) // second write to 2006 -} -struct PPU { - reg bitfield : u32 { - // reg 0 (w) // reg 1 (w) // reg 2 (r) - sysctrl (0, 8), dispctrl (8, 8), status (16, 8), - basenta (0, 2), grayscale (8, 1) bool, spoverflow (21, 1) bool, - inc (2, 1), showbg8 (9, 1) bool, sp0hit (22, 1) bool, - spaddr (3, 1), showsp8 (10,1) bool, invblank (23, 1) bool, - bgaddr (4, 1), showbg (11,1) bool, // reg 3 (w) - spsize (5, 1) bool, showsp (12,1) bool, oamaddr (24, 8), - slaveflag (6, 1) bool, showbgsp (11,2), oamdata (24, 2), - nmienable (7, 1) bool, emprgb (13,3), oamindex (26, 2), - }, - palette [32]u8, - oam [256]u8, oam2 [8]PPUSpr, oam3 [8]PPUSpr, - scroll PPUScrolltype, vaddr PPUScrolltype, - pat_addr uint, sprinpos uint, sproutpos uint, sprrenpos uint, sprtmp uint, - tileattr u16, tilepat u16, ioaddr u16, - bg_shift_pat u32, bg_shift_attr u32, - scanline int, x int, scanline_end int, vblankstate int, ncycles int, - readbuffer int, openbus int, openbus_decaytimer int, - parity bool, offset_toggle bool, - - fn init(ppu *PPU) void { - ppu.scanline = 241; - ppu.scanline_end = 341; - } - - // Memory mapping: Convert PPU memory address into a reference to relevant data - fn mmap(ppu *PPU, i int) *u8 { - i &= 0x3FFF; - if i >= 0x3F00 { - if i % 4 == 0 { i &= 0x0F; } - return &ppu.palette[i & 0x1F]; - } - if i < 0x2000 { - return &g_pak.vbanks[(i / VROM_GRANULARITY) % VROM_PAGES][i % VROM_GRANULARITY]; - } - return &g_pak.nta[(i >> 10) & 3][i & 0x3FF]; - } - - // external I/O: read or write - fn access(ppu *PPU, index uint, v u8, write bool) u8 { - defmacro refreshopenbus(v) [(do ppu.openbus_decaytimer = 77777; ppu.openbus = v; )] - let res u8 = ppu.openbus; - if write { refreshopenbus(v); } - switch index { - case 0; if write { ppu.reg.sysctrl = v; ppu.scroll.basenta = ppu.reg.basenta; } - case 1; if write { ppu.reg.dispctrl = v; } - case 2; - if !write { - res = ppu.reg.status | (ppu.openbus & 0x1F); - ppu.reg.invblank = #f; // Reading $2002 clears the vblank flag. - ppu.offset_toggle = #f; // Also resets the toggle for address updates. - if ppu.vblankstate != -5 { - ppu.vblankstate = 0; // This also may cancel the setting of InVBlank. - } - } - case 3; if write { ppu.reg.oamaddr = v; } // Index into OAM - case 4; if write { ppu.oam[ppu.reg.oamaddr++] = v; } // Write/read the OAM - else { res = refreshopenbus(ppu.oam[ppu.reg.oamaddr] & (ppu.reg.oamdata == 2 ? 0xE3 : 0xFF)); } - case 5; // set background scrolling offset - if write { - if ppu.offset_toggle { ppu.scroll.yfine = v & 7; ppu.scroll.ycoarse = v >> 3; - } else { ppu.scroll.xscroll = v; } - ppu.offset_toggle = !ppu.offset_toggle; - } - case 6; // set video memory position for access - if write { - if ppu.offset_toggle { ppu.scroll.vaddrlo = v; ppu.vaddr = ppu.scroll; - } else { ppu.scroll.vaddrhi = v & 0x3F; } - ppu.offset_toggle = !ppu.offset_toggle; - } - case 7; - res = ppu.readbuffer; - let t = mmap(ppu, ppu.vaddr.#raw); // access video memory - if write { res = (*t = v); } - else { - if ppu.vaddr.#raw & 0x3F00 == 0x3F00 { // palette? - res = (ppu.readbuffer = (ppu.openbus & 0xC0) | (*t & 0x3F)); - } - ppu.readbuffer = *t; - } - refreshopenbus(res); - ppu.vaddr.#raw += (ppu.reg.inc!=0 ? 32 : 1); // update address - } - return res; - } - - fn rendering_tick(ppu *PPU) void { - let tile_decode_mode = as(bool)(0x10FFFF & (1u << (ppu.x / 16))); // when x is 0..255, 320..335 - - // Each action happens in two steps: 1) select memory address; 2) receive data and react on it. - switch ppu.x % 8 { - case 0, 2; // point to nametable / attribute table - if ppu.x % 8 == 2 { - ppu.ioaddr = 0x23C0 + (0x400*ppu.vaddr.basenta) + (8*(ppu.vaddr.ycoarse/4)) + (ppu.vaddr.xcoarse/4); - } - if ppu.x % 8 == 0 or !tile_decode_mode { - ppu.ioaddr = 0x2000 + (ppu.vaddr.#raw & 0xFFF); - // reset sprite data - if ppu.x == 0 { - ppu.sprinpos = (ppu.sproutpos = 0); - if ppu.reg.showsp { ppu.reg.oamaddr = 0; } - } - if ppu.reg.showbg { - // reset scrolling (vertical once, horizontal each scanline) - if ppu.x == 304 and ppu.scanline == -1 { ppu.vaddr = ppu.scroll; } - if ppu.x == 256 { - ppu.vaddr.xcoarse = ppu.scroll.xcoarse; - ppu.vaddr.basenta_h = ppu.scroll.basenta_h; - ppu.sprrenpos = 0; - } - } - } - case 1; - if ppu.x == 337 and ppu.scanline == -1 and ppu.parity and ppu.reg.showbg { - ppu.scanline_end = 340; - } - // name table access - ppu.pat_addr = (0x1000*ppu.reg.bgaddr) + (16 * *mmap(ppu, ppu.ioaddr)) + ppu.vaddr.yfine; - if tile_decode_mode { - // push current tile into shift regs - // the bitmap pattern is 16 bits, while the attribute is 2 bits, repeated 8 times - ppu.bg_shift_pat = (ppu.bg_shift_pat >> 16) + (0x00010000 * ppu.tilepat); - ppu.bg_shift_attr = (ppu.bg_shift_attr >> 16) + (0x55550000 * ppu.tileattr); - } - case 3; - // attribute table access - if tile_decode_mode { - ppu.tileattr = (*mmap(ppu, ppu.ioaddr) >> ((ppu.vaddr.xcoarse&2) + (2*(ppu.vaddr.ycoarse&2)))) & 3; - // go to the next tile horizontally (and switch nametable if it wraps) - if ++ppu.vaddr.xcoarse == 0 { ppu.vaddr.basenta_h = 1 - ppu.vaddr.basenta_h; } - // at the edge of the screen do the same but vertically - if ppu.x == 251 and ++ppu.vaddr.yfine == 0 and ++ppu.vaddr.ycoarse == 30 { - ppu.vaddr.ycoarse = 0; ppu.vaddr.basenta_v = 1 - ppu.vaddr.basenta_v; - } - } else if ppu.sprrenpos < ppu.sproutpos { - // select sprite pattern instead of background pattern - let o = &ppu.oam3[ppu.sprrenpos]; // sprite to render on next scanline - memcpy(o, &ppu.oam2[ppu.sprrenpos], sizeof(o)); - let y uint = ppu.scanline - o.y; - if o.attr & 0x80 != 0 { y ^= ppu.reg.spsize ? 15 : 7; } - ppu.pat_addr = 0x1000 * (ppu.reg.spsize ? (o.index & 0x01) : ppu.reg.spaddr); - ppu.pat_addr += 0x10 * (ppu.reg.spsize ? (o.index & 0xFE) : (o.index & 0xFF)); - ppu.pat_addr += (y&7) + ((y&8)*2); - } - case 5; // pattern table bytes - ppu.tilepat = *mmap(ppu, ppu.pat_addr); - case 7; // interleave bits of the two pattern bytes - let p uint = ppu.tilepat | (*mmap(ppu, ppu.pat_addr|8) << 8); - p = (p&0xF00F) | ((p&0x0F00)>>4) | ((p&0x00F0)<<4); - p = (p&0xC3C3) | ((p&0x3030)>>2) | ((p&0x0C0C)<<2); - p = (p&0x9999) | ((p&0x4444)>>1) | ((p&0x2222)<<1); - ppu.tilepat = p; - // When decoding sprites, save the sprite graphics and move to next sprite - if !tile_decode_mode and ppu.sprrenpos < ppu.sproutpos { - ppu.oam3[ppu.sprrenpos++].pattern = ppu.tilepat; - } - } - // find which sprites are visible on next scanline (TODO: implement crazy 9-sprite malfunction) - switch ppu.x >= 64 and ppu.x < 256 and ppu.x%2 == 0 ? (ppu.reg.oamaddr++ & 3) : 4 { - case else - // access oam - ppu.sprtmp = ppu.oam[ppu.reg.oamaddr]; - case 0; - if ppu.sprinpos >= 64 { ppu.reg.oamaddr = 0; } - else { - ++ppu.sprinpos; // next sprite - if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].y = ppu.sprtmp; } - if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].sprindex = ppu.reg.oamindex; } - let y1 = ppu.sprtmp, y2 int = ppu.sprtmp + (ppu.reg.spsize ? 16 : 8); - if !(ppu.scanline >= y2 and ppu.scanline < y2) { - ppu.reg.oamaddr = ppu.sprinpos != 2 ? ppu.reg.oamaddr + 3 : 8; - } - } - case 1; - if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].index = ppu.sprtmp; } - case 2; - if ppu.sproutpos < 8 { ppu.oam2[ppu.sproutpos].attr = ppu.sprtmp; } - case 3; - if ppu.sproutpos < 8 { - ppu.oam2[ppu.sproutpos].x = ppu.sprtmp; - ++ppu.sproutpos; - } else { ppu.reg.spoverflow = #t; } - if ppu.sprinpos == 2 { ppu.reg.oamaddr = 8; } - } - } - - fn render_pixel(ppu *PPU) void { - let edge = as(u8)(ppu.x + 8) < 16; // 0..7, 248..255 - let showbg = ppu.reg.showbg and (!edge or ppu.reg.showbg8); - let showsp = ppu.reg.showsp and (!edge or ppu.reg.showsp8); - - //render the background - let fx = ppu.scroll.xfine, xpos = 15u - (((ppu.x&7) + fx + (ppu.x&7!=0 ? 8 : 0)) & 15); - let pixel = 0u, attr = 0u; - if showbg { // pick a pixel from shift registers - pixel = (ppu.bg_shift_pat >> (xpos*2)) & 3; - attr = (ppu.bg_shift_attr >> (xpos*2)) & (pixel != 0 ? 3 : 0); - } else if ppu.vaddr.raw & 0x3F00 == 0x3F00 and ppu.reg.showbgsp == 0 { - pixel = ppu.vaddr.raw; - } - - // overlay the sprites - if showsp { - for let sno = 0u; sno < ppu.sprrenpos; ++sno { - let s = &ppu.oam3[sno]; - //check if sprite is horizontall in range - let xdiff uint = ppu.x - s.x; - if xdiff >= 8 { continue; } - // determine which pixel to display; skip transparent ones - if s.attr & 0x40 == 0 { xdiff = 7 - xdiff; } - let spritepixel u8 = (s.pattern >> (xdiff*2)) & 3; - if spritepixel == 0 { continue; } - // check sprite-0 hit - if ppu.x < 255 and pixel != 0 and s.sprindex == 0 { ppu.reg.sp0hit = #t; } - // render pixel unless behind-background placement wanted - if s.attr & 0x20 == 0 or pixel == 0 { - attr = (s.attr & 3) + 4; - pixel = spritepixel; - } - // only process first non-transparent sprite pixel - break; - } - } - pixel = ppu.palette[((attr*4) + pixel) & 0x1F] & (ppu.reg.grayscale ? 0x30 : 0x3F); - g_io->putpixel(ppu.x, ppu.scanline, pixel | (ppu.reg.emprgb << 6), ppu.ncycles); - } - - // PPU:tick() -- This function is called 3 times per each CPU cycle. - // Each call iterates through one pixel of the screen. - // The screen is divided into 262 scanlines, each having 341 columns, as such: - // - // x=0 x=256 x=340 - // ___|____________________|__________| - // y=-1 | pre-render scanline| prepare | > - // ___|____________________| sprites _| > Graphics - // y=0 | visible area | for the | > processing - // | - this is rendered | next | > scanlines - // y=239 | on the screen. | scanline | > - // ___|____________________|______ - // y=240 | idle - // ___|_______________________________ - // y=241 | vertical blanking (idle) - // | 20 scanlines long - // y=260___|____________________|__________| - // - // On actual PPU, the scanline begins actually before x=0, with - // sync/colorburst/black/background color being rendered, and - // ends after x=256 with background/black being rendered first, - // but in this emulator we only care about the visible area. - // - // When background rendering is enabled, scanline -1 is - // 340 or 341 pixels long, alternating each frame. - // In all other situations the scanline is 341 pixels long. - // Thus, it takes 89341 or 89342 PPU::tick() calls to render 1 frame. - fn tick(ppu *PPU) void { - // set/clear vblank where needed - switch ppu.vblankstate { - case -5; ppu.reg.status = 0; - case 2; ppu.reg.invblank = #t; - case 0; g_cpu.nmi = ppu.reg.invblank and ppu.reg.nmienable; - } - if ppu.vblankstate != 0 { ppu.vblankstate += (ppu.vblankstate < 0 ? 1 : -1); } - if ppu.openbus_decaytimer > 0 { - if --ppu.openbus_decaytimer == 0 { ppu.openbus = 0; } - } - // graphics processing scanline? - if ppu.scanline < 240 { - // process graphics for this cycle - if ppu.reg.showbgsp != 0 { ppu->rendering_tick(); } - if ppu.scanline >= 0 and ppu.x < 256 { ppu->render_pixel(); } - } - // done with cycle. check for end of scanline - if ++ppu.ncycles == 3 { ppu.ncycles = 0; #{ for NTSC rendering } } - if ++ppu.x >= ppu.scanline_end { - // begin new scanline - g_io->flush_scanline(ppu.scanline); - ppu.scanline_end = 341; - ppu.x = 0; - switch ppu.scanline += 1 { - case 261; // begin rendering - ppu.scanline = -1; // pre render line - ppu.parity = !ppu.parity; - // clear vblank - ppu.vblankstate = -5; - case 241; // begin of vblank - static fp *FILE = {}; - if fp == #null { fp = fopen("input.fmv", "rb"); }; - if(fp) - { - static ctrlmask = 0u; - if(ftell(fp) == 0) - { - fseek(fp, 0x05, SEEK_SET); - ctrlmask = fgetc(fp); - fseek(fp, 0x90, SEEK_SET); // Famtasia Movie format. - } - if(ctrlmask & 0x80!=0) { g_io.nextjoy[0] = fgetc(fp); if feof(fp){g_io.nextjoy[0] = 0;} } - if(ctrlmask & 0x40!=0) { g_io.nextjoy[1] = fgetc(fp); if feof(fp){g_io.nextjoy[1] = 0;} } - } - // set vblank flag - ppu.vblankstate = 2; - } - } - } -} - -static g_ppu PPU = {}; - -struct APU { // Audio Processing Unit - fivecycledivider bool, irqdisable bool, channelsenabled [5]bool, - periodicirq bool, dmc_irq bool, - channels [5]struct { - length_counter int, linear_counter int, address int, envelope int, - sweep_delay int, env_delay int, wave_counter int, hold int, phase int, level int, - reg bitfield : u32 { // per channel register file - // 4000, 4004, 400C, 4012: // 4001, 4005, 4013: // 4002, 4006, 400A, 400E: - reg0 (0,8), reg1 (8, 8), reg2 (16, 8), - dutycycle (6,2), sweepshift (8, 3), noisefreq (16, 4), - envdecaydisable (4,1) bool, sweepdecrease (11,1), noisetype (23, 1) bool, - envdecayrate (0,4), sweeprate (12,3), wavelength (16,11), - envdecayloopenable (5,1) bool, sweepenable (15,1) bool, // 4003, 4007, 400b, 400f, 4010: - fixedvolume (0,4), pcmlength (8, 8), reg3 (24, 8), - lengthcounterdisable (5,1) bool, lengthcounterinit (27, 5), - linearcounterinit (0,7), loopenabled (30, 1) bool, - linearcounterdisable (7,1) bool, irqenable (31, 1) bool, - } - }, - hz240counter struct { lo i16, hi i16 }, - - fn count(v *int, reset int) bool { - if --*v < 0 { *v = reset; return #t; } - return #f; - } - - typedef Channel typeof((APU{}).channels[0]); - - fn tick_channel(apu *APU, ch *Channel, c uint) int { - if !apu.channelsenabled[c] { return c == 4 ? 64 : 8; } - let wl = (ch.reg.wavelength + 1) * (c >= 2 ? 1 : 2); - static const NoisePeriods [16]const u16 = { 2,4,8,16,32,48,64,80,101,127,190,254,381,508,1017,2034 }; - if c == 3 { wl = NoisePeriods[ch.reg.noisefreq]; } - let volume = ch.length_counter > 0 ? (ch.reg.envdecaydisable ? ch.reg.fixedvolume : ch.envelope) : 0; - let S = &ch.level; - if count(&ch.wave_counter, wl) { return *S; } - switch c { - case else // square wave. with 4 different 8-step binary waveforms (32 bits of data total) - if wl < 8 { return *S = 8; } - return *S = (0xF33C0C04u & (1u << ((++ch.phase % 8) + (ch.reg.dutycycle * 8)))) != 0 ? volume : 0; - - case 2; // triangle wave - if ch.length_counter > 0 and ch.linear_counter > 0 and wl >= 3 { ++ch.phase; } - return *S = (ch.phase & 15) ^ ((ch.phase & 16) != 0 ? 15 : 0); - - case 3; // noise: LSFR - if ch.hold == 0 { ch.hold = 1; } - ch.hold = (ch.hold >> 1) - | (((ch.hold ^ (ch.hold >> (ch.reg.noisetype ? 6 : 1))) & 1) << 14); - return *S = (ch.hold & 1) != 0 ? 0 : volume; - - case 4; // delta modulation channel (DMC) - // hold = 8 bit value, phase = number of bits buffered - if ch.phase == 0 { // nothing in sample buffer? - if ch.length_counter == 0 and ch.reg.loopenabled { // Loop? - ch.length_counter = (ch.reg.pcmlength * 16) + 1; - ch.address = (ch.reg.reg0 | 0x300) << 6; - } - if ch.length_counter > 0 { // load next 8 bits - // Note: Re-entrant! But not recursive, because even - // the shortest wave length is greater than the read time. - // TODO: proper clock - if ch.reg.wavelength > 20 { - for let t=0; t<3; ++t { g_cpu->RB(as(u16)(ch.address) | 0x8000); } // timing - } - ch.hold = g_cpu->RB(as(u16)(ch.address++) | 0x8000); // fetch byte - ch.phase = 8; - --ch.length_counter; - } else { // disable channeel or issue irq - apu.channelsenabled[4] = ch.reg.irqenable and (g_cpu.intr = (apu.dmc_irq = #t)); - } - } - if ch.phase != 0 { // update the signal if sample buffer non empty - let v = ch.linear_counter; - if ch.hold != 0 and (0x80 >> --ch.phase) != 0 { v += 2; } else { v -= 2; } - if v >= 0 and v <= 0x7F { ch.linear_counter = v; } - } - return *S = ch.linear_counter; - } - } - - fn init(apu *APU) void { - apu.irqdisable = #t; - } - - fn write(apu *APU, index u8, value u8) void { - let ch = &apu.channels[(index/4)%5]; - switch index < 0x10 ? index%4 : index { - case 0; if ch.reg.linearcounterdisable { ch.linear_counter = value&0x7F; ch.reg.reg0 = value; } - case 1; ch.reg.reg1 = value; ch.sweep_delay = ch.reg.sweeprate; - case 2; ch.reg.reg2 = value; - case 3; - ch.reg.reg3 = value; - if apu.channelsenabled[index/4] { - static const LengthCounters[32]const u8 = { 10,254,20, 2,40, 4,80, 6,160, 8,60,10,14,12,26,14, - 12, 16,24,18,48,20,96,22,192,24,72,26,16,28,32,30 }; - ch.length_counter = LengthCounters[ch.reg.lengthcounterinit]; - } - ch.linear_counter = ch.reg.linearcounterinit; - ch.env_delay = ch.reg.envdecayrate; - ch.envelope = 15; - if index < 8 { ch.phase = 0; } - case 0x10; - static const DMCperiods[16]const u16 = { 428,380,340,320,286,254,226,214,190,160,142,128,106,84,72,54 }; - ch.reg.reg3 = value; ch.reg.wavelength = DMCperiods[value&0xF]; - case 0x12; ch.reg.reg0 = value; ch.address = (ch.reg.reg0 | 0x300) << 6; - case 0x13; ch.reg.reg1 = value; ch.length_counter = (ch.reg.pcmlength*16) + 1; // sample length - case 0x11; ch.linear_counter = value & 0x7F; // dac value - case 0x15; - for let c = 0; c<5; ++c { - apu.channelsenabled[c] = value & (1 << c) != 0; - } - for let c = 0; c<5; ++c { - if !apu.channelsenabled[c] { - apu.channels[c].length_counter = 0; - } else if c == 4 and apu.channels[c].length_counter == 0 { - apu.channels[c].length_counter = (ch.reg.pcmlength*16) + 1; - } - } - case 0x17; - apu.irqdisable = value & 0x40 != 0; - apu.fivecycledivider = value & 0x80 != 0; - apu.hz240counter = { 0, 0 }; - if apu.irqdisable { apu.periodicirq = (apu.dmc_irq = #f); } - } - } - - fn read(apu *APU) u8 { - let res u8 = 0; - for let c=0; c<5; ++c { res |= (apu.channels[c].length_counter > 0 ? 1 << c : 0); } - if apu.periodicirq { res |= 0x40; apu.periodicirq = #f; } - if apu.dmc_irq { res |= 0x80; apu.dmc_irq = #f; } - g_cpu.intr = #f; - return res; - } - - fn tick(apu *APU) void { // invoked at cpu's rate - // Divide CPU clock by 7457.5 to get a 240 Hz, which controls certain events. - if (apu.hz240counter.lo += 2) >= 14915 { - apu.hz240counter.lo -= 14915; - if ++apu.hz240counter.hi >= 4+as(int)apu.fivecycledivider { apu.hz240counter.hi = 0; } - - // 60 Hz interval: IRQ. IRQ is not invoked in five-cycle mode (48 Hz). - if !apu.irqdisable and !apu.fivecycledivider and apu.hz240counter.hi == 0 { - g_cpu.intr = (apu.periodicirq = #t); - } - - // Some events are invoked at 96 Hz or 120 Hz rate. Others, 192 Hz or 240 Hz. - let halftick = (apu.hz240counter.hi & 5) == 1, fulltick = apu.hz240counter.hi < 4; - for let c = 0; c < 4; ++c { - let ch = &apu.channels[c]; - let wl = ch.reg.wavelength; - - // Length tick (all channels except DMC, but different disable bit for triangle wave) - if halftick and ch.length_counter > 0 - and !(c == 2 ? ch.reg.linearcounterdisable : ch.reg.lengthcounterdisable) { - ch.length_counter -= 1; - } - - // Sweep tick (square waves only) - if halftick and c < 2 and count(&ch.sweep_delay, ch.reg.sweeprate) { - if wl >= 9 and ch.reg.sweepenable and ch.reg.sweepshift != 0 { - let s = wl >> ch.reg.sweepshift, d [4]int = {s,s,~s,-s}; - wl += d[(ch.reg.sweepdecrease*2)+ c]; - if wl < 0x800 { ch.reg.wavelength = wl; } - } - } - - // Linear tick (triangle wave only) - if fulltick and c == 2 { - ch.linear_counter = - ch.reg.linearcounterdisable ? ch.reg.linearcounterinit - : (ch.linear_counter > 0 ? ch.linear_counter - 1 : 0); - } - - // envelope tick (square and noise channels) - if fulltick and c != 2 and count(&ch.env_delay, ch.reg.envdecayrate) { - if ch.envelope > 0 or ch.reg.envdecayloopenable { - ch.envelope = (ch.envelope - 1) & 15; - } - } - } - } - // mix the audio: get the momentary sample from each channel and mix them - defmacro s(c) [ (apu->tick_channel(&apu.channels[c], c == 1 ? 0 : c)) ] - fn v(m f32, n f32, d f32) f32 { return n != 0.f ? m/n : d; } - let sample i16 = 30000 * - (v(95.88f, (100.f + v(8128.f, s(0) + s(1), -100.f)), 0.f) - + v(159.79f, (100.f + v(1.0, s(2)/8227.f + s(3)/12241.f + s(4)/22638.f, -100.f)), 0.f) - + -0.5f); - // SDL_QueueAudio(g_io.auddev, &sample, 2); - static r *FILE = {}; - } -} -static g_apu APU = {}; - -fn cpu_tick() void { - for let n = 0; n < 3; ++n { g_ppu->tick(); } - for let n = 0; n < 1; ++n { g_apu->tick(); } -} -fn cpu_access(cpu *CPU, addr u16, v u8, write bool) u8 { - // memory writes are turned into reads while reset is being signalled - if cpu.reset and write { return cpu_access(cpu, addr, 0, #f); } - cpu_tick(); - //map the memory from cpu's viewpoint - switch { - case addr < 0x2000; let r = &cpu.ram[addr & 0x7FF]; - if !write { return *r; } - *r = v; - case addr < 0x4000; return g_ppu->access(addr & 7, v, write); - case addr < 0x4018; - switch addr & 0x1F { - case 0x14; // OAM DMA - if write { for let b = 0; b<256; ++b { cpu->WB(0x2004, cpu->RB(((v&7)*0x100)+b)); } } - case 0x15; if !write { return g_apu->read(); } - g_apu->write(0x15,v); - case 0x16; if !write { return g_io->joy_read(0); } - g_io->joy_strobe(v); - case 0x17; if !write { return g_io->joy_read(1); } - g_apu->write(addr & 0x1F, v); - case else if write { g_apu->write(addr&0x1F, v); } - } - case else - return g_pak->access(addr, v, write); - } - return 0; -} - -fn Cwrap(oldaddr u16, newaddr u16) u16 { return (oldaddr & 0xFF00) + as(u8)newaddr; } -fn Cmisfire(cpu *CPU, old u16, addr u16) void { let q = Cwrap(old, addr); if q != addr { cpu->RB(q); }} -fn Cpop(cpu *CPU) u8 { return cpu->RB(0x100 | as(u8)++cpu.s); } -fn Cpush(cpu *CPU, v u8) u8 { cpu->WB(0x100 | as(u8)cpu.s--, v); } -struct CIns<op u16> { // Execute a single CPU instruction, defined by opcode "op". - fn ins(cpu *CPU) void { // With template magic, the compiler will literally synthesize >256 different functions. - // Note: op 0x100 means "NMI", 0x101 means "Reset", 0x102 means "IRQ". They are implemented in terms of "BRK". - // User is responsible for ensuring that WB() will not store into memory while Reset is being processed. - let addr=0u, d=0u, t=0xFFu, c=0u, sb=0u, pbits = op<0x100 ? 0x30u : 0x20u; - - // Define the opcode decoding matrix, which decides which micro-operations constitute - // any particular opcode. (Note: The PLA of 6502 works on a slightly different principle.) - def const o8 int = op/8; - def const o8m int = 1 << (op%8); - // Fetch op'th item from a bitstring encoded in a data-specific variant of base64, - // where each character transmits 8 bits of information rather than 6. - // This peculiar encoding was chosen to reduce the source code size. - defmacro O(s,code) [ - { - def const i int = o8m & (s[o8]>90 ? (130+" (),-089<>?BCFGHJLSVWZ[^hlmnxy|}"[s[o8]-94]) - : (s[o8]-" (("[s[o8]/39])); - if i!=0 { code; } - } - ] - def X = cpu.x, A = cpu.a, Y = cpu.y, PC = cpu.pc, S = cpu.s, P = cpu.p; - defmacro RB(a) [ cpu->RB(a) ] - defmacro WB(a,x) [ cpu->WB(a,x) ] - defmacro Misfire(...args) [Cmisfire(cpu, args)] - defmacro Pop() [Cpop(cpu)] - defmacro Push(x) [Cpush(cpu,x)] - def wrap = Cwrap; - - // Decode address operand - O(" !", addr = 0xFFFA) // NMI vector location - O(" *", addr = 0xFFFC) // Reset vector location - O("! ,", addr = 0xFFFE) // Interrupt vector location - O("zy}z{y}zzy}zzy}zzy}zzy}zzy}zzy}z ", addr = RB(PC++)) - O("2 yy2 yy2 yy2 yy2 XX2 XX2 yy2 yy ", d = X) // register index - O(" 62 62 62 62 om om 62 62 ", d = Y) - O("2 y 2 y 2 y 2 y 2 y 2 y 2 y 2 y ", addr=as(u8)(addr+d); d=0; cpu_tick()) // add zeropage-index - O(" y z!y z y z y z y z y z y z y z ", addr=as(u8)(addr); addr+=256*RB(PC++)) // absolute address - O("3 6 2 6 2 6 286 2 6 2 6 2 6 2 6 /", addr=RB(c=addr); addr+=256*RB(wrap(c,c+1)))// indirect w/ page wrap - O(" *Z *Z *Z *Z 6z *Z *Z ", Misfire(addr, addr+d)) // abs. load: extra misread when cross-page - O(" 4k 4k 4k 4k 6z 4k 4k ", RB(wrap(addr, addr+d)))// abs. store: always issue a misread - // Load source operand - O("aa__ff__ab__,4 ____ - ____ ", t &= A) // Many operations take A or X as operand. Some try in - O(" knnn 4 99 ", t &= X) // error to take both; the outcome is an AND operation. - O(" 9989 99 ", t &= Y) // sty,dey,iny,tya,cpy - O(" 4 ", t &= S) // tsx, las - O("!!!! !! !! !! ! !! !! !!/", t &= P.#raw|pbits; c = t)// php, flag test/set/clear, interrupts - O("_^__dc___^__ ed__98 ", c = t; t = 0xFF) // save as second operand - O("vuwvzywvvuwvvuwv zy|zzywvzywv ", t &= RB(addr+d)) // memory operand - O(",2 ,2 ,2 ,2 -2 -2 -2 -2 ", t &= RB(PC++)) // immediate operand - // Operations that mogrify memory operands directly - O(" 88 ", P.v = 0!= t & 0x40; P.n = 0!= t & 0x80) // bit - O(" nink nnnk ", sb = P.ic) // rol,rla, ror,rra,arr - O("nnnknnnk 0 ", P.c = 0!= t & 0x80) // rol,rla, asl,slo,[arr,anc] - O(" nnnknink ", P.c = 0!= t & 0x01) // lsr,sre, ror,rra,asr - O("ninknink ", t = (t << 1) | (sb * 0x01)) - O(" nnnknnnk ", t = (t >> 1) | (sb * 0x80)) - O(" ! kink ", t = as(u8)(t - 1)) // dec,dex,dey,dcp - O(" ! khnk ", t = as(u8)(t + 1)) // inc,inx,iny,isb - // Store modified value (memory) - O("kgnkkgnkkgnkkgnkzy|J kgnkkgnk ", WB(addr+d, t)) - O(" q ", WB(wrap(addr, addr+d), t &= ((addr+d) >> 8))) // [shx,shy,shs,sha?] - // Some operations used up one clock cycle that we did not account for yet - O("rpstljstqjstrjst - - - -kjstkjst/", cpu_tick()) // nop,flag ops,inc,dec,shifts,stack,transregister,interrupts - // Stack operations and unconditional jumps - O(" ! ! ! ", cpu_tick(); t = Pop()) // pla,plp,rti - O(" ! ! ", RB(PC++); PC = Pop(); PC |= (Pop() << 8)) // rti,rts - O(" ! ", RB(PC++)) // rts - O("! ! /", d=PC+(op!=0?-1:1); Push(d>>8); Push(d)) // jsr, interrupts - O("! ! 8 8 /", PC = addr) // jmp, jsr, interrupts - O("!! ! /", Push(t)) // pha, php, interrupts - // Bitmasks - O("! !! !! !! !! ! !! !! !!/", t = 1) - O(" ! ! !! !! ", t <<= 1) - O("! ! ! !! !! ! ! !/", t <<= 2) - O(" ! ! ! ! ! ", t <<= 4) - O(" ! ! ! !____ ", t = as(u8)(~t)) // sbc, isb, clear flag - O("`^__ ! ! !/", t = c | t) // ora, slo, set flag - O(" !!dc`_ !! ! ! !! !! ! ", t = c & t) // and, bit, rla, clear/test flag - O(" _^__ ", t = c ^ t) // eor, sre - // Conditional branches - O(" ! ! ! ! ", if 0!=t { cpu_tick(); Misfire(PC, addr = as(i8)(addr) + PC); PC=addr; }) - O(" ! ! ! ! ", if 0==t { cpu_tick(); Misfire(PC, addr = as(i8)(addr) + PC); PC=addr; }) - // Addition and subtraction - O(" _^__ ____ ", c = t; t += A + P.ic; P.v = 0!= (c^t) & (A^t) & 0x80; P.c = 0!= t & 0x100) - O(" ed__98 ", t = c - t; P.c = 0!= ~t & 0x100) // cmp,cpx,cpy, dcp, sbx - // Store modified value (register) - O("aa__aa__aa__ab__ 4 !____ ____ ", A = t) - O(" nnnn 4 ! ", X = t) // ldx, dex, tax, inx, tsx,lax,las,sbx - O(" ! 9988 ! ", Y = t) // ldy, dey, tay, iny - O(" 4 0 ", S = t) // txs, las, shs - O("! ! ! !! ! ! ! ! !/", P.#raw = t & ~0x30) // plp, rti, flag set/clear - // Generic status flag updates - O("wwwvwwwvwwwvwxwv 5 !}}||{}wv{{wv ", P.n = 0!= t & 0x80) - O("wwwv||wvwwwvwxwv 5 !}}||{}wv{{wv ", P.z = as(u8)(t) == 0) - O(" 0 ", P.v = 0!= (((t >> 5)+1)&2)) // [arr] - // All implemented opcodes are cycle-accurate and memory-access-accurate. - // [] means that this particular separate rule exists only to provide the indicated unofficial opcode(s). - } -} - -fn cpu_op(cpu *CPU) void { - let nmi_now = cpu.nmi; // check nmi - let op int = cpu->RB(cpu.pc++); - switch { - case cpu.reset; op = 0x101; - case nmi_now and !cpu.nmi_edge; op = 0x100; cpu.nmi_edge = #t; - case cpu.intr and !cpu.p.i; op = 0x102; - } - if !nmi_now { cpu.nmi_edge = #f; } - defmacro I(n) [&CIns<n+0>:ins,&CIns<n+1>:ins,&CIns<n+2>:ins,&CIns<n+3>:ins, - &CIns<n+4>:ins,&CIns<n+5>:ins,&CIns<n+6>:ins,&CIns<n+7>:ins,] - static const i [0x108]const *fn(*CPU)void = { - I(0x00)I(0x08)I(0x10)I(0x18)I(0x20)I(0x28)I(0x30)I(0x38) - I(0x40)I(0x48)I(0x50)I(0x58)I(0x60)I(0x68)I(0x70)I(0x78) - I(0x80)I(0x88)I(0x90)I(0x98)I(0xA0)I(0xA8)I(0xB0)I(0xB8) - I(0xC0)I(0xC8)I(0xD0)I(0xD8)I(0xE0)I(0xE8)I(0xF0)I(0xF8) I(0x100) - }; - i[op](cpu); - cpu.reset = #f; -} - -extern fn main(argc int, argv **u8) int { - if SDL_Init() != 0 { - fprintf(stderr, "SDL: %s", SDL_GetError()); - return 1; - } - let fp *FILE #?; - if argc < 2 { - fprintf(stderr, "ROM path?\n"); - return 1; - } - fp = fopen(argv[1], "rb"); - if fp == #null { - fprintf(stderr, "error opening rom\n"); - return 1; - } - - // read rom file header - if !(fgetc(fp) == 'N' and fgetc(fp) == 'E' and fgetc(fp) == 'S' and fgetc(fp) == 0x1A) { - fprintf(stderr, "bad rom\n"); - return 1; - } - - - let rom16count u8 = fgetc(fp), - vrom8count u8 = fgetc(fp), - ctrlbyte u8 = fgetc(fp), - mappernum u8 = fgetc(fp) | (ctrlbyte>>4); - fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp);fgetc(fp); - if mappernum >= 0x40 { mappernum &= 15; } - - // Read the ROM data - if rom16count > 0 { g_pak.rom = (as(*u8)malloc(rom16count * 0x4000))[0::rom16count*0x4000]; } - if vrom8count > 0 { g_pak.vram =(as(*u8)malloc(vrom8count * 0x2000))[0::vrom8count*0x2000]; } - else { g_pak.vram = (as(*u8)malloc(0x2000))[0::0x2000]; } - fread(&g_pak.rom[0], rom16count, 0x4000, fp); - fread(&g_pak.vram[0], vrom8count, 0x2000, fp); - - fclose(fp); - printf("%u * 16kB ROM, %u * 8kB VROM, mapper %u, ctrlbyte %02X\n", rom16count, vrom8count, mappernum, ctrlbyte); - - g_io->init(); - g_pak->init(); - g_pak.mapperno = mappernum; - g_cpu->init(); - g_ppu->init(); - g_apu->init(); - - // Pre-initialize RAM the same way as FCEUX does, to improve TAS sync. - for let a=0; a<0x800; ++a { - g_cpu.ram[a] = (a&4)!=0 ? 0xFF : 0x00; - } - - for ;; { - cpu_op(&g_cpu); - } -} diff --git a/examples/nesemu1/sdl.hff b/examples/nesemu1/sdl.hff deleted file mode 100644 index d001842..0000000 --- a/examples/nesemu1/sdl.hff +++ /dev/null @@ -1,43 +0,0 @@ -extern fn SDL_Init() int; -extern fn SDL_Quit() void; -extern fn SDL_GetError() *const u8; -extern fn SDL_Delay(u32) void; - -struct SDL_Window; -extern fn SDL_CreateWindow(title *const u8, x int, y int, w int, h int, flags u32) *SDL_Window; -extern fn SDL_DestroyWindow(*SDL_Window) void; - -struct SDL_PixelFormat; -struct SDL_BlitMap; - -struct SDL_Rect { - x int, y int, w int, h int -} - -struct SDL_Surface { - _flags u32, - format *SDL_PixelFormat, - w int, h int, - pitch int, - pixels *void, - userdata *void, - _locked intbool, - _lock_data *void, - clip_rect SDL_Rect, - _map *SDL_BlitMap, - refcount int, -} -extern fn SDL_CreateRGBSurface( - flags u32, width int, height int, depth int, Rmask u32, Gmask u32, Bmask u32, Amask u32 -) *SDL_Surface; -extern fn SDL_GetWindowSurface(*SDL_Window) *SDL_Surface; -extern fn SDL_UpdateWindowSurface(*SDL_Window) int; -extern fn SDL_UpperBlit(src *SDL_Surface, srcrect *SDL_Rect, dst *SDL_Surface, dstrect *SDL_Rect) int; - -union SDL_Event { - t enum { quit = 0x100 }, - _pad [8]f64, // idc exactly how big this union is but this should suffice -} -extern fn SDL_PollEvent(evt *SDL_Event) intbool; - -extern fn SDL_QueueAudio(dev int, data *const void, len u32) int; |