Reply 40 of 58, by Darkcrafter07
- Rank
- Newbie
analog_programmer wrote on 2025-08-12, 07:28:Thanks for the clarification! If I understand correctly, when "ydetail" variable in "ENG386.C" is set to 3, the produced executable will be for even lower details.
I have a suggestion. For easier compiling and linking of different executable versions you can separate make-files and altered .C and .H files in separate folders like "D07_SRC", "D07_SRC_LQ2" and "D07_SRC_LQ3".
Not yet sir, let it be like that.
What I've been trying to do for more than 4 months is getting a non-fpu version of slopes drawing to work. There's a version that finally doesn't crash and does perspective correction, yet faster on a nonfpu SX processor, it still looks awful and coming with a lot of geometric distortions.
a part from A.asm:
BITSOFPRECISNLQ equ 3BITSOFPRECISNLQPOW equ 8ALIGN 16PUBLIC setupslopevlin2_setupslopevlin2_:mov dword ptr [slopmach3b+3], ebx ;ptrmov dword ptr [slopmach5b+2], ecx ;pincneg ecxmov dword ptr [slopmach6b+2], ecx ;-pincmov edx, 1mov cl, alshl edx, cldec edxmov cl, ahshl edx, clmov dword ptr [slopmach7b+2], edxneg ahmov byte ptr [slopmach2b+2], ahsub ah, almov byte ptr [slopmach1b+2], ah; FPU removal: Convert floating-point init to integer scaling; Original: fild _asm1 + fstp _asm2mov eax, [_asm1];shl eax, 16 ; not needed nowmov [_asm2], eaxretALIGN 16PUBLIC slopevlin2_slopevlin2_:mov _ebpbak, ebpmov _espbak, espsub ecx, espmov dword ptr [slopmach4b+3], ecxmov ebp, eax ; Remove FPU loadslopmach6b: lea ebp, [eax+88888888h]add ebx, dword ptr _asm2 ; Replace FPU add with integer opmov _asm1, ebxshl ebx, 3mov eax, _globalx3mov ecx, _globaly3imul eax, ebximul ecx, ebxadd esi, eaxadd edi, ecxmov ebx, edxjmp short bigslopeloopbALIGN 16bigslopeloopb:
mov dword ptr _fpuasm, ebx ; Replace FPU storemov eax, ebx ; Modified from original FPU pathadd eax, eaxsbb edx, edxmov ecx, eaxshr ecx, 24and eax, 00ffe000hshr eax, 11sub cl, 2mov eax, dword ptr _reciptable[eax]shr eax, clxor eax, edxmov edx, _asm1mov ecx, _globalx3mov _asm1, eaxsub eax, edxmov edx, _globaly3imul ecx, eaximul eax, edxadd ebx, dword ptr _asm2 ; Replace FPU add with integer opcmp ebx, BITSOFPRECISNLQPOWmov _asm4, ebxmov cl, bljl short slopeskipminbmov cl, BITSOFPRECISNLQPOWslopeskipminb:mov ebx, esimov edx, edibeginnerslopeloopb:slopmach1b: shr ebx, 20add esi, ecxslopmach2b: shr edx, 26slopmach7b: and ebx, 88888888hadd edi, eaxslopmach5b: add ebp, 88888888hslopmach3b: mov dl, byte ptr [ebx+edx+88888888h]slopmach4b: mov ebx, dword ptr [esp+88888888h]sub esp, 4dec clmov al, byte ptr [ebx+edx]mov ebx, esimov [ebp], almov edx, edijnz short beginnerslopeloopbmov ebx, _asm4sub ebx, BITSOFPRECISNLQPOWjg short bigslopeloopbmov esp, _espbakmov ebp, _ebpbakret
the parts of engine.c:
long reciptable[2048], fpuasm, fpuasmnonfpu;long reciptablenonfpu[2048], deltaztable[2048];#define FIX16_SHIFT 14 // Matches Build's 30-bit fixed-point#define FIX16_FACTOR 0x40000000 // 1<<30 in 32-bit (1073741824)#define RECIP_TABLE_OFFSET 2048#define F1_0 0x10000 // 16.16 fixed-point scalingloadtables(){long i, fil;float z, dz, nextz, deltaz;if (tablesloaded == 0){initksqrt();// Generate original FPU-based reciptable at all times (it's int too)for(i = 0; i < 2048; i++){reciptable[i] = divscale30(2048L, i+2048);}if ( (use_fpu) == 0){// Generate non-FPU reciptable if nofpu parameter was passedreciptablenonfpu[0] = F1_0 / RECIP_TABLE_OFFSET;for(i = 1; i < 2048; i++){//reciptablenonfpu[i] = FIX16_FACTOR / (i + RECIP_TABLE_OFFSET);reciptablenonfpu[i] = F1_0 / i;}z = 1.0f; // initial Z (matches original FPU setup)dz = 0.01f; // step size (adjust based on your needs)for (i = 0; i < 2048; i++){// match max line heightnextz = z + dz;// hyperbolic stepdeltaz = (1.0f/z) - (1.0f/nextz);// convert to fixed-pointdeltaztable[i] = (long)(deltaz * (1 << 16));z = nextz;}}// Load built-in tables (sintable, radar angles, fonts)if ((fil = kopen4load("tables.dat", 0)) != -1){// Reciptable is NOT loaded from file - generated abovekread(fil, sintable, 2048 * 2);kread(fil, radarang, 640 * 2);for(i = 0; i < 640; i++) radarang[1279 - i] = -radarang[i];kread(fil, textfont, 1024);kread(fil, smalltextfont, 1024);kread(fil, britable, 1024);kclose(fil);
}tablesloaded = 1;}}
I think the biggest challenge is this line: add ebx, dword ptr _asm2
because as soon as FPU addition is removed it breaks the "hyperbolical continuity" as AI said. There are two lines like this in the asm code and I even tried to simulate it with deltaztable yet without much success.
Maybe somebody knows it better.