Duke07 - another MS-DOS port of Duke3D

Reply 40 of 58, by Darkcrafter07

Posted on 2025-10-08, 15:21

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

analog_programmer wrote on 2025-08-12, 07:28:

Thanks for the clarification! If I understand correctly, when "ydetail" variable in "ENG386.C" is set to 3, the produced executable will be for even lower details.

I have a suggestion. For easier compiling and linking of different executable versions you can separate make-files and altered .C and .H files in separate folders like "D07_SRC", "D07_SRC_LQ2" and "D07_SRC_LQ3".

Not yet sir, let it be like that.

What I've been trying to do for more than 4 months is get a non-FPU version of slope drawing to work. There's a version that finally doesn't crash and does perspective correction; although it is faster on a non-FPU SX processor, it still looks awful and comes with a lot of geometric distortions.

a part from A.asm:

; Precision constants for the low-quality (non-FPU) slope drawer.
; NOTE(review): 8 == 1 shl 3, so the two values presumably have to be changed together - confirm.
1BITSOFPRECISNLQ equ 3
2BITSOFPRECISNLQPOW equ 8
3
4ALIGN 16
5PUBLIC setupslopevlin2_
;-----------------------------------------------------------------------
; setupslopevlin2_ - patches the self-modifying instructions of slopevlin2_
; (labels slopmach1b..slopmach7b) before slope columns are drawn.
; In:    al, ah = two bit counts used to build the AND mask and shift counts
;        ebx    = value patched into slopmach3b (marked ";ptr" below)
;        ecx    = value patched into slopmach5b (marked ";pinc" below)
; Out:   _asm2 = _asm1
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
6setupslopevlin2_:
; Overwrite the 88888888h placeholders: displacement of the texel load at
; slopmach3b, immediate of "add ebp" at slopmach5b, and (negated) the
; displacement of "lea ebp" at slopmach6b.
7	mov dword ptr [slopmach3b+3], ebx    ;ptr
8	mov dword ptr [slopmach5b+2], ecx    ;pinc
9	neg ecx
10	mov dword ptr [slopmach6b+2], ecx    ;-pinc
11
; edx = ((1 shl al) - 1) shl ah, patched in as the AND mask at slopmach7b.
12	mov edx, 1
13	mov cl, al
14	shl edx, cl
15	dec edx
16	mov cl, ah
17	shl edx, cl
18	mov dword ptr [slopmach7b+2], edx
19
; Shift count of "shr edx" at slopmach2b becomes -ah. The CPU only uses the
; low 5 bits of a shift count, so -ah behaves as 32-ah.
20	neg ah
21	mov byte ptr [slopmach2b+2], ah
22
; Shift count of "shr ebx" at slopmach1b becomes -ah-al (ah was negated above).
23	sub ah, al
24	mov byte ptr [slopmach1b+2], ah
25
26	; FPU removal: Convert floating-point init to integer scaling
27	; Original: fild _asm1 + fstp _asm2
; The integer in _asm1 is now copied to _asm2 unchanged (no float conversion).
28	mov eax, [_asm1]
29	;shl eax, 16                  ; not needed now
30	mov [_asm2], eax
31
32	ret
33
34ALIGN 16
35PUBLIC slopevlin2_
;-----------------------------------------------------------------------
; slopevlin2_ - integer rework of the FPU slope column drawer.
; Register use on entry, as read by the code below:
;   eax      = base of the destination pointer (ebp = eax - pinc via slopmach6b)
;   ebx      = value that gets _asm2 added and is stored to _asm1
;   ecx      = address of a dword table that is walked downwards through esp
;   edx      = copied into ebx just before the outer loop
;   esi, edi = two texture coordinate accumulators
; ebp and esp are both repurposed inside the loops and only restored from
; _ebpbak/_espbak at the end, so the stack cannot be used in between.
; Must run after setupslopevlin2_, which patches the slopmach*b instructions.
;-----------------------------------------------------------------------
36slopevlin2_:
37	mov _ebpbak, ebp
38	mov _espbak, esp
39
; Patch the displacement at slopmach4b with ecx-esp, so that [esp+disp]
; initially addresses [ecx]; every "sub esp, 4" then steps one dword down.
40	sub ecx, esp
41	mov dword ptr [slopmach4b+3], ecx
42
; NOTE(review): this mov is dead - ebp is overwritten by the lea on the next line.
43	mov ebp, eax                            ; Remove FPU load
44slopmach6b: lea ebp, [eax+88888888h]
45	add ebx, dword ptr _asm2                 ; Replace FPU add with integer op
46
47	mov _asm1, ebx
; NOTE(review): the shift count 3 presumably corresponds to BITSOFPRECISNLQ - confirm.
48	shl ebx, 3
49
; Advance both texture accumulators by (_globalx3, _globaly3) * ebx.
50	mov eax, _globalx3
51	mov ecx, _globaly3
52	imul eax, ebx
53	imul ecx, ebx
54	add esi, eax
55	add edi, ecx
56
57	mov ebx, edx
58	jmp short bigslopeloopb
59ALIGN 16
; Outer loop: one pass per group of at most BITSOFPRECISNLQPOW pixels.
; NOTE(review): from here on ebx is both the input of the reciprocal lookup
; and the remaining-pixel counter compared against BITSOFPRECISNLQPOW, so the
; "add ebx, _asm2" further down also changes the loop count.
60bigslopeloopb:

…Show last 58 lines

61	mov dword ptr _fpuasm, ebx              ; Replace FPU store
62
; Table lookup on the bits of ebx: edx = all ones if bit 31 of ebx was set
; (carry out of the doubling), ecx = top byte of 2*ebx, eax = byte offset
; into _reciptable taken from bits 13..23 of 2*ebx.
63	mov eax, ebx                            ; Modified from original FPU path
64	add eax, eax
65	sbb edx, edx
66	mov ecx, eax
67	shr ecx, 24
68	and eax, 00ffe000h
69	shr eax, 11
70	sub cl, 2
71	mov eax, dword ptr _reciptable[eax]
72	shr eax, cl
73	xor eax, edx
; eax = new lookup result - previous _asm1; _asm1 is updated to the new result.
; ecx and eax then become the per-pixel steps for esi and edi.
74	mov edx, _asm1
75	mov ecx, _globalx3
76	mov _asm1, eax
77	sub eax, edx
78	mov edx, _globaly3
79	imul ecx, eax
80	imul eax, edx
81
82	add ebx, dword ptr _asm2                ; Replace FPU add with integer op
83
; cl = min(ebx, BITSOFPRECISNLQPOW) using a signed compare; ebx is kept in _asm4.
; NOTE(review): if bl is 0 here, "dec cl" wraps and the inner loop runs 256 times - confirm this cannot happen.
; The "add esi, ecx" in the inner loop uses all of ecx, including the cl counter.
84	cmp ebx, BITSOFPRECISNLQPOW
85	mov _asm4, ebx
86	mov cl, bl
87	jl short slopeskipminb
88	mov cl, BITSOFPRECISNLQPOW
89slopeskipminb:
90
91	mov ebx, esi
92	mov edx, edi
93
; Inner loop: one pixel per pass. The 20, 26 and 88888888h operands are
; placeholders that setupslopevlin2_ and the entry code overwrite.
94beginnerslopeloopb:
95slopmach1b: shr ebx, 20
96	add esi, ecx
97slopmach2b: shr edx, 26
98slopmach7b: and ebx, 88888888h
99	add edi, eax
100slopmach5b: add ebp, 88888888h
101slopmach3b: mov dl, byte ptr [ebx+edx+88888888h]
102slopmach4b: mov ebx, dword ptr [esp+88888888h]
103	sub esp, 4
; The flags set by "dec cl" survive the following mov instructions up to the jnz.
104	dec cl
; Second lookup: the byte read above (dl) indexes the table whose address was
; just loaded into ebx; the result is written to the destination at [ebp].
105	mov al, byte ptr [ebx+edx]
106	mov ebx, esi
107	mov [ebp], al
108	mov edx, edi
109	jnz short beginnerslopeloopb
110
; Continue with the next group while _asm4 - BITSOFPRECISNLQPOW > 0 (signed).
111	mov ebx, _asm4
112	sub ebx, BITSOFPRECISNLQPOW
113	jg short bigslopeloopb
114
115	mov esp, _espbak
116	mov ebp, _ebpbak
117	ret

the parts of engine.c:

1
// Lookup tables filled by loadtables(): reciptable always, reciptablenonfpu
// and deltaztable only when use_fpu == 0.
// NOTE(review): fpuasmnonfpu is not referenced anywhere in this excerpt.
2long reciptable[2048], fpuasm, fpuasmnonfpu;
3long reciptablenonfpu[2048], deltaztable[2048];
4
// NOTE(review): FIX16_SHIFT and FIX16_FACTOR are only mentioned in a
// commented-out line below; nothing in this excerpt uses them.
5#define FIX16_SHIFT 14          // Matches Build's 30-bit fixed-point
6#define FIX16_FACTOR 0x40000000 // 1<<30 in 32-bit (1073741824)
7#define RECIP_TABLE_OFFSET 2048
8
9#define F1_0	0x10000         // 16.16 fixed-point scaling
10
// loadtables: one-time table setup, guarded by tablesloaded. Generates the
// reciprocal tables in code and reads the remaining tables from "tables.dat".
11loadtables()
12{
13    long i, fil;
14    float z, dz, nextz, deltaz;
15
16    if (tablesloaded == 0)
17    {
18        initksqrt();
19
20        // Generate original FPU-based reciptable at all times (it's int too)
21            for(i = 0; i < 2048; i++)
22            { 
23                reciptable[i] = divscale30(2048L, i+2048);
24            }
25        if ( (use_fpu) == 0)
26        {
27            // Generate non-FPU reciptable if nofpu parameter was passed
            // NOTE(review): entry 0 is F1_0/2048 while entries 1..2047 are
            // F1_0/i without the 2048 offset, so the table is not continuous
            // between index 0 and index 1 - confirm this is intended.
28            reciptablenonfpu[0] = F1_0 / RECIP_TABLE_OFFSET;
29            for(i = 1; i < 2048; i++)
30            {
31                //reciptablenonfpu[i] = FIX16_FACTOR / (i + RECIP_TABLE_OFFSET);
32                reciptablenonfpu[i] = F1_0 / i;
33            }
34
            // deltaztable[i] = 1/z - 1/(z+dz) in 16.16 fixed point, for z
            // starting at 1.0 and advancing by dz = 0.01 per entry. This part
            // still uses float arithmetic, once at load time.
35            z = 1.0f;  // initial Z (matches original FPU setup)
36            dz = 0.01f; // step size (adjust based on your needs)
37            for (i = 0; i < 2048; i++)
38            {
39                // match max line height
40                nextz = z + dz;
41                // hyperbolic step
42                deltaz = (1.0f/z) - (1.0f/nextz);
43                // convert to fixed-point
44                deltaztable[i] = (long)(deltaz * (1 << 16));
45                z = nextz;
46            }
47        }
48
49
50        // Load built-in tables (sintable, radar angles, fonts)
51        if ((fil = kopen4load("tables.dat", 0)) != -1)
52        {
53            // Reciptable is NOT loaded from file - generated above
54            kread(fil, sintable, 2048 * 2);
55            kread(fil, radarang, 640 * 2);
            // Fill radarang[640..1279] with the negated first 640 entries in reverse order.
56            for(i = 0; i < 640; i++) radarang[1279 - i] = -radarang[i];
57            kread(fil, textfont, 1024);
58            kread(fil, smalltextfont, 1024);
59            kread(fil, britable, 1024);
60            kclose(fil);

…Show last 6 lines

61        }
62
        // NOTE(review): set even when tables.dat could not be opened, so a
        // failed load is never retried.
63        tablesloaded = 1;
64    }
65}

I think the biggest challenge is this line: add ebx, dword ptr _asm2
because as soon as FPU addition is removed it breaks the "hyperbolical continuity" as AI said. There are two lines like this in the asm code and I even tried to simulate it with deltaztable yet without much success.

Maybe somebody knows it better.

Reply 41 of 58, by Darkcrafter07

Posted on 2025-10-24, 17:43

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

So we're almost there on non-fpu slopes, I think it still looks meh but much closer to the result I'd desire to have. You can download the compiled exe to see how it's going. My further attempt would be to try decreasing "amplitude" of ebx _asm2 additions. It's slower than low detail degraded version but much faster than fully detailed fpu version on 486sx. Later we could combine both approaches and do fully integer slopes in 2x detail loss instead of 8x or even keep it as is. If it goes fine for sure.

Part of A.asm:

; Variables defined on the C side (engine.c) and read by slopevlin2_.
1EXTRN _surfx : dword
2EXTRN _surfy : dword
3EXTRN _slopedastat : byte
4
; NOTE(review): 8 == 1 shl 3, so the two constants presumably have to be changed together - confirm.
5BITSOFPRECISNLQ equ 3
6BITSOFPRECISNLQPOW equ 8
7
8ALIGN 16
9PUBLIC setupslopevlin2_
;-----------------------------------------------------------------------
; setupslopevlin2_ - patches the self-modifying instructions of slopevlin2_
; (labels slopmach1b..slopmach7b) before slope columns are drawn.
; In:    al, ah = two bit counts used to build the AND mask and shift counts
;        ebx    = value patched into slopmach3b (marked ";ptr" below)
;        ecx    = value patched into slopmach5b (marked ";pinc" below)
; Out:   _asm2 = _asm1
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
10setupslopevlin2_:
; Overwrite the 88888888h placeholders: displacement of the texel load at
; slopmach3b, immediate of "add ebp" at slopmach5b, and (negated) the
; displacement of "lea ebp" at slopmach6b.
11	mov dword ptr [slopmach3b+3], ebx    ;ptr
12	mov dword ptr [slopmach5b+2], ecx    ;pinc
13	neg ecx
14	mov dword ptr [slopmach6b+2], ecx    ;-pinc
15
; edx = ((1 shl al) - 1) shl ah, patched in as the AND mask at slopmach7b.
16	mov edx, 1
17	mov cl, al
18	shl edx, cl
19	dec edx
20	mov cl, ah
21	shl edx, cl
22	mov dword ptr [slopmach7b+2], edx
23
; Shift count of "shr edx" at slopmach2b becomes -ah. The CPU only uses the
; low 5 bits of a shift count, so -ah behaves as 32-ah.
24	neg ah
25	mov byte ptr [slopmach2b+2], ah
26
; Shift count of "shr ebx" at slopmach1b becomes -ah-al (ah was negated above).
27	sub ah, al
28	mov byte ptr [slopmach1b+2], ah
29
; Plain integer copy of _asm1 into _asm2.
30	mov eax, [_asm1]
31	mov [_asm2], eax
32
33	ret
34
35ALIGN 16
36PUBLIC slopevlin2_
;-----------------------------------------------------------------------
; slopevlin2_ - integer slope column drawer, second revision.
; Differences visible against the earlier listing: ebp/esi/edi/ebx are
; pushed and popped, floors (_slopedastat != 0) negate _asm3/_globalx3/
; _globaly3 first, the per-group steps use _surfx/_surfy, and ebx gets an
; extra +1 per group.
; Register use on entry, as read by the code below:
;   eax      = base of the destination pointer (ebp = eax - pinc via slopmach6b)
;   ebx      = value that gets _asm2 added and is stored to _asm1
;   ecx      = address of a dword table that is walked downwards through esp
;   edx      = copied into ebx just before the outer loop
;   esi, edi = two texture coordinate accumulators
; esp is repurposed inside the loops, so the stack cannot be used between
; "mov _espbak, esp" and the restore at the end.
;-----------------------------------------------------------------------
37slopevlin2_:
38	; Save critical registers
39	push ebp
40	push esi
41	push edi
42	push ebx
43
44	; Surface type check (ceiling(0) vs floor(1))
45	cmp byte ptr [_slopedastat], 0
46	jz slp2drawasusual                      ; Skip inversion for ceilings
47
; NOTE(review): these negate the C globals in memory and nothing in this
; routine undoes it, so the caller sees the negated values after the call.
48	; Only invert for floors (dastat=1)
49	neg dword ptr [_asm3]
50	neg dword ptr [_globalx3]
51	neg dword ptr [_globaly3]
52
53slp2drawasusual:
; esp is captured after the four pushes, so restoring it at the end leaves
; the saved registers on top of the stack for the pops.
54	mov _ebpbak, ebp
55	mov _espbak, esp
56
; Patch the displacement at slopmach4b with ecx-esp, so that [esp+disp]
; initially addresses [ecx]; every "sub esp, 4" then steps one dword down.
57	sub ecx, esp
58	mov dword ptr [slopmach4b+3], ecx
59
; NOTE(review): this mov is dead - ebp is overwritten by the lea at slopmach6b.
60	mov ebp, eax                            ; Remove FPU load

…Show last 81 lines

61slopmach6b: lea ebp, [eax+88888888h]
62	add ebx, dword ptr _asm2                ; Replace FPU add with integer op
63
64	mov _asm1, ebx
; NOTE(review): the shift count 3 presumably corresponds to BITSOFPRECISNLQ - confirm.
65	shl ebx, 3
66
; Advance both texture accumulators by (_globalx3, _globaly3) * ebx.
67	mov eax, [_globalx3]                    ; Proper dereference
68	mov ecx, [_globaly3]                    ; Proper dereference
69	imul eax, ebx
70	imul ecx, ebx
71	add esi, eax
72	add edi, ecx
73
74	mov ebx, edx
75	jmp short bigslopeloopb
76ALIGN 16
; Outer loop: one pass per group of at most BITSOFPRECISNLQPOW pixels.
; NOTE(review): ebx is both the input of the reciprocal lookup and the
; remaining-pixel counter, so the two adds to ebx below change the loop count.
77bigslopeloopb:
78	mov dword ptr _fpuasm, ebx              ; Replace FPU store
79
; Table lookup on the bits of ebx: edx = all ones if bit 31 of ebx was set
; (carry out of the doubling), ecx = top byte of 2*ebx, eax = byte offset
; into _reciptable taken from bits 13..23 of 2*ebx.
80	mov eax, ebx                            ; Modified from original FPU path
81	add eax, eax
82	sbb edx, edx
83	mov ecx, eax
84	shr ecx, 24
85	and eax, 00ffe000h
86	shr eax, 11
87	sub cl, 2
88	mov eax, dword ptr _reciptable[eax]
89	shr eax, cl
90	xor eax, edx
; eax = new lookup result - previous _asm1; the per-pixel steps for esi/edi
; are that difference times _surfx and _surfy.
91	mov edx, _asm1
92	mov ecx, _surfx
93	mov _asm1, eax
94	sub eax, edx
95	mov edx, _surfy
96	imul ecx, eax
97	imul eax, edx
98
99	add ebx, dword ptr _asm2                ; Replace FPU add with integer op
100	add ebx, 1
101
102
; cl = min(ebx, BITSOFPRECISNLQPOW) using a signed compare; ebx is kept in _asm4.
; NOTE(review): if bl is 0 here, "dec cl" wraps and the inner loop runs 256 times - confirm this cannot happen.
103	cmp ebx, BITSOFPRECISNLQPOW
104	mov _asm4, ebx
105	mov cl, bl
106	jl short slopeskipminb
107	mov cl, BITSOFPRECISNLQPOW
108slopeskipminb:
109
110	mov ebx, esi
111	mov edx, edi
112
; Inner loop: one pixel per pass. The 20, 26 and 88888888h operands are
; placeholders that setupslopevlin2_ and the entry code overwrite.
113beginnerslopeloopb:
114slopmach1b: shr ebx, 20
115	add esi, ecx
116slopmach2b: shr edx, 26
117slopmach7b: and ebx, 88888888h
118	add edi, eax
119slopmach5b: add ebp, 88888888h
120slopmach3b: mov dl, byte ptr [ebx+edx+88888888h]
121slopmach4b: mov ebx, dword ptr [esp+88888888h]
122	sub esp, 4
; The flags set by "dec cl" survive the following mov instructions up to the jnz.
123	dec cl
124	mov al, byte ptr [ebx+edx]
125	mov ebx, esi
126	mov [ebp], al
127	mov edx, edi
128	jnz short beginnerslopeloopb
129
; Continue with the next group while _asm4 - BITSOFPRECISNLQPOW > 0 (signed).
130	mov ebx, _asm4
131	sub ebx, BITSOFPRECISNLQPOW
132	jg short bigslopeloopb
133
134	mov esp, _espbak
135	mov ebp, _ebpbak
136	pop ebx
137	pop edi
138	pop esi
139	pop ebp
140	ret

Part of engine.c:

1#define BITSOFPRECISNLQ 3
// NOTE(review): SCALEFACTOR is used below as a divisor (x / SCALEFACTOR),
// not as a shift count, so it reduces the magnitude of the surf* values.
2long SCALEFACTOR = 15; // extra precision
// surfx, surfy and slopedastat are read by slopevlin2_ in A.asm (EXTRN there).
// NOTE(review): surfx_prev and surfy_prev are not used anywhere in this excerpt.
3long surfx, surfy, surfstepx, surfstepy, surfz, surfstepz;
4long surfx_prev, surfy_prev;
5char slopedastat;
// grouscan_nonfpu: draws the sloped ceiling (dastat == 0) or floor
// (dastat != 0) of sector sectnum for screen columns dax1..dax2, issuing one
// slopevlin2() call per column that has at least one visible row.
6grouscan_nonfpu (long dax1, long dax2, long sectnum, char dastat)
7{
8	long i, j, k, l, m, n, x, y, dx, dy, wx, wy, x1, y1, x2, y2, daz;
9	long daslope, dasqr;
10	long dashade, shoffs, shinc, m1, m2, *mptr1, *mptr2, *nptr1, *nptr2;
11	walltype *wal;
12	sectortype *sec;
13
14	sec = &sector[sectnum];
15
16	slopedastat = dastat; // pass to asm to draw ceilings-floors differently
17
	// Pick the ceiling or floor attributes of the sector; return early when
	// the view position is on the wrong side of the surface.
18	if (dastat == 0)
19	{
20		if (globalposz <= getceilzofslope(sectnum,globalposx,globalposy))
21			return;  //Back-face culling
22		globalorientation = sec->ceilingstat;
23		globalpicnum = sec->ceilingpicnum;
24		globalshade = sec->ceilingshade;
25		globalpal = sec->ceilingpal;
26		daslope = sec->ceilingheinum;
27		daz = sec->ceilingz;
28	}
29	else
30	{
31		if (globalposz >= getflorzofslope(sectnum,globalposx,globalposy))
32			return;  //Back-face culling
33		globalorientation = sec->floorstat;
34		globalpicnum = sec->floorpicnum;
35		globalshade = sec->floorshade;
36		globalpal = sec->floorpal;
37		daslope = sec->floorheinum;
38		daz = sec->floorz;
39	}
40
	// Resolve animated tiles, skip zero-sized tiles, load the tile if needed.
41	if ((picanm[globalpicnum]&192) != 0) globalpicnum += animateoffs(globalpicnum,sectnum);
42	setgotpic(globalpicnum);
43	if ((tilesizx[globalpicnum] <= 0) || (tilesizy[globalpicnum] <= 0)) return;
44	if (waloff[globalpicnum] == 0) loadtile(globalpicnum);
45
	// (wx,wy) = vector along the sector's first wall, scaled by daslope and
	// dasqr (presumably the reciprocal of the wall length - confirm krecipasm).
46	wal = &wall[sec->wallptr];
47	wx = wall[wal->point2].x - wal->x;
48	wy = wall[wal->point2].y - wal->y;
49	dasqr = krecipasm(nsqrtasm(wx*wx+wy*wy));
50	i = mulscale21(daslope,dasqr);
51	wx *= i; wy *= i;
52
53	globalx = -mulscale19(singlobalang,xdimenrecip);
54	globaly = mulscale19(cosglobalang,xdimenrecip);
55	globalx1 = (globalposx<<8);
56	globaly1 = -(globalposy<<8);
57	i = (dax1-halfxdimen)*xdimenrecip;
58	globalx2 = mulscale16(cosglobalang<<4,viewingrangerecip) - mulscale27(singlobalang,i);
59	globaly2 = mulscale16(singlobalang<<4,viewingrangerecip) + mulscale27(cosglobalang,i);
60	globalzd = (xdimscale<<9);

…Show last 131 lines

61	globalzx = -dmulscale17(wx,globaly2,-wy,globalx2) + mulscale10(1-globalhoriz,globalzd);
62	globalz = -dmulscale25(wx,globaly,-wy,globalx);
63
64	if (globalorientation&64)  //Relative alignment
65	{
66		dx = mulscale14(wall[wal->point2].x-wal->x,dasqr);
67		dy = mulscale14(wall[wal->point2].y-wal->y,dasqr);
68
69		i = nsqrtasm(daslope*daslope+16777216);
70
71		x = globalx; y = globaly;
72		globalx = dmulscale16(x,dx,y,dy);
73		globaly = mulscale12(dmulscale16(-y,dx,x,dy),i);
74
75		x = ((wal->x-globalposx)<<8); y = ((wal->y-globalposy)<<8);
76		globalx1 = dmulscale16(-x,dx,-y,dy);
77		globaly1 = mulscale12(dmulscale16(-y,dx,x,dy),i);
78
79		x = globalx2; y = globaly2;
80		globalx2 = dmulscale16(x,dx,y,dy);
81		globaly2 = mulscale12(dmulscale16(-y,dx,x,dy),i);
82	}
	// Orientation bits: 0x4 swaps the x/y texture axes, 0x10 negates the x
	// terms, 0x20 negates the y terms.
83	if (globalorientation&0x4)
84	{
85		i = globalx; globalx = -globaly; globaly = -i;
86		i = globalx1; globalx1 = globaly1; globaly1 = i;
87		i = globalx2; globalx2 = -globaly2; globaly2 = -i;
88	}
89	if (globalorientation&0x10) { globalx1 = -globalx1, globalx2 = -globalx2, globalx = -globalx; }
90	if (globalorientation&0x20) { globaly1 = -globaly1, globaly2 = -globaly2, globaly = -globaly; }
91
92      //isn't "daz" a texture scale for the whole function here?
93	daz = dmulscale9(wx,globalposy-wal->y,-wy,globalposx-wal->x) + ((daz-globalposz)<<8);
94	globalx2 = mulscale20(globalx2,daz); globalx = mulscale28(globalx,daz);
95	globaly2 = mulscale20(globaly2,-daz); globaly = mulscale28(globaly,-daz);
96
	// Shift amounts derived from the tile size nibbles in picsiz; bit 8 of
	// the orientation adds one more bit to both.
97	i = 8-(picsiz[globalpicnum]&15); j = 8-(picsiz[globalpicnum]>>4);
98	if (globalorientation&8) { i++; j++; }
99	globalx1 <<= (i+12); globalx2 <<= i; globalx <<= i;
100	globaly1 <<= (j+12); globaly2 <<= j; globaly <<= j;
101
102	if (dastat == 0)
103	{
104		globalx1 += (((long)sec->ceilingxpanning)<<24);
105		globaly1 += (((long)sec->ceilingypanning)<<24);
106	}
107	else
108	{
109		globalx1 += (((long)sec->floorxpanning)<<24);
110		globaly1 += (((long)sec->floorypanning)<<24);
111	}
112
	// asm1 is copied to asm2 by setupslopevlin2 (see A.asm) and used there
	// as the per-group increment.
113	asm1 = -(globalzd>>(16-BITSOFPRECISNLQ));
114
115	globvis = globalvisibility;
116	if (sec->visibility != 0) globvis = mulscale4(globvis,(long)((unsigned char)(sec->visibility+16)));
117	globvis = mulscale13(globvis,daz);
118	globvis = mulscale16(globvis,xdimscale);
119	j = FP_OFF(palookup[globalpal]);
120
	// Arguments: tile size bit counts packed as (x bits) + (y bits << 8),
	// the tile data address, and the negated ylookup[1].
121	setupslopevlin2(((long)(picsiz[globalpicnum]&15))+(((long)(picsiz[globalpicnum]>>4))<<8),waloff[globalpicnum],-ylookup[1]);
122
123	l = (globalzd>>16);
124
125	shinc = mulscale16(globalz,xdimenscale);
126	if (shinc > 0) shoffs = (4<<15); else shoffs = ((2044-ydimen)<<15);
127	if (dastat == 0) y1 = umost[dax1]; else y1 = max(umost[dax1],dplc[dax1]);
128	m1 = mulscale16(y1,globalzd) + (globalzx>>6);
129		//Avoid visibility overflow by crossing horizon
130	if (globalzd > 0) m1 += (globalzd>>16); else m1 -= (globalzd>>16);
131	m2 = m1+l;
132	mptr1 = (long *)&slopalookup[y1+(shoffs>>15)]; mptr2 = mptr1+1;
133
	// One iteration per screen column x.
134	for(x=dax1;x<=dax2;x++)
135	{
136		if (dastat == 0) { y1 = umost[x]; y2 = min(dmost[x],uplc[x])-1; }
137				else { y1 = max(umost[x],dplc[x]); y2 = dmost[x]-1; }
138
139		if (y1 <= y2)
140		{
			// Extend the filled part of slopalookup downwards (mptr1) and
			// upwards (mptr2) until it covers rows y1..y2 of this column.
141			nptr1 = (long *)&slopalookup[y1+(shoffs>>15)];
142			nptr2 = (long *)&slopalookup[y2+(shoffs>>15)];
143			while (nptr1 <= mptr1)
144			{
145				*mptr1-- = j + (getpalookup((long)mulscale24(krecipasm(m1),globvis),globalshade)<<8);
146				m1 -= l;
147			}
148			while (nptr2 >= mptr2)
149			{
150				*mptr2++ = j + (getpalookup((long)mulscale24(krecipasm(m2),globvis),globalshade)<<8);
151				m2 += l;
152			}
153
			// NOTE(review): floors are negated here, and the slopevlin2_
			// listing in A.asm negates _globalx3/_globaly3 again when
			// _slopedastat != 0 - confirm the double negation is intended.
154			if ( (dastat) == 0)
155			{
156			    //ceilings
157			    globalx3 = (globalx2>>10);
158			    globaly3 = (globaly2>>10);
159			} else {
160			    //floors
161			    globalx3 = -(globalx2>>10);
162			    globaly3 = -(globaly2>>10);
163			}
164
165			    // Replace globalx3/globaly3 with scaled versions
166			    surfx = globalx3 / SCALEFACTOR;
167			    surfy = globaly3 / SCALEFACTOR;
168			    surfz = globalz / SCALEFACTOR;
169			    
170			    // Calculate stepping parameters
171			    surfstepx = globalx / SCALEFACTOR;
172			    surfstepy = globaly / SCALEFACTOR;
173			    surfstepz = globalz / SCALEFACTOR;
174
			// Draw rows y1..y2 of this column, starting at row y2.
175			asm3 = mulscale16(y2,globalzd) + (globalzx>>6);
176			slopevlin2(ylookup[y2]+x+frameoffset,krecipasm(asm3>>3),(long)nptr2,y2-y1+1,globalx1,globaly1);
177
178			if ((x&15) == 0) faketimerhandler();
179		}
180		globalx2 += globalx + (globalx >> 16);
181		globaly2 += globaly + (globaly >> 16);
182		globalzx += globalz;
183
		// NOTE(review): surfx/surfy/surfz are reassigned from globalx3/
		// globaly3/globalz before every slopevlin2() call above, so these
		// increments never reach a call made from this function.
184		surfx += surfstepx;
185		surfy += surfstepy;
186		surfz += surfstepz;
187
188		shoffs += shinc;
189	}
190}

Last edited by Darkcrafter07 on 2025-10-24, 19:26. Edited 1 time in total.

Reply 42 of 58, by marxveix

Posted on 2025-10-24, 18:07

marxveix Offline

Rank Oldbie

Rank: Oldbie
Posts: 974
Joined: 2018-03-05, 21:46

Thank you! I try it later, but not today.

Best ATi Rage3 drivers for 3DCIF / Direct3D / OpenGL / DVD : ATi RagePro drivers and software
30+MiniGL / OpenGL Win 9x dll files for all ATi Rage3 cards : Re: ATi RagePro OpenGL files

Reply 43 of 58, by Darkcrafter07

Posted on 2025-10-24, 18:12

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

marxveix wrote on 2025-10-24, 18:07:

Thank you! I try it later, but not today.

You're welcome but don't hurry, this version is really buggy and may make your computer freeze; there are still illegal memory writes going on.
Upd... fixed

Reply 44 of 58, by marxveix

Posted on 2025-10-27, 10:45

marxveix Offline

Rank Oldbie

Rank: Oldbie
Posts: 974
Joined: 2018-03-05, 21:46

Darkcrafter07 wrote on 2025-10-24, 18:12:

marxveix wrote on 2025-10-24, 18:07:

Thank you! I try it later, but not today.

You're welcome but don't hurry, this version is really buggy and may make your computer freeze, there are illegal memory writes are still going.
Upd... fixed

Now its bugfixed, better to try with it?

Best ATi Rage3 drivers for 3DCIF / Direct3D / OpenGL / DVD : ATi RagePro drivers and software
30+MiniGL / OpenGL Win 9x dll files for all ATi Rage3 cards : Re: ATi RagePro OpenGL files

Reply 45 of 58, by Darkcrafter07

Posted on 2025-10-29, 14:19

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

marxveix wrote on 2025-10-24, 18:07:

Now its bugfixed, better to try with it?

Yes but it still looks wonky, I don't know of a way to fix it yet.

Reply 46 of 58, by Darkcrafter07

Posted on 2025-12-29, 06:49

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

v0.17 - distance rendering optimization. Makes distant walls render 8 times less often than close ones, according to the game ticker. May speed up rendering on big open maps but may introduce visual artifacts. Enable with the /distslow command line parameter. Both Duke07.exe and D07LQ2x.exe are bundled with this. Currently takes effect from 32000 map units. Edit the drawalls_distslow function in engine.c or eng386.c to change the distance.

Duke07_src_n_exe_v017

Reply 47 of 58, by marxveix

Posted on 2025-12-29, 07:06

marxveix Offline

Rank Oldbie

Rank: Oldbie
Posts: 974
Joined: 2018-03-05, 21:46

Darkcrafter07 wrote on 2025-12-29, 06:49:

v0.17 - distance rendering optimization. Make distant walls render less 8 times less often than close according to the game ticker. May speed-up rendering on big open maps but may introduce visual artifacts. Enable by typing /distslow command line parameter. Both Duke07.exe and D07LQ2x.exe are bundled with this. Currently acts since 32000 map units. Edit engine.c or eng386.c drawalls_distslow function to change distance.

Duke07_src_n_exe_v017

Thanks, downloaded it for later use.

Best ATi Rage3 drivers for 3DCIF / Direct3D / OpenGL / DVD : ATi RagePro drivers and software
30+MiniGL / OpenGL Win 9x dll files for all ATi Rage3 cards : Re: ATi RagePro OpenGL files

Reply 48 of 58, by Darkcrafter07

Posted on 2026-01-11, 04:12

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

marxveix wrote on 2025-12-29, 07:06:

Darkcrafter07 wrote on 2025-12-29, 06:49:

v0.17 - distance rendering optimization. Make distant walls render less 8 times less often than close according to the game ticker. May speed-up rendering on big open maps but may introduce visual artifacts. Enable by typing /distslow command line parameter. Both Duke07.exe and D07LQ2x.exe are bundled with this. Currently acts since 32000 map units. Edit engine.c or eng386.c drawalls_distslow function to change distance.

Duke07_src_n_exe_v017

Thanks, downloaded it for later use.

You're welcome, glad someone found it cool! Btw, there must be a better way to do that distance magic but that must be hard to implement.

So we're rendering sky first (the background), as we know that parallax skies setup is done not so flexible in the game so for a parallax map to show up all tiles it consists of, the entire enclosed area must have the same picnum.
So for the idea to work there must be an option to override all skies in the area or even the whole map.

That could also lead to bad picture in the distance as distant geometry can't keep up, so maybe additional parallax-sky-alike horizontal and vertical scrolling on the distance geometry buffer, so the roadmap will be like:

1) Render distant geometry in a separate buffer and apply parallax scrolling to it;
2) Use a 2nd buffer and render parallax skies first here;
3) Copy distant geometry buffer result (scrolling corrected) and draw on top of sky;
4) On the top of it all render only close geometry.

Reply 49 of 58, by Darkcrafter07

Posted on 2026-03-08, 12:01

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

So I could finally get a bit further regarding the non-fpu slopes. It still does use some FPU operations like krecipasm calls but tries to do it less.

Texture bending and drifting effect reduced due to onscreen slope segmenting aka tesselation. The fpu krecipasm routine (reciprocal of 1/z) is attempted to be skipped each scanline (SLOPE_FPU_FACTOR = 1).

Here's exe and src of the quite unstable version but working... The result looks sawwy and jaggy but pixelization is less (3x instead of 8x). I heard there are techniques like subpixel correction but it's a wonder it works at least like this.

Use "/nofpu" argument to activate.

Reply 50 of 58, by Darkcrafter07

Posted on 2026-03-10, 01:37

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

Hmm we're getting some progress on integer slopes again:
- slopes off the screen solved (faulty int add command in a.asm slopevlin2 in bigslopeloop);
- fully integer versions of krecipasm each with their own reciptables (separate for texturing and shading), that gives solid FPS boost (up to 50% ?);
- a lookup table optimized segments texture calculation (+15% FPS);

As usual, use "/nofpu" command to activate...

Reply 51 of 58, by Darkcrafter07

Posted on 2026-03-13, 11:19

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

Some work is going on the alternative implementation based on Ken's own C variant of setupslopevlin-slopevlin function but optimized.
Use "/nofpu" command as always.

Reply 52 of 58, by Darkcrafter07

Posted on 2026-03-14, 12:53

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

Ok, this C variant seems like the way to go for 486SX. Only 2x quality degradation instead of 8x with almost the same speed is a big milestone.
This time it doesn't just simulate Ken's original asm "slopevlin" way to optimize between each vertical 8 pixels (4 px in C version because 8 looks ugly) but also unrolls the loop exactly four times, delivering the best speedup. This one also has visual bugs fixed.

Use "/nofpu" command to activate...

Reply 53 of 58, by Darkcrafter07

Posted on 2026-03-16, 23:16

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

So here comes big update about integer slopes, this must be one of the best attempts I've ever had.
They look almost as good as the original fpu ones, have no pixelization (except for 386 versions) and pretty fast!

The way the original "slopevlin" does the magic is called relative interpolation of DU/DV between true perspective calculations every 8 px.
This time we're bundling not just the C version but also an SMC (self-modifying code) asm routine.
It was born thanks to disassembling the resulting "slopevlin2relativeC" function and took performance even further.

So here comes Duke07 v0.18 sources and compiled binaries...
Another google drive download link

Some fast comparison on IMGUR: https://imgur.com/a/uzs3LqJ

Reply 54 of 58, by Darkcrafter07

Posted on 2026-03-18, 15:30

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

Some update over integer slopes:
- even more optimized and stabilized rendering;
- Duke07.exe compiled entirely in 486 optimization mode resulting in a quite noticeable performance improvement.

At this moment, the speed difference between original FPU slopes and fully integer mode, as the overall combined ingame image is about 15-20% in PCem 486sx2-66MHz in the most hard scenes.

So here come Duke07 v0.19 sources and compiled binaries...
Another google drive download link

Reply 55 of 58, by marxveix

Posted on 2026-03-27, 12:03

marxveix Offline

Rank Oldbie

Rank: Oldbie
Posts: 974
Joined: 2018-03-05, 21:46

Darkcrafter07 wrote on 2026-03-18, 15:30:
Some update over integer slopes: - even more optimized and stabilized rendering; - Duke07.exe compiled entirely in 486 optimizat […]
Show full quote

Some update over integer slopes:
- even more optimized and stabilized rendering;
- Duke07.exe compiled entirely in 486 optimization mode resulting in a quite noticeable performance improvement.

At this moment, the speed difference between original FPU slopes and fully integer mode, as the overall combined ingame image is about 15-20% in PCem 486sx2-66MHz in the most hard scenes.

So here come Duke07 v0.19 sources and compiled binaries...
Another google drive download link

Thanks, downloaded v0.19 and i will use it if try duke3d again.

Best ATi Rage3 drivers for 3DCIF / Direct3D / OpenGL / DVD : ATi RagePro drivers and software
30+MiniGL / OpenGL Win 9x dll files for all ATi Rage3 cards : Re: ATi RagePro OpenGL files

Reply 56 of 58, by Darkcrafter07

Posted on 2026-04-11, 16:36

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

You're welcome guys, btw, I just made a github repository for Duke07!
https://github.com/Darkcrafter07/Duke07

Reply 57 of 58, by Darkcrafter07

Posted on 2026-04-22, 16:39

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

So after long attempts to implement portals, the first crappy but working attempt is finally here!
I really thought it would be a nice thing to have to fake 3D window or door passages in such geometry, that doesn't extend to full floor or ceiling height.
It's done via specially defined PORTAL#x, CAMERA1 and PRTLTELEPDEST sprites and configured just like security cameras (read instructions in readme).
The basic idea is to duplicate the game logic that VIEWSCREEN (security monitor) uses, modify in such a way that it translates content on flat sprites but also modified to show texture in the same parallax fashion skies are shown, so that there's no flat texture feel to it. The first version also contains logic hardwired that rotates linked camera along player lookout and also attempts to move it fro and about. Well, you better look at the difference on github by comparing commits.

For sure, they're SO far from being perfect, but I decided it would be a nice idea to upload it as a starting point so the whole thing doesn't go missing. It took a long year, and a massive leap forward was made in the last week, fine-tuning the source code to preserve compatibility with the original viewscreen capabilities; it was also tested on PCem 486SX2-66 to make sure they're fast. Yeah, there's HUGE room for improvement but it's going to take a while.

The "exe" folder contains everything necessary to test portals and see it with your own eyes, just copy and paste files in your Duke Nukem 3D 1.4(or later) Atomic Edition folder and run "SkyPRT.bat".

In case GitHub gives you wrong sources:
Duke07_src_n_exe_v021b

From ReadMe:
v0.21 features (April 22, 2026):
- first attempt to implement wall aligned sprite portals.
They're like 1-sided only but allow to connect any part of a map,
in any place, via sprite portal, forming a window passage. It was
developed really thoroughly to make sure they don't bog down even systems
as old as a 486SX too much and don't cause errors and crashes.

HOW to create them: select a tile #4096 (PORTAL0), put it anywhere in map,
make it flat (recommended) by placing a cursor at them and pressing "R",
you may align them to walls by pressing "O", hold shift and hold left
mouse button and pull them slightly away from the wall so that it doesn't
get swallowed by it. Then place a tile #621 (CAMERA1) in any place of a map
where you want this portal to show image from. Duplicate CAMERA1 and give
it a tile #5062 (PRTLTELEPDEST) - that's your teleport destination. The
engine makes both CAMERA1 and PRTLTELEPDEST that ARE LINKED to a portal
INVISIBLE.

Give your portal a hitag that corresponds to a lotag of CAMERA1 and
PRTLTELEPDEST (yeah CAMERA1 and PRTLTELEPDEST are configured SAME WAY!).
For example, PORTAL(hitag:9970,lotag:0), CAMERA1(hitag:0,lotag:9970),
PRTLTELEPDEST(hitag:0,lotag:9970).
E.g, they're set just like security cameras and don't interfere with them
but make sure NOT to use a CAMERA1 sprite that is already connected to
any security camera. Get in 2D mode (NumPad ENTER), get cursor
over a sprite, press ALT+H to assign a hitag, enter a value, press reg-
-ular ENTER, then press ALT+T to assign a lotag, enter a value, press reg
-ular ENTER again.
At this time you can only create 1 PORTAL PAIR (PORTAL0 and PORTAL1), do
NOT use the same tile PORTAL0 for another side! Use PORTAL1 like that
PORTAL1(hitag:9971,lotag:0), CAMERA1(hitag:0,lotag:9971),
PRTLTELEPDEST(hitag:0,lotag:9971).

Known limitations:
- there's just ONE pair PORTAL0, PORTAL1, more to come;
- low detail mode doesn't update picture in bottom left corner,
I don't know how to fix this yet. At least I could fix tilt!
- D07LQ2X still draws scanline tutti-frutti inside portals because
the asm routines are designed to draw 320x200 image, not 128x128.
The way to fix that would be externing "long ydim" in asm, load
that in a register and use instead of hardcoded "200" or whatever
value is there...
- parallax inside portals suck, it's a subject to improve.
- PORTAL tiles MUST BE SQUARE dimensions like 128x128 (low quality),
192x192, 256x256 etc to work properly, there is NO way to fix that
because there is NO need to do that as you may still resize sprite
as you wish and it's not going to stretch contents inside.
- it's best to have the same sizes of sprites for one pair like
PORTAL0 and PORTAL1. In case it goes crazy, you'd delete and
create them again without copy pasting one another for best
stability.

--- implementation details or how it works start ---
***global.c***
add new variables below "short camsprite":
short portalsprite0 = -1; short portalsprite1 = -1;
***duke3d.h***
extern those two new variables below "extern char env_music_fn[4][13];"
"extern short camsprite, portalsprite0, portalsprite1;"
***actors.c:***
for each PORTAL#x tile create a function that's gonna act like a trigger
to activate this particular PORTAL#x tile. movestandablesportal0(ID#128)
for PORTAL0 and movestandablesportal1(ID#129) for PORTAL1.
***game.c***
include "portals.c" file before displayrooms function, modify it as well
in order to call particular portal drawing functions as se40codeportal#x
and se40_DrawPortal#x and restore screen after them. Make a function
before "short spawn" - startspriteportal and call movestandablesportal0,
movestandablesportal1 inside. Modify function "domovethings" in order
to call "startspriteportal" function after "movefta();" call.
Modify function "short spawn" in order to include "PORTAL0 and PORTAL1"
actors to be included in this line "if( PN != SPEAKER && PN != LETTER..."
Place new cases on the new line like if(PN!=PORTAL0&&PN!=PORTAL1 etc),
if not done, portals are not going to activate drawing automatically.
In the same function "short spawn" extend T temp_data from 6 to 12...
like "T1=T2=T3=T4=T5=T6=T7=T8=T9=T10=T11=T12=0;" In the same function,
add "case PORTAL0, case PORTAL1" before or after "case VIEWSCREEN".
Modify "case CAMERA1-CAMERA4-CAMERAPOLE" to identify cameras linked to
portals and hide them. Also add "case PRTLTELEPDEST" to hide them too.
***portals.c*** contains code to draw portals to their respective tiles.
You can add it right in game.c before "displayrooms" but it was decided
to put it in a separate file for ease of coding. Make sure that
if you change portals.c, you delete "game.obj" before recompiling,
otherwise the changes are not going to take effect.
***sector.c***. Modify function "void checksectors", the very beginning
of that function contains a code of portals to teleport player in 3D
space. That means it will not teleport you if you jump above it (made
specially to allow for better 3D like windows and doors place on facade
of multistory buildings). That makes it possible to stack many different
portals in the same X,Y space but different height so you can have
PORTAL0-1 pair on story #3 and PORTAL2-3 pair on story #4 for example
(I didn't implement more than 1 pair though). Note that this version
only teleports from the CENTER of a sprite, so that's a disadvantage I'm
looking forward to fixing...
***names.h, soundefs.h, user.con*** include new cases PORTAL0 4960,
PORTAL1 4961, PRTLTELEPDEST 5062.
***engine.c, eng386.c***. Modify "drawsprite and drawsprite_LQ2X" functs
in order to depict texture contents on flats sprites like parascan does
to floors and ceilings "parallax textures" but inside a trapezoid frame,
also known as a "wall aligned flat sprite". I'd like to do the same to
ground aligned sprites so that we could have holes in floors, ceilings.
--- implementation details or how it works finish ---

Reply 58 of 58, by Darkcrafter07

Posted on 2026-04-26, 02:11

Darkcrafter07 Offline

Rank Newbie

Rank: Newbie
Posts: 39
Joined: 2024-06-06, 09:38

So the portals are still developed and there are quite good improvements!
https://github.com/Darkcrafter07/Duke07/releases/tag/v0.22
v0.22 features (April 26, 2026):
- major sprite portals improvements:
we're still dealing with just 1 portal pair (PORTAL0-PORTAL1) but this
time, there's a better parallax portal-sprite projection made in
engine.c, eng386.c in "drawsprite, drawsprite_LQ2X" function.
So there's less seams, image is way more stable and moves around less.
On the other hand, XY camera movement, height tracking and left-right-
-up-down reverse motion capture is introduced, tweaked and aligned
between each portal and its toolset sprites. Portals framerate increased
twice allowing to introduce less seams.

Portal-teleportation now depends on your sprite size so be careful as
larger tend to teleport earlier (place telepdest sprites further from
your portals!) and smaller ones teleport later so you need to get closer
to them.

Main menu