VOGONS


Duke07 - another MS-DOS port of Duke3D

Topic actions

Reply 40 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie
analog_programmer wrote on 2025-08-12, 07:28:

Thanks for the clarification! If I understand correctly, when "ydetail" variable in "ENG386.C" is set to 3, the produced executable will be for even lower details.

I have a suggestion. For easier compiling and linking of different executable versions you can separate make-files and altered .C and .H files in separate folders like "D07_SRC", "D07_SRC_LQ2" and "D07_SRC_LQ3".

Not yet sir, let it be like that.

What I've been trying to do for more than 4 months is get a non-FPU version of slope drawing to work. There's a version that finally doesn't crash and does perspective correction; although it is faster on a non-FPU SX processor, it still looks awful and comes with a lot of geometric distortions.

A part of A.asm:

BITSOFPRECISNLQ equ 3
BITSOFPRECISNLQPOW equ 8

; ---------------------------------------------------------------------------
; setupslopevlin2_ - setup pass for slopevlin2_ (self-modifying code).
; Writes the operand bytes of the instructions labelled slopmach1b..7b
; (located inside slopevlin2_) and copies _asm1 into _asm2.
; In (as used below):
;   al, ah = two bit counts used to build the AND mask and the shift counts
;   ebx    = value patched into the load at slopmach3b ("ptr")
;   ecx    = value patched into the add at slopmach5b ("pinc")
; Modifies: eax, ecx, edx, flags.
; NOTE(review): the C-side register binding for this routine is not visible
; in this listing - confirm the argument-to-register mapping.
; ---------------------------------------------------------------------------
ALIGN 16
PUBLIC setupslopevlin2_
setupslopevlin2_:
mov dword ptr [slopmach3b+3], ebx ;ptr   -> displacement of "mov dl, [ebx+edx+...]"
mov dword ptr [slopmach5b+2], ecx ;pinc  -> immediate of "add ebp, ..."
neg ecx
mov dword ptr [slopmach6b+2], ecx ;-pinc -> displacement of "lea ebp, [eax+...]"

; Build edx = ((1 << al) - 1) << ah and patch it in as the AND mask.
mov edx, 1
mov cl, al
shl edx, cl
dec edx
mov cl, ah
shl edx, cl
mov dword ptr [slopmach7b+2], edx ; immediate of "and ebx, ..."

; Shift count of "shr edx, ..." becomes -ah (stored as a byte).
neg ah
mov byte ptr [slopmach2b+2], ah

; Shift count of "shr ebx, ..." becomes -ah - al (stored as a byte).
sub ah, al
mov byte ptr [slopmach1b+2], ah

; FPU removal: Convert floating-point init to integer scaling
; Original: fild _asm1 + fstp _asm2
; Here the 32-bit integer in _asm1 is copied to _asm2 unchanged.
mov eax, [_asm1]
;shl eax, 16 ; not needed now
mov [_asm2], eax

ret

; ---------------------------------------------------------------------------
; slopevlin2_ - integer (non-FPU) variant of a slope column drawer; the
; "Replace FPU ..." comments mark where FPU instructions were substituted.
; Per pixel it fetches a texel through the address patched in at slopmach3b,
; looks that byte up through a pointer read from [esp+disp], and stores the
; result at [ebp].
; In (as used below):
;   eax     = destination base (ebp = eax - pinc via the patched lea)
;   ebx     = value that _asm2 is added to, then stored in _asm1
;   ecx     = address of the first per-pixel lookup pointer
;   edx     = initial value of the outer-loop register ebx
;   esi/edi = running texture coordinates
; ESP and EBP are repurposed as data pointers. They are saved in
; _espbak/_ebpbak and restored before ret, so the stack must not be used
; (push/call/interrupt-on-this-stack) between the save and the restore.
; The slopmach*b operands (88888888h, 20, 26) are placeholders that
; setupslopevlin2_ overwrites before this routine runs.
; NOTE(review): presumably called from C as slopevlin2(p, i, slopaloffs,
; cnt, bx, by) with a register calling convention - confirm the binding.
; ---------------------------------------------------------------------------
ALIGN 16
PUBLIC slopevlin2_
slopevlin2_:
mov _ebpbak, ebp
mov _espbak, esp

; Patch slopmach4b so that [esp+disp] initially addresses [ecx].
sub ecx, esp
mov dword ptr [slopmach4b+3], ecx

; NOTE(review): this mov is overwritten by the lea on the next line.
mov ebp, eax ; Remove FPU load
slopmach6b: lea ebp, [eax+88888888h] ; ebp = eax + (-pinc), patched
add ebx, dword ptr _asm2 ; Replace FPU add with integer op

mov _asm1, ebx
shl ebx, 3

; Advance the texture coordinates by globalx3/globaly3 times (ebx << 3).
mov eax, _globalx3
mov ecx, _globaly3
imul eax, ebx
imul ecx, ebx
add esi, eax
add edi, ecx

mov ebx, edx
jmp short bigslopeloopb
ALIGN 16
; Outer loop: one pass per group of up to BITSOFPRECISNLQPOW pixels.
bigslopeloopb:
Show last 58 lines
	mov dword ptr _fpuasm, ebx              ; Replace FPU store

; Table-based conversion of ebx: the top byte of (ebx*2) becomes a shift
; count, bits 13..23 select a _reciptable entry, and bit 31 of ebx becomes
; an all-ones/all-zeros mask applied with xor.
; NOTE(review): this bit layout looks like a reciprocal taken from IEEE
; single-precision bits, but ebx here is an integer - confirm intent.
mov eax, ebx ; Modified from original FPU path
add eax, eax ; CF = old bit 31
sbb edx, edx ; edx = 0 or 0FFFFFFFFh depending on CF
mov ecx, eax
shr ecx, 24
and eax, 00ffe000h
shr eax, 11 ; byte offset into _reciptable (multiple of 4, 0..8188)
sub cl, 2
mov eax, dword ptr _reciptable[eax]
shr eax, cl
xor eax, edx
; delta = new value - previous _asm1; scale the x/y steps by it.
mov edx, _asm1
mov ecx, _globalx3
mov _asm1, eax
sub eax, edx
mov edx, _globaly3
imul ecx, eax ; ecx = _globalx3 * delta (added to esi per pixel)
imul eax, edx ; eax = delta * _globaly3 (added to edi per pixel)

add ebx, dword ptr _asm2 ; Replace FPU add with integer op

; cl = pixel count for this pass: bl if ebx < BITSOFPRECISNLQPOW (signed),
; otherwise BITSOFPRECISNLQPOW. The mov instructions leave the flags from
; cmp intact for the jl. Only cl is written, so the count replaces the low
; byte of the step held in ecx.
cmp ebx, BITSOFPRECISNLQPOW
mov _asm4, ebx
mov cl, bl
jl short slopeskipminb
mov cl, BITSOFPRECISNLQPOW
slopeskipminb:

mov ebx, esi
mov edx, edi

; Inner loop: one pixel per iteration.
beginnerslopeloopb:
slopmach1b: shr ebx, 20 ; shift count patched by setupslopevlin2_
add esi, ecx
slopmach2b: shr edx, 26 ; shift count patched by setupslopevlin2_
slopmach7b: and ebx, 88888888h ; mask patched by setupslopevlin2_
add edi, eax
slopmach5b: add ebp, 88888888h ; ebp += pinc (patched)
slopmach3b: mov dl, byte ptr [ebx+edx+88888888h] ; texel fetch (base patched)
slopmach4b: mov ebx, dword ptr [esp+88888888h] ; lookup pointer for this pixel
sub esp, 4 ; step to the previous pointer
dec cl ; sets ZF for the jnz below; the movs in between keep flags
mov al, byte ptr [ebx+edx] ; also overwrites the low byte of the edi step
mov ebx, esi
mov [ebp], al
mov edx, edi
jnz short beginnerslopeloopb

; Continue while more than BITSOFPRECISNLQPOW remained before this pass.
mov ebx, _asm4
sub ebx, BITSOFPRECISNLQPOW
jg short bigslopeloopb

mov esp, _espbak
mov ebp, _ebpbak
ret

the parts of engine.c:


long reciptable[2048], fpuasm, fpuasmnonfpu;
long reciptablenonfpu[2048], deltaztable[2048];

#define FIX16_SHIFT 14 // Matches Build's 30-bit fixed-point
#define FIX16_FACTOR 0x40000000 // 1<<30 in 32-bit (1073741824)
#define RECIP_TABLE_OFFSET 2048

#define F1_0 0x10000 // 16.16 fixed-point scaling

// loadtables: one-time initialisation of the lookup tables.
// Does nothing when tablesloaded is already non-zero. Otherwise it calls
// initksqrt(), fills reciptable[], fills reciptablenonfpu[] and
// deltaztable[] when use_fpu is 0, reads the remaining tables from
// "tables.dat" if that file opens, and finally sets tablesloaded to 1.
loadtables()
{
long i, fil;
float z, dz, nextz, deltaz;

if (tablesloaded == 0)
{
initksqrt();

// Generate original FPU-based reciptable at all times (it's int too)
// Entry i holds divscale30(2048, i+2048).
for(i = 0; i < 2048; i++)
{
reciptable[i] = divscale30(2048L, i+2048);
}
if ( (use_fpu) == 0)
{
// Generate non-FPU reciptable if nofpu parameter was passed
// Entry 0 is special-cased, so the loop below never divides by zero.
reciptablenonfpu[0] = F1_0 / RECIP_TABLE_OFFSET;
for(i = 1; i < 2048; i++)
{
//reciptablenonfpu[i] = FIX16_FACTOR / (i + RECIP_TABLE_OFFSET);
reciptablenonfpu[i] = F1_0 / i;
}

// deltaztable[i] = (1/z - 1/(z+dz)) scaled by 65536, with z starting at
// 1.0 and advancing by dz on every entry.
z = 1.0f; // initial Z (matches original FPU setup)
dz = 0.01f; // step size (adjust based on your needs)
for (i = 0; i < 2048; i++)
{
// match max line height
nextz = z + dz;
// hyperbolic step
deltaz = (1.0f/z) - (1.0f/nextz);
// convert to fixed-point
deltaztable[i] = (long)(deltaz * (1 << 16));
z = nextz;
}
}


// Load built-in tables (sintable, radar angles, fonts)
// If the file cannot be opened (kopen4load returns -1), these tables are
// left untouched and tablesloaded is still set below.
if ((fil = kopen4load("tables.dat", 0)) != -1)
{
// Reciptable is NOT loaded from file - generated above
kread(fil, sintable, 2048 * 2);
kread(fil, radarang, 640 * 2);
// Fill entries 640..1279 with the negated first 640 entries, mirrored.
for(i = 0; i < 640; i++) radarang[1279 - i] = -radarang[i];
kread(fil, textfont, 1024);
kread(fil, smalltextfont, 1024);
kread(fil, britable, 1024);
kclose(fil);
Show last 6 lines
        }

tablesloaded = 1;
}
}

I think the biggest challenge is this line: add ebx, dword ptr _asm2
because as soon as FPU addition is removed it breaks the "hyperbolical continuity" as AI said. There are two lines like this in the asm code and I even tried to simulate it with deltaztable yet without much success.

Maybe somebody knows it better.

Reply 41 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

So we're almost there on non-fpu slopes, I think it still looks meh but much closer to the result I'd desire to have. You can download the compiled exe to see how it's going. My further attempt would be to try decreasing "amplitude" of ebx _asm2 additions. It's slower than low detail degraded version but much faster than fully detailed fpu version on 486sx. Later we could combine both approaches and do fully integer slopes in 2x detail loss instead of 8x or even keep it as is. If it goes fine for sure.

Part of A.asm:

EXTRN _surfx : dword
EXTRN _surfy : dword
EXTRN _slopedastat : byte

BITSOFPRECISNLQ equ 3
BITSOFPRECISNLQPOW equ 8

; ---------------------------------------------------------------------------
; setupslopevlin2_ - setup pass for slopevlin2_ (self-modifying code).
; Writes the operand bytes of the instructions labelled slopmach1b..7b
; (located inside slopevlin2_) and copies _asm1 into _asm2.
; In (as used below):
;   al, ah = two bit counts used to build the AND mask and the shift counts
;   ebx    = value patched into the load at slopmach3b ("ptr")
;   ecx    = value patched into the add at slopmach5b ("pinc")
; Modifies: eax, ecx, edx, flags.
; NOTE(review): presumably al/ah carry the two halves of the first C
; argument of setupslopevlin2() - confirm the register binding.
; ---------------------------------------------------------------------------
ALIGN 16
PUBLIC setupslopevlin2_
setupslopevlin2_:
mov dword ptr [slopmach3b+3], ebx ;ptr   -> displacement of "mov dl, [ebx+edx+...]"
mov dword ptr [slopmach5b+2], ecx ;pinc  -> immediate of "add ebp, ..."
neg ecx
mov dword ptr [slopmach6b+2], ecx ;-pinc -> displacement of "lea ebp, [eax+...]"

; Build edx = ((1 << al) - 1) << ah and patch it in as the AND mask.
mov edx, 1
mov cl, al
shl edx, cl
dec edx
mov cl, ah
shl edx, cl
mov dword ptr [slopmach7b+2], edx ; immediate of "and ebx, ..."

; Shift count of "shr edx, ..." becomes -ah (stored as a byte).
neg ah
mov byte ptr [slopmach2b+2], ah

; Shift count of "shr ebx, ..." becomes -ah - al (stored as a byte).
sub ah, al
mov byte ptr [slopmach1b+2], ah

; Seed _asm2 with the integer currently held in _asm1.
mov eax, [_asm1]
mov [_asm2], eax

ret

; ---------------------------------------------------------------------------
; slopevlin2_ - integer (non-FPU) variant of a slope column drawer; the
; "Replace FPU ..." comments mark where FPU instructions were substituted.
; Per pixel it fetches a texel through the address patched in at slopmach3b,
; looks that byte up through a pointer read from [esp+disp], and stores the
; result at [ebp].
; In (as used below):
;   eax     = destination base (ebp = eax - pinc via the patched lea)
;   ebx     = value that _asm2 is added to, then stored in _asm1
;   ecx     = address of the first per-pixel lookup pointer
;   edx     = initial value of the outer-loop register ebx
;   esi/edi = running texture coordinates
; Also reads _slopedastat, _surfx, _surfy, _globalx3, _globaly3, _asm1,
; _asm2 and _reciptable; writes _asm1, _asm4 and _fpuasm, and for floors
; negates _asm3/_globalx3/_globaly3 in memory.
; ebp, esi, edi and ebx are pushed on entry and popped before ret.
; ESP and EBP are repurposed as data pointers inside the loops, so the stack
; must not be used between "mov _espbak, esp" and "mov esp, _espbak".
; The slopmach*b operands (88888888h, 20, 26) are placeholders that
; setupslopevlin2_ overwrites before this routine runs.
; ---------------------------------------------------------------------------
ALIGN 16
PUBLIC slopevlin2_
slopevlin2_:
; Save critical registers
push ebp
push esi
push edi
push ebx

; Surface type check (ceiling(0) vs floor(1))
cmp byte ptr [_slopedastat], 0
jz slp2drawasusual ; Skip inversion for ceilings

; Only invert for floors (dastat=1)
; These negations modify the variables in memory, not just local copies.
neg dword ptr [_asm3]
neg dword ptr [_globalx3]
neg dword ptr [_globaly3]

slp2drawasusual:
; esp is saved after the four pushes, so restoring it later leaves the
; saved registers on top of the stack for the pops.
mov _ebpbak, ebp
mov _espbak, esp

; Patch slopmach4b so that [esp+disp] initially addresses [ecx].
sub ecx, esp
mov dword ptr [slopmach4b+3], ecx

; NOTE(review): this mov is overwritten by the lea at slopmach6b.
mov ebp, eax ; Remove FPU load
Show last 81 lines
slopmach6b: lea ebp, [eax+88888888h] ; ebp = eax + (-pinc), patched
add ebx, dword ptr _asm2 ; Replace FPU add with integer op

mov _asm1, ebx
shl ebx, 3

; Advance the texture coordinates by globalx3/globaly3 times (ebx << 3).
mov eax, [_globalx3] ; Proper dereference
mov ecx, [_globaly3] ; Proper dereference
imul eax, ebx
imul ecx, ebx
add esi, eax
add edi, ecx

mov ebx, edx
jmp short bigslopeloopb
ALIGN 16
; Outer loop: one pass per group of up to BITSOFPRECISNLQPOW pixels.
bigslopeloopb:
mov dword ptr _fpuasm, ebx ; Replace FPU store

; Table-based conversion of ebx: the top byte of (ebx*2) becomes a shift
; count, bits 13..23 select a _reciptable entry, and bit 31 of ebx becomes
; an all-ones/all-zeros mask applied with xor.
; NOTE(review): this bit layout looks like a reciprocal taken from IEEE
; single-precision bits, but ebx here is an integer - confirm intent.
mov eax, ebx ; Modified from original FPU path
add eax, eax ; CF = old bit 31
sbb edx, edx ; edx = 0 or 0FFFFFFFFh depending on CF
mov ecx, eax
shr ecx, 24
and eax, 00ffe000h
shr eax, 11 ; byte offset into _reciptable (multiple of 4, 0..8188)
sub cl, 2
mov eax, dword ptr _reciptable[eax]
shr eax, cl
xor eax, edx
; delta = new value - previous _asm1; scale the per-pixel steps by it.
; The steps use _surfx/_surfy here, not _globalx3/_globaly3 as above.
mov edx, _asm1
mov ecx, _surfx
mov _asm1, eax
sub eax, edx
mov edx, _surfy
imul ecx, eax ; ecx = _surfx * delta (added to esi per pixel)
imul eax, edx ; eax = delta * _surfy (added to edi per pixel)

add ebx, dword ptr _asm2 ; Replace FPU add with integer op
add ebx, 1


; cl = pixel count for this pass: bl if ebx < BITSOFPRECISNLQPOW (signed),
; otherwise BITSOFPRECISNLQPOW. The mov instructions leave the flags from
; cmp intact for the jl. Only cl is written, so the count replaces the low
; byte of the step held in ecx.
cmp ebx, BITSOFPRECISNLQPOW
mov _asm4, ebx
mov cl, bl
jl short slopeskipminb
mov cl, BITSOFPRECISNLQPOW
slopeskipminb:

mov ebx, esi
mov edx, edi

; Inner loop: one pixel per iteration.
beginnerslopeloopb:
slopmach1b: shr ebx, 20 ; shift count patched by setupslopevlin2_
add esi, ecx
slopmach2b: shr edx, 26 ; shift count patched by setupslopevlin2_
slopmach7b: and ebx, 88888888h ; mask patched by setupslopevlin2_
add edi, eax
slopmach5b: add ebp, 88888888h ; ebp += pinc (patched)
slopmach3b: mov dl, byte ptr [ebx+edx+88888888h] ; texel fetch (base patched)
slopmach4b: mov ebx, dword ptr [esp+88888888h] ; lookup pointer for this pixel
sub esp, 4 ; step to the previous pointer
dec cl ; sets ZF for the jnz below; the movs in between keep flags
mov al, byte ptr [ebx+edx] ; also overwrites the low byte of the edi step
mov ebx, esi
mov [ebp], al
mov edx, edi
jnz short beginnerslopeloopb

; Continue while more than BITSOFPRECISNLQPOW remained before this pass.
mov ebx, _asm4
sub ebx, BITSOFPRECISNLQPOW
jg short bigslopeloopb

; Restore the real stack pointer first, then the registers pushed on entry.
mov esp, _espbak
mov ebp, _ebpbak
pop ebx
pop edi
pop esi
pop ebp
ret

Part of engine.c:

#define BITSOFPRECISNLQ 3
long SCALEFACTOR = 15; // extra precision
long surfx, surfy, surfstepx, surfstepy, surfz, surfstepz;
long surfx_prev, surfy_prev;
char slopedastat;
// grouscan_nonfpu: draws the sloped ceiling (dastat == 0) or sloped floor
// (dastat != 0) of sector sectnum for screen columns dax1..dax2, one
// column at a time through slopevlin2().
// Returns early, drawing nothing, when the view position is on the back
// side of the surface or when the tile has a non-positive size.
// Side effects visible here: writes slopedastat, the global* texture
// mapping variables, asm1, asm3, globvis, the surf* variables and entries
// of slopalookup[]; may call loadtile() and faketimerhandler().
// NOTE(review): locals k, m, n, x1, x2 and dashade appear unused in this
// listing.
grouscan_nonfpu (long dax1, long dax2, long sectnum, char dastat)
{
long i, j, k, l, m, n, x, y, dx, dy, wx, wy, x1, y1, x2, y2, daz;
long daslope, dasqr;
long dashade, shoffs, shinc, m1, m2, *mptr1, *mptr2, *nptr1, *nptr2;
walltype *wal;
sectortype *sec;

sec = &sector[sectnum];

slopedastat = dastat; // pass to asm to draw ceilings-floors differently

// Pick the ceiling or floor attributes of the sector.
if (dastat == 0)
{
if (globalposz <= getceilzofslope(sectnum,globalposx,globalposy))
return; //Back-face culling
globalorientation = sec->ceilingstat;
globalpicnum = sec->ceilingpicnum;
globalshade = sec->ceilingshade;
globalpal = sec->ceilingpal;
daslope = sec->ceilingheinum;
daz = sec->ceilingz;
}
else
{
if (globalposz >= getflorzofslope(sectnum,globalposx,globalposy))
return; //Back-face culling
globalorientation = sec->floorstat;
globalpicnum = sec->floorpicnum;
globalshade = sec->floorshade;
globalpal = sec->floorpal;
daslope = sec->floorheinum;
daz = sec->floorz;
}

// Resolve animation, mark the tile as used and make sure it is loaded.
if ((picanm[globalpicnum]&192) != 0) globalpicnum += animateoffs(globalpicnum,sectnum);
setgotpic(globalpicnum);
if ((tilesizx[globalpicnum] <= 0) || (tilesizy[globalpicnum] <= 0)) return;
if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

// (wx,wy) = direction of the sector's first wall, scaled by the slope and
// by the reciprocal of the wall length.
wal = &wall[sec->wallptr];
wx = wall[wal->point2].x - wal->x;
wy = wall[wal->point2].y - wal->y;
dasqr = krecipasm(nsqrtasm(wx*wx+wy*wy));
i = mulscale21(daslope,dasqr);
wx *= i; wy *= i;

globalx = -mulscale19(singlobalang,xdimenrecip);
globaly = mulscale19(cosglobalang,xdimenrecip);
globalx1 = (globalposx<<8);
globaly1 = -(globalposy<<8);
i = (dax1-halfxdimen)*xdimenrecip;
globalx2 = mulscale16(cosglobalang<<4,viewingrangerecip) - mulscale27(singlobalang,i);
globaly2 = mulscale16(singlobalang<<4,viewingrangerecip) + mulscale27(cosglobalang,i);
globalzd = (xdimscale<<9);
Show last 131 lines
	globalzx = -dmulscale17(wx,globaly2,-wy,globalx2) + mulscale10(1-globalhoriz,globalzd);
globalz = -dmulscale25(wx,globaly,-wy,globalx);

if (globalorientation&64) //Relative alignment
{
dx = mulscale14(wall[wal->point2].x-wal->x,dasqr);
dy = mulscale14(wall[wal->point2].y-wal->y,dasqr);

i = nsqrtasm(daslope*daslope+16777216);

x = globalx; y = globaly;
globalx = dmulscale16(x,dx,y,dy);
globaly = mulscale12(dmulscale16(-y,dx,x,dy),i);

x = ((wal->x-globalposx)<<8); y = ((wal->y-globalposy)<<8);
globalx1 = dmulscale16(-x,dx,-y,dy);
globaly1 = mulscale12(dmulscale16(-y,dx,x,dy),i);

x = globalx2; y = globaly2;
globalx2 = dmulscale16(x,dx,y,dy);
globaly2 = mulscale12(dmulscale16(-y,dx,x,dy),i);
}
// Orientation bit 0x4 swaps the x/y texture axes; 0x10 and 0x20 negate
// the x and y components respectively.
if (globalorientation&0x4)
{
i = globalx; globalx = -globaly; globaly = -i;
i = globalx1; globalx1 = globaly1; globaly1 = i;
i = globalx2; globalx2 = -globaly2; globaly2 = -i;
}
if (globalorientation&0x10) { globalx1 = -globalx1, globalx2 = -globalx2, globalx = -globalx; }
if (globalorientation&0x20) { globaly1 = -globaly1, globaly2 = -globaly2, globaly = -globaly; }

//isn't "daz" a texture scale for the whole function here?
// From here on daz is reused: it no longer holds the raw sector z.
daz = dmulscale9(wx,globalposy-wal->y,-wy,globalposx-wal->x) + ((daz-globalposz)<<8);
globalx2 = mulscale20(globalx2,daz); globalx = mulscale28(globalx,daz);
globaly2 = mulscale20(globaly2,-daz); globaly = mulscale28(globaly,-daz);

// Shift amounts derived from the two nibbles of picsiz[]; orientation
// bit 8 adds one more bit to both.
i = 8-(picsiz[globalpicnum]&15); j = 8-(picsiz[globalpicnum]>>4);
if (globalorientation&8) { i++; j++; }
globalx1 <<= (i+12); globalx2 <<= i; globalx <<= i;
globaly1 <<= (j+12); globaly2 <<= j; globaly <<= j;

// Apply the sector's texture panning.
if (dastat == 0)
{
globalx1 += (((long)sec->ceilingxpanning)<<24);
globaly1 += (((long)sec->ceilingypanning)<<24);
}
else
{
globalx1 += (((long)sec->floorxpanning)<<24);
globaly1 += (((long)sec->floorypanning)<<24);
}

// asm1 is copied into asm2 by setupslopevlin2_ below.
asm1 = -(globalzd>>(16-BITSOFPRECISNLQ));

globvis = globalvisibility;
if (sec->visibility != 0) globvis = mulscale4(globvis,(long)((unsigned char)(sec->visibility+16)));
globvis = mulscale13(globvis,daz);
globvis = mulscale16(globvis,xdimscale);
j = FP_OFF(palookup[globalpal]);

// Arguments: packed picsiz nibbles (low byte / second byte), tile data
// address, and the negated pitch -ylookup[1].
setupslopevlin2(((long)(picsiz[globalpicnum]&15))+(((long)(picsiz[globalpicnum]>>4))<<8),waloff[globalpicnum],-ylookup[1]);

l = (globalzd>>16);

shinc = mulscale16(globalz,xdimenscale);
if (shinc > 0) shoffs = (4<<15); else shoffs = ((2044-ydimen)<<15);
if (dastat == 0) y1 = umost[dax1]; else y1 = max(umost[dax1],dplc[dax1]);
m1 = mulscale16(y1,globalzd) + (globalzx>>6);
//Avoid visibility overflow by crossing horizon
if (globalzd > 0) m1 += (globalzd>>16); else m1 -= (globalzd>>16);
m2 = m1+l;
// mptr1/mptr2 bracket the slopalookup[] entries filled so far; the range
// is extended on demand inside the column loop.
mptr1 = (long *)&slopalookup[y1+(shoffs>>15)]; mptr2 = mptr1+1;

for(x=dax1;x<=dax2;x++)
{
// Vertical span [y1,y2] of this column.
if (dastat == 0) { y1 = umost[x]; y2 = min(dmost[x],uplc[x])-1; }
else { y1 = max(umost[x],dplc[x]); y2 = dmost[x]-1; }

if (y1 <= y2)
{
nptr1 = (long *)&slopalookup[y1+(shoffs>>15)];
nptr2 = (long *)&slopalookup[y2+(shoffs>>15)];
// Extend the filled range downwards, then upwards, to cover nptr1..nptr2.
while (nptr1 <= mptr1)
{
*mptr1-- = j + (getpalookup((long)mulscale24(krecipasm(m1),globvis),globalshade)<<8);
m1 -= l;
}
while (nptr2 >= mptr2)
{
*mptr2++ = j + (getpalookup((long)mulscale24(krecipasm(m2),globvis),globalshade)<<8);
m2 += l;
}

// NOTE(review): for floors the sign is flipped here, and slopevlin2_ also
// negates _globalx3/_globaly3 when slopedastat is non-zero - confirm that
// the double negation is intended.
if ( (dastat) == 0)
{
//ceilings
globalx3 = (globalx2>>10);
globaly3 = (globaly2>>10);
} else {
//floors
globalx3 = -(globalx2>>10);
globaly3 = -(globaly2>>10);
}

// Replace globalx3/globaly3 with scaled versions
// SCALEFACTOR is used as a plain divisor here (not a shift count).
surfx = globalx3 / SCALEFACTOR;
surfy = globaly3 / SCALEFACTOR;
surfz = globalz / SCALEFACTOR;

// Calculate stepping parameters
surfstepx = globalx / SCALEFACTOR;
surfstepy = globaly / SCALEFACTOR;
surfstepz = globalz / SCALEFACTOR;

// Draw the column bottom-up: the destination starts at row y2 and nptr2
// is the lookup pointer for that row.
asm3 = mulscale16(y2,globalzd) + (globalzx>>6);
slopevlin2(ylookup[y2]+x+frameoffset,krecipasm(asm3>>3),(long)nptr2,y2-y1+1,globalx1,globaly1);

if ((x&15) == 0) faketimerhandler();
}
// Per-column stepping of the mapping variables.
globalx2 += globalx + (globalx >> 16);
globaly2 += globaly + (globaly >> 16);
globalzx += globalz;

// NOTE(review): surfx/surfy/surfz are reassigned from globalx3/globaly3/
// globalz before every slopevlin2() call above, so these increments are
// overwritten before they can be read by that call.
surfx += surfstepx;
surfy += surfstepy;
surfz += surfstepz;

shoffs += shinc;
}
}
Last edited by Darkcrafter07 on 2025-10-24, 19:26. Edited 1 time in total.

Reply 43 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie
marxveix wrote on 2025-10-24, 18:07:

Thank you! I try it later, but not today.

You're welcome, but don't hurry: this version is really buggy and may make your computer freeze, as illegal memory writes are still going on.
Upd... fixed

Reply 44 of 58, by marxveix

User metadata
Rank Oldbie
Rank
Oldbie
Darkcrafter07 wrote on 2025-10-24, 18:12:
marxveix wrote on 2025-10-24, 18:07:

Thank you! I try it later, but not today.

You're welcome but don't hurry, this version is really buggy and may make your computer freeze, there are illegal memory writes are still going.
Upd... fixed

Now its bugfixed, better to try with it?

Best ATi Rage3 drivers for 3DCIF / Direct3D / OpenGL / DVD : ATi RagePro drivers and software
30+MiniGL / OpenGL Win 9x dll files for all ATi Rage3 cards : Re: ATi RagePro OpenGL files

Reply 45 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie
marxveix wrote on 2025-10-24, 18:07:

Now its bugfixed, better to try with it?

Yes, but it still looks wonky; I don't know of a way to fix it yet.

Reply 46 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

v0.17 - distance rendering optimization. Makes distant walls render 8 times less often than close ones, according to the game ticker. May speed up rendering on big open maps but may introduce visual artifacts. Enable it with the /distslow command line parameter. Both Duke07.exe and D07LQ2x.exe are bundled with this. It currently applies from a distance of 32000 map units. Edit the drawalls_distslow function in engine.c or eng386.c to change the distance.

Duke07_src_n_exe_v017

Reply 47 of 58, by marxveix

User metadata
Rank Oldbie
Rank
Oldbie
Darkcrafter07 wrote on 2025-12-29, 06:49:

v0.17 - distance rendering optimization. Make distant walls render less 8 times less often than close according to the game ticker. May speed-up rendering on big open maps but may introduce visual artifacts. Enable by typing /distslow command line parameter. Both Duke07.exe and D07LQ2x.exe are bundled with this. Currently acts since 32000 map units. Edit engine.c or eng386.c drawalls_distslow function to change distance.

Duke07_src_n_exe_v017

Thanks, downloaded it for later use.

Best ATi Rage3 drivers for 3DCIF / Direct3D / OpenGL / DVD : ATi RagePro drivers and software
30+MiniGL / OpenGL Win 9x dll files for all ATi Rage3 cards : Re: ATi RagePro OpenGL files

Reply 48 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie
marxveix wrote on 2025-12-29, 07:06:
Darkcrafter07 wrote on 2025-12-29, 06:49:

v0.17 - distance rendering optimization. Make distant walls render less 8 times less often than close according to the game ticker. May speed-up rendering on big open maps but may introduce visual artifacts. Enable by typing /distslow command line parameter. Both Duke07.exe and D07LQ2x.exe are bundled with this. Currently acts since 32000 map units. Edit engine.c or eng386.c drawalls_distslow function to change distance.

Duke07_src_n_exe_v017

Thanks, downloaded it for later use.

You're welcome, glad someone found it cool! Btw, there must be a better way to do that distance magic, but it must be hard to implement.

So we're rendering sky first (the background), as we know that parallax skies setup is done not so flexible in the game so for a parallax map to show up all tiles it consists of, the entire enclosed area must have the same picnum.
So for the idea to work there must be an option to override all skies in the area or even the whole map.

That could also lead to bad picture in the distance as distant geometry can't keep up, so maybe additional parallax-sky-alike horizontal and vertical scrolling on the distance geometry buffer, so the roadmap will be like:

1) Render distant geometry in a separate buffer and apply parallax scrolling to it;
2) Use a 2nd buffer and render parallax skies first here;
3) Copy distant geometry buffer result (scrolling corrected) and draw on top of sky;
4) On the top of it all render only close geometry.

Reply 49 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

So I could finally get a bit further regarding the non-fpu slopes. It still does use some FPU operations like krecipasm calls but tries to do it less.

Texture bending and drifting effect reduced due to onscreen slope segmenting aka tesselation. The fpu krecipasm routine (reciprocal of 1/z) is attempted to be skipped each scanline (SLOPE_FPU_FACTOR = 1).

Here's exe and src of the quite unstable version but working... The result looks sawwy and jaggy but pixelization is less (3x instead of 8x). I heard there are techniques like subpixel correction but it's a wonder it works at least like this.

Use "/nofpu" argument to activate.

Reply 50 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

Hmm we're getting some progress on integer slopes again:
- slopes off the screen solved (faulty int add command in a.asm slopevlin2 in bigslopeloop);
- fully integer versions of krecipasm each with their own reciptables (separate for texturing and shading), that gives solid FPS boost (up to 50% ?);
- a lookup table optimized segments texture calculation (+15% FPS);

As usual, use "/nofpu" command to activate...

Reply 51 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

Some work is going on the alternative implementation based on Ken's own C variant of setupslopevlin-slopevlin function but optimized.
Use "/nofpu" command as always.

Reply 52 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

Ok, this C variant seems like the way to go for 486SX. Only 2x quality degradation instead of 8x with almost the same speed is a big milestone.
This time it doesn't just simulate Ken's original asm "slopevlin" way to optimize between each vertical 8 pixels (4 px in C version because 8 looks ugly) but also unrolls the loop exactly four times, delivering the best speedup. This one also has visual bugs fixed.

Use "/nofpu" command to activate...

Reply 53 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

So here comes big update about integer slopes, this must be one of the best attempts I've ever had.
They look almost as good as the original fpu ones, have no pixelization (except for 386 versions) and pretty fast!

The way the original "slopevlin" does the magic is called relative interpolation of DU/DV between true perspective calculations performed every 8 px.
This time we're bundling not just the C version but also an SMC (self-modifying code) asm routine.
It was born thanks to disassembling the resulting "slopevlin2relativeC" function and took performance even further.

So here comes Duke07 v0.18 sources and compiled binaries...
Another google drive download link

Some fast comparison on IMGUR: https://imgur.com/a/uzs3LqJ

Reply 54 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

Some update over integer slopes:
- even more optimized and stabilized rendering;
- Duke07.exe compiled entirely in 486 optimization mode resulting in a quite noticeable performance improvement.

At this moment, the speed difference between the original FPU slopes and the fully integer mode, measured on the overall combined in-game image, is about 15-20% in PCem (486SX2-66 MHz) in the hardest scenes.

So here come Duke07 v0.19 sources and compiled binaries...
Another google drive download link

Reply 55 of 58, by marxveix

User metadata
Rank Oldbie
Rank
Oldbie
Darkcrafter07 wrote on 2026-03-18, 15:30:
Some update over integer slopes: - even more optimized and stabilized rendering; - Duke07.exe compiled entirely in 486 optimizat […]
Show full quote

Some update over integer slopes:
- even more optimized and stabilized rendering;
- Duke07.exe compiled entirely in 486 optimization mode resulting in a quite noticeable performance improvement.

At this moment, the speed difference between original FPU slopes and fully integer mode, as the overall combined ingame image is about 15-20% in PCem 486sx2-66MHz in the most hard scenes.

So here come Duke07 v0.19 sources and compiled binaries...
Another google drive download link

Thanks, downloaded v0.19 and i will use it if try duke3d again.

Best ATi Rage3 drivers for 3DCIF / Direct3D / OpenGL / DVD : ATi RagePro drivers and software
30+MiniGL / OpenGL Win 9x dll files for all ATi Rage3 cards : Re: ATi RagePro OpenGL files

Reply 57 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

So after long attempts to implement portals, the first crappy but working attempt is finally here!
I really thought it would be a nice thing to have to fake 3D window or door passages in such geometry, that doesn't extend to full floor or ceiling height.
It's done via specially defined PORTAL#x, CAMERA1 and PRTLTELEPDEST sprites and configured just like security cameras (read instructions in readme).
The basic idea is to duplicate the game logic that VIEWSCREEN (security monitor) uses, modify in such a way that it translates content on flat sprites but also modified to show texture in the same parallax fashion skies are shown, so that there's no flat texture feel to it. The first version also contains logic hardwired that rotates linked camera along player lookout and also attempts to move it fro and about. Well, you better look at the difference on github by comparing commits.

For sure, they are SO far from being perfect, but I decided it would be a nice idea to upload this as a starting point so that the whole thing doesn't go missing. It took a long year, and a massive leap forward was made in the last week, fine-tuning the source code to preserve compatibility with the original viewscreen capabilities; it was also tested on PCem 486SX2-66 to make sure they're fast. Yeah, there's HUGE room for improvement, but it's going to take a while.

The "exe" folder contains everything necessary to test portals and see it with your own eyes, just copy and paste files in your Duke Nukem 3D 1.4(or later) Atomic Edition folder and run "SkyPRT.bat".

In case GitHub gives you wrong sources:
Duke07_src_n_exe_v021b

From ReadMe:
v0.21 features (April 22, 2026):
- first attempt to implement wall aligned sprite portals.
They're like 1-sided only but allow to connect any part of a map,
in any place, via sprite portal, forming a window passage. It was
developed really throughly to make sure they don't bog even systems
as old as 486SX too much and not cause errors and crashes.

HOW to create them: select a tile #4096 (PORTAL0), put it anywhere in map,
make it flat (recommended) by placing a cursor at them and pressing "R",
you may align them to walls by pressing "O", hold shift and hold left
  mouse button and pull them slightly away from the wall so that it doesn't
  get swallowed by it. Then place a tile #621 (CAMERA1) in any place of a map
where you want this portal to show image from. Duplicate CAMERA1 and give
it a tile #5062 (PRTLTELEPDEST) - that's your teleport destination. The
engine makes both CAMERA1 and PRTLTELEPDEST that ARE LINKED to a portal
INVISIBLE.

Give your portal a hitag that corresponds to a lotag of CAMERA1 and
PRTLTELEPDEST (yeah CAMERA1 and PRTLTELEPDEST are configured SAME WAY!).
  For example, PORTAL0(hitag:9970,lotag:0), CAMERA1(hitag:0,lotag:9970),
  PRTLTELEPDEST(hitag:0,lotag:9970).
  I.e., they're set just like security cameras and don't interfere with them
but make sure NOT to use a CAMERA1 sprite that is already connected to
any security camera. Get in 2D mode (NumPad ENTER), get cursor
over a sprite, press ALT+H to assign a hitag, enter a value, press reg-
-ular ENTER, then press ALT+T to assign a lotag, enter a value, press reg
-ular ENTER again.
At this time you can only create 1 PORTAL PAIR (PORTAL0 and PORTAL1), do
NOT use the same tile PORTAL0 for another side! Use PORTAL1 like that
  PORTAL1(hitag:9971,lotag:0), CAMERA1(hitag:0,lotag:9971),
  PRTLTELEPDEST(hitag:0,lotag:9971).

Known limitations:
- there's just ONE pair PORTAL0, PORTAL1, more to come;
- low detail mode doesn't update picture in bottom left corner,
I don't know how to fix this yet. At least I could fix tilt!
- D07LQ2X still draws scanline tutti-frutti inside portals because
the asm routines are designed to draw 320x200 image, not 128x128.
The way to fix that would be externing "long ydim" in asm, load
that in a register and use instead of hardcoded "200" or whatever
value is there...
- parallax inside portals sucks; it's subject to improvement.
- PORTAL tiles MUST BE SQUARE dimensions like 128x128 (low quality),
192x192, 256x256 etc to work properly, there is NO way to fix that
because there is NO need to do that as you may still resize sprite
as you wish and it's not going to stretch contents inside.
- it's best to have the same sizes of sprites for one pair like
PORTAL0 and PORTAL1. In case it goes crazy, you'd delete and
create them again without copy pasting one another for best
stability.

--- implementation details or how it works start ---
***global.c***
add new variables below "short camsprite":
short portalsprite0 = -1; short portalsprite1 = -1;
***duke3d.h***
extern those two new variables below "extern char env_music_fn[4][13];"
"extern short camsprite, portalsprite0, portalsprite1;"
***actors.c:***
for each PORTAL#x tile create a function that's gonna act like a trigger
to activate this particular PORTAL#x tile. movestandablesportal0(ID#128)
for PORTAL0 and movestandablesportal1(ID#129) for PORTAL1.
***game.c***
include "portals.c" file before displayrooms function, modify it as well
in order to call particular portal drawing functions as se40codeportal#x
and se40_DrawPortal#x and restore screen after them. Make a function
before "short spawn" - startspriteportal and call movestandablesportal0,
movestandablesportal1 inside. Modify function "domovethings" in order
to call "startspriteportal" function after "movefta();" call.
Modify function "short spawn" in order to include "PORTAL0 and PORTAL1"
actors to be included in this line "if( PN != SPEAKER && PN != LETTER..."
Place new cases on the new line like if(PN!=PORTAL0&&PN!=PORTAL1 etc),
if not done, portals are not going to activate drawing automatically.
In the same function "short spawn" extend T temp_data from 6 to 12...
like "T1=T2=T3=T4=T5=T6=T7=T8=T9=T10=T11=T12=0;" In the same function,
add "case PORTAL0, case PORTAL1" before or after "case VIEWSCREEN".
Modify "case CAMERA1-CAMERA4-CAMERAPOLE" to identify cameras linked to
portals and hide them. Also add "case PRTLTELEPDEST" to hide them too.
***portals.c*** contains code to draw portals to their respective tiles.
you can add it right in game.c before "displayrooms" but it was decided
  to put them in a separate file for ease of coding. Note that
  if you change portals.c, you must delete "game.obj" before recompiling,
  otherwise the changes are not going to take effect.
***sector.c***. Modify function "void checksectors", the very beginning
of that function contains a code of portals to teleport player in 3D
space. That means it will not teleport you if you jump above it (made
specially to allow for better 3D like windows and doors place on facade
of multistory buildings). That makes it possible to stack many different
portals in the same X,Y space but different height so you can have
PORTAL0-1 pair on story #3 and PORTAL2-3 pair on story #4 for example
  (I didn't implement more than 1 pair though). Note that this version
  only teleports from the CENTER of a sprite; that's a disadvantage I'm
  looking forward to fixing...
***names.h, soundefs.h, user.con*** include new cases PORTAL0 4960,
PORTAL1 4961, PRTLTELEPDEST 5062.
***engine.c, eng386.c***. Modify the "drawsprite and drawsprite_LQ2X" functions
  in order to depict texture contents on flat sprites like parascan does
to floors and ceilings "parallax textures" but inside a trapezoid frame,
also known as a "wall aligned flat sprite". I'd like to do the same to
ground aligned sprites so that we could have holes in floors, ceilings.
--- implementation details or how it works finish ---

Reply 58 of 58, by Darkcrafter07

User metadata
Rank Newbie
Rank
Newbie

So the portals are still developed and there are quite good improvements!
https://github.com/Darkcrafter07/Duke07/releases/tag/v0.22
v0.22 features (April 26, 2026):
- major sprite portals improvements:
we're still dealing with just 1 portal pair (PORTAL0-PORTAL1) but this
time, there's a better parallax portal-sprite projection made in
engine.c, eng386.c in "drawsprite, drawsprite_LQ2X" function.
  So there are fewer seams, and the image is way more stable and moves around less.
On the other hand, XY camera movement, height tracking and left-right-
-up-down reverse motion capture is introduced, tweaked and aligned
  between each portal and its toolset sprites. The portal framerate was
  doubled, which results in fewer seams.

Portal-teleportation now depends on your sprite size so be careful as
larger tend to teleport earlier (place telepdest sprites further from
your portals!) and smaller ones teleport later so you need to get closer
to them.