Aktív témák

  • P.H.

    senior tag

    Under investigation: 2D gráfpont koordinátaszámítás (nagyítás + eltolás) és vágás több százezer ponton.

    - x87-es kiindulási megoldás (extended precision):

    // ---- x87 variant ----
    // Walks a chain of variable-size records. For each record it computes
    //   XCOOR = int(X * scale - xoff), YCOOR = int(Y * scale - yoff)
    // (conversion uses the current x87 rounding mode) and keeps the record
    // pointer in the list at [esi] when the point passes the bit filter and
    // the bounds check.
    // Registers, as used below: edi = current record, edx = records left,
    // esi = output-list cursor. eax, ecx and ebp are scratch (ebp is overwritten).
    // FPU stack: the loop keeps exactly one temporary on top of three constants
    // loaded by code that is not shown here. With that temporary present they
    // are read as st(1) = Y offset, st(2) = X offset, st(3) = scale.
    @init:
    // Push a dummy value so the unconditional pop at the top of @coor is
    // balanced on the first pass.
    fldz
    @coor:
    // eax = RSIZE of the current record, zero-extended; used to step forward.
    // NOTE(review): RSIZE is treated as unsigned here but compared as signed
    // against LIMIT below — confirm RSIZE never exceeds 7FFFh.
    movzx eax,word ptr [edi+STRUCTURE.RSIZE]
    // Reloaded every pass because ecx is zeroed further down.
    mov cx,[LIMIT]
    // Count down. The flags from this sub are consumed by "jle @sections";
    // lea and fstp in between do not modify EFLAGS.
    sub edx,01h
    // edi = next record: current + RSIZE + STRUCTURESIZE.
    lea edi,[edi+eax+STRUCTURESIZE]
    // Drop the temporary left by the previous pass (or the fldz dummy).
    fstp st(0)
    jle @sections
    // Set bit 7 up front; it is cleared again only if the point is accepted.
    or byte ptr [edi+STRUCTURE.BITFIELD],$80
    // Signed 16-bit compare: records with RSIZE <= LIMIT are skipped.
    cmp [edi+STRUCTURE.RSIZE],cx
    // Loaded before the branch so that both paths leave one temporary on the
    // FPU stack for the fstp at the top of the loop.
    fld dword ptr [edi+STRUCTURE.X]
    jle @coor
    // st(0) = X * scale
    fmul st,st(3)
    mov al,[BITFILTER]
    fld dword ptr [edi+STRUCTURE.Y]
    // ecx = 0: the value written to [esi+04h] below.
    xor ecx,ecx
    // Speculatively store the record pointer; esi only advances on acceptance,
    // so a rejected pointer is overwritten by the next candidate.
    mov [esi],edi
    // st(0) = Y * scale (the constants sit one slot deeper after the second fld).
    fmul st,st(4)
    fxch
    // Flags from this test are consumed by "jnz @coor" below; the x87
    // instructions and movs in between do not modify EFLAGS.
    test [edi+STRUCTURE.BITFIELD],al
    // st(0) = X * scale - X offset
    fsub st,st(3)
    mov eax,[IMAGEWIDTH]
    // Store XCOOR and pop, leaving the scaled Y on top.
    fistp dword ptr [edi+STRUCTURE.XCOOR]
    mov ebp,[IMAGEHEIGHT]
    // st(0) = Y * scale - Y offset
    fsub st,st(1)
    // Zero dword after the speculative pointer.
    mov [esi+04h],ecx
    // Store YCOOR without popping; the value stays on the stack as the
    // temporary that the next pass drops.
    fist dword ptr [edi+STRUCTURE.YCOOR]
    // Any BITFIELD bit selected by BITFILTER rejects the record (bit 7 stays set).
    jnz @coor
    // Bounds check: OR together x, y, IMAGEWIDTH - x and IMAGEHEIGHT - y.
    // The sign bit of the result is set if any of the four is negative.
    // x == IMAGEWIDTH and y == IMAGEHEIGHT pass this test.
    sub eax,[edi+STRUCTURE.XCOOR]
    mov ecx,[edi+STRUCTURE.YCOOR]
    or eax,[edi+STRUCTURE.XCOOR]
    sub ebp,ecx
    or eax,ecx
    or eax,ebp
    js @coor
    // Accepted: clear bit 7 and commit the pointer by advancing the cursor.
    and byte ptr [edi+STRUCTURE.BITFIELD],$7F
    add esi,04h
    jmp @coor
    // Reached when the record counter runs out.
    @sections:
    ...

    - SSE2 (double precision):

    // ---- SSE2 variant ----
    // Same record walk, with X and Y processed as one packed pair: converted to
    // doubles, scaled, offset, and converted back to two int32 values.
    // Registers, as used below: edi = current record, edx = records left,
    // esi = output-list cursor, bp = LIMIT, cl = BITFILTER,
    // bl = BITFIELD value pending write-back for the record at edi.
    // xmm1, xmm2 and xmm4 are only read here; from their use they hold the
    // offsets (subpd), the scale factors (mulpd) and the packed int32 bounds
    // (psubd). Their setup is not shown.
    @initSSE:
    mov bp,[LIMIT]
    mov cl,[BITFILTER]
    // Preload bl so the first write-back in the loop rewrites the same value.
    mov bl,[edi+STRUCTURE.BITFIELD]
    @coorSSE:
    // eax = RSIZE of the current record, zero-extended; used to step forward.
    movzx eax,word ptr [edi+STRUCTURE.RSIZE]
    // Count down. The flags from this sub are consumed by "jle @sectionSSE";
    // the mov, movapd and lea in between do not modify EFLAGS.
    sub edx,01h
    // Write back the BITFIELD computed for the record handled in the previous
    // pass (edi has not been advanced yet).
    mov [edi+STRUCTURE.BITFIELD],bl
    // xmm3 = fresh copy of the bounds; it is overwritten by psubd below.
    movapd xmm3,xmm4
    // edi = next record: current + RSIZE + STRUCTURESIZE.
    lea edi,[edi+eax+STRUCTURESIZE]
    // Start the new record's flags with bit 7 set.
    mov bl,$80
    jle @sectionSSE
    // Converts the two packed floats starting at X to doubles
    // (presumably X followed directly by Y — confirm against the STRUCTURE layout).
    cvtps2pd xmm0,[edi+STRUCTURE.X]
    or bl,[edi+STRUCTURE.BITFIELD]
    // Signed 16-bit compare; its flags are consumed by "jle @coorSSE"
    // (mulpd does not modify EFLAGS). Records with RSIZE <= LIMIT are skipped.
    cmp [edi+STRUCTURE.RSIZE],bp
    // xmm0 = (X, Y) * scale
    mulpd xmm0,xmm2
    jle @coorSSE
    // xmm0 = (X, Y) * scale - offset
    subpd xmm0,xmm1
    // Two int32 results in the low 64 bits, rounded per the MXCSR rounding mode.
    cvtpd2dq xmm0,xmm0
    // Flags from this test are consumed by "jnz @coorSSE" after the movq;
    // psubd and movq do not modify EFLAGS.
    test bl,cl
    // xmm3 = bounds - (x, y)
    psubd xmm3,xmm0
    // Stores 8 bytes starting at XCOOR
    // (presumably XCOOR followed directly by YCOOR — confirm against the layout).
    movq [edi+STRUCTURE.XCOOR],xmm0
    // Any BITFIELD bit selected by BITFILTER rejects the record (bit 7 stays set in bl).
    jnz @coorSSE
    // Combine (x, y) with (bound - x, bound - y): a set sign bit in either
    // dword means the point is out of range.
    por xmm3,xmm0
    pmovmskb eax,xmm3
    // 88h selects mask bits 3 and 7, i.e. the top bytes of dwords 0 and 1,
    // which carry their sign bits.
    test al,88h
    jnz @coorSSE
    // Accepted: clear bit 7 (written back on the next pass) and append the pointer.
    and bl,$7F
    // Non-temporal store of the record pointer into the output list.
    movnti [esi],edi
    add esi,04h
    jmp @coorSSE
    // Reached when the record counter runs out.
    @sectionSSE:
    xor eax,eax
    // Order the preceding movnti stores before the stores that follow.
    sfence
    // Zero dword at the end of the list.
    mov [esi],eax
    ...

    - 3DNow! (single precision):

    // ---- 3DNow! variant ----
    // Same record walk, with X and Y processed as one packed pair of single-
    // precision floats in an MMX register.
    // Registers, as used below: edi = current record, edx = records left,
    // esi = output-list cursor, bp = LIMIT, cl = BITFILTER,
    // bl = BITFIELD value pending write-back for the record at edi.
    // mm1, mm2 and mm4 are only read here; from their use they hold the
    // offsets (pfsub), the scale factors (pfmul) and the packed int32 bounds
    // (psubd). Their setup is not shown.
    @init3DNow:
    mov bp,[LIMIT]
    mov cl,[BITFILTER]
    // Preload bl so the first write-back in the loop rewrites the same value.
    mov bl,[edi+STRUCTURE.BITFIELD]
    @coor3DNow:
    // mm0 = fresh copy of the scale factors; pfmul below overwrites it.
    movq mm0,mm2
    // eax = RSIZE of the current record, zero-extended; used to step forward.
    movzx eax,word ptr [edi+STRUCTURE.RSIZE]
    // Count down. The flags from this sub are consumed by "jle @section3DNow";
    // the mov, movq and lea in between do not modify EFLAGS.
    sub edx,01h
    // Write back the BITFIELD computed for the record handled in the previous
    // pass (edi has not been advanced yet).
    mov [edi+STRUCTURE.BITFIELD],bl
    // mm3 = fresh copy of the bounds; it is overwritten by psubd below.
    movq mm3,mm4
    // edi = next record: current + RSIZE + STRUCTURESIZE.
    lea edi,[edi+eax+STRUCTURESIZE]
    // Start the new record's flags with bit 7 set.
    mov bl,$80
    jle @section3DNow
    // mm0 = scale * the two packed floats starting at X
    // (presumably X followed directly by Y — confirm against the STRUCTURE layout).
    pfmul mm0,[edi+STRUCTURE.X]
    or bl,[edi+STRUCTURE.BITFIELD]
    // Signed 16-bit compare; its flags are consumed by "jle @coor3DNow"
    // (pfsub does not modify EFLAGS). Records with RSIZE <= LIMIT are skipped.
    cmp [edi+STRUCTURE.RSIZE],bp
    // mm0 = (X, Y) * scale - offset
    pfsub mm0,mm1
    jle @coor3DNow
    // NOTE(review): pf2id converts with truncation, whereas the x87 (fist/fistp)
    // and SSE2 (cvtpd2dq) variants use the current rounding mode, so the stored
    // coordinates can differ by one between the variants.
    pf2id mm0,mm0
    // Flags from this test are consumed by "jnz @coor3DNow" after the movq;
    // psubd and movq do not modify EFLAGS.
    test bl,cl
    // mm3 = bounds - (x, y)
    psubd mm3,mm0
    // Stores 8 bytes starting at XCOOR
    // (presumably XCOOR followed directly by YCOOR — confirm against the layout).
    movq [edi+STRUCTURE.XCOOR],mm0
    // Any BITFIELD bit selected by BITFILTER rejects the record (bit 7 stays set in bl).
    jnz @coor3DNow
    // Combine (x, y) with (bound - x, bound - y): a set sign bit in either
    // dword means the point is out of range.
    por mm3,mm0
    pmovmskb eax,mm3
    // 88h selects mask bits 3 and 7, i.e. the top bytes of dwords 0 and 1,
    // which carry their sign bits.
    test al,88h
    jnz @coor3DNow
    // Accepted: clear bit 7 (written back on the next pass) and append the pointer.
    and bl,$7F
    mov [esi],edi
    add esi,04h
    jmp @coor3DNow
    // Reached when the record counter runs out.
    @section3DNow:
    xor eax,eax
    // Leave MMX state so that x87 code can run afterwards.
    emms
    // Zero dword at the end of the list.
    mov [esi],eax
    ...

    A sorrend (TSC alapján):
    - K8: 1. SSE2, 2. 3DNow!, 3. x87 (kb. 5% eltéréssel)
    - NetBurst: 1. x87, 2. SSE2 (elhanyagolható eltéréssel)

    :F

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Aktív témák