Aktív témák

  • P.H.

    senior tag

    válasz P.H. #92 üzenetére

    Egy Pentium 4 egy nagyságrenddel nagyobb teljesítményre képes, mint egy Javában írt program ARM-on (legalábbis azért, mert léteznek pointerek, nincs szigorú típusosság, és nincs a byte-oknál automatikus előjeles kiterjesztés 4 byte-ra, amit le kellene küzdeni, stb.); még úgy is, hogy a branch prediction success rate 87% körüli.

    A ciklus ASM-ben:

    { Stores the byte in AL at [EDI], advances EDI by one, then loads the dword at   }
    { [esp+_aMAXMINBITS] and runs the @read_raw loop until DL is no longer below AL. }
    { NOTE(review): EDI looks like the output pointer, ESI the input pointer, EBX    }
    { the bit buffer and EDX the number of valid bits in it (the BPOS of the         }
    { existing comment) - confirm against the prologue, which is not visible here.   }
    { NOTE(review): the leading brace annotations on each line appear to be the      }
    { author's code-offset and scheduling notes; the visible text does not explain   }
    { them.                                                                          }
    @character:
    {@65} { } mov [edi],al
    { } add edi,01h
    @@DECODE:
    { AL (low byte) is the bound tested by cmp dl,al below; the upper 16 bits of     }
    { EAX are extracted later by shr eax,10h at @createABCcode.                      }
    {@6A} { } mov eax,[esp+_aMAXMINBITS]
    @read_raw: { BPOS may be 20h since decreased soon }
    { Loop body: fetch the byte at [ESI] zero-extended into EBP. }
    { } movzx ebp,byte ptr [esi]
    { Keep the old EDX in ECX: CL is the shift count, ECX the base of the lea. }
    {@70} { -} mov ecx,edx
    { EDX = sign bit of (old EDX - 25), i.e. 0 or 1 after the shr below. }
    { } sub edx,(24+1)
    { Shift the fetched byte left by CL. x86 masks CL to 5 bits for 32-bit shifts. }
    { } shl ebp,cl
    { } shr edx,1Fh
    { Advance ESI by that 0/1 value. }
    { } add esi,edx
    { EDX = old EDX + 8 when the 0/1 value is 1, otherwise the old EDX unchanged. }
    { } lea edx,[ecx+edx*08h]
    { Merge the shifted byte into EBX.                                               }
    { NOTE(review): this OR executes even when the 0/1 value is 0 (ESI and EDX not   }
    { advanced); whether that is harmless depends on invariants not visible here.    }
    { } or ebx,ebp
    { Unsigned 8-bit compare: repeat while DL is below AL. }
    {@81} { **} cmp dl,al
    { **} jb @read_raw
    { Builds a lookup key in EBP from the two low bytes of EBX, searches the table   }
    { at EXT_AMINMAXCODE downward for the matching 8-byte entry, loads a dword via   }
    { that entry's _MIN field, drops bits from EBX/EDX, and branches to @character   }
    { when the loaded value is 255 or less.                                          }
    { On entry EAX still holds the dword loaded from [esp+_aMAXMINBITS] at @@DECODE. }
    @createABCcode:
    { EBP = BL, ECX = BH (both zero-extended). }
    { } movzx ebp,bl
    { } movzx ecx,bh
    { EAX = upper 16 bits of the _aMAXMINBITS dword; it becomes the start index. }
    { } shr eax,10h
    { EBP = REVERSE2hi[BL] + REVERSE2lo[BH] (dword tables, ES-relative).             }
    { NOTE(review): the table names suggest a bit reversal of the low 16 bits of     }
    { EBX - confirm against the table initialisation, which is not visible here.     }
    { } { x1 } mov ebp,dword ptr es:[REVERSE2hi+ebp*04h]
    { } { x1 } add ebp,dword ptr es:[REVERSE2lo+ecx*04h]
    { ECX = start index, EAX = table base; entries are addressed with scale 8. }
    { -} mov ecx,eax
    {@A0} { } mov eax,offset(EXT_AMINMAXCODE)
    { Search loop, two entries per pass, walking ECX downward. }
    @seekABC:
    { If the _MAX field of entry ECX is above EBP (unsigned), entry ECX is the match. }
    {@A5} {0**} cmp [eax+ecx*08h+00h+_MAX],ebp
    {1**} jnbe @foundABC
    { Otherwise test entry ECX-1; lea lowers ECX by 2 without changing the flags. }
    {2} cmp [eax+ecx*08h-08h+_MAX],ebp
    {0} lea ecx,[ecx-02h]
    { Loop while that _MAX is below or equal to EBP (unsigned). }
    {@B1} {1} jbe @seekABC
    { Fall-through: entry (previous ECX - 1) matched, so undo one of the two steps. }
    { } add ecx,01h
    @foundABC:
    { EAX = _MIN field of the matched entry; it is used as a base address below. }
    {@B6} {0} mov eax,[eax+ecx*08h+_MIN]
    { EBP is shifted right by CL (the entry index) and indexes dwords at that base. }
    {1} shr ebp,cl
    {2} { x1 } mov eax,es:[eax+ebp*04h]
    { ECX = 16 - ECX; EDX is reduced by that amount and EBX shifted right by it.     }
    { NOTE(review): presumably 16 - index is the length in bits of the matched code  }
    { - confirm against how EXT_AMINMAXCODE is built.                                }
    {@C0} {0} neg ecx
    {1} add ecx,10h
    {2} sub edx,ecx
    {0} shr ebx,cl
    { Unsigned compare: values 0..255 go to @character, which stores AL at [EDI]. }
    {1**} cmp eax,255
    {2**} jna @character { SHORT jump instruction offset: -6Bh }
    { Reached by fall-through when the looked-up value in EAX is above 255.          }
    { Splits EAX into three fields (low byte, second byte, upper 16 bits), leaves    }
    { to @@SECTION when the upper 16 bits are zero, otherwise combines bits of EBX   }
    { with the upper field, advances EDI by the result, stores its negation in       }
    { [esp+_MOVELEN], and refills EBX in @read__raw.                                 }
    { NOTE(review): the low byte looks like an extra-bit count, the second byte a    }
    { mask for those bits and the upper 16 bits a base length - confirm against the  }
    { code that builds the lookup table, which is not visible here.                  }
    @repeatABC:
    { ECX = copy of EAX (CL = low byte), EBP = AH zero-extended. }
    {@D0} {0-} mov ecx,eax
    {1} movzx ebp,ah
    { EAX = upper 16 bits; jz tests the result of this shift. }
    {2} shr eax,10h
    {0} jz @@SECTION
    { 8-bit subtraction: DL is reduced by CL. }
    {1} sub dl,cl
    { EBP = (second byte of the entry) AND EBX. }
    {@E0} {2} and ebp,ebx
    { Drop CL bits from EBX. }
    {0} shr ebx,cl
    { EBP = masked bits + upper 16-bit field. }
    {1} add ebp,eax
    { EAX = dword at [esp+_dMAXMINBITS]; AL bounds the refill loop below. }
    {2} { x1 } mov eax,ss:[esp+_dMAXMINBITS]
    { EDI is advanced by EBP and the negated EBP is saved in [esp+_MOVELEN].         }
    { NOTE(review): presumably consumed by the code at @copy, which is elided.       }
    {0} add edi,ebp
    {1} neg ebp
    {2} mov [esp+_MOVELEN],ebp
    @read__raw: { BPOS may be 20h since decreased soon }
    { Loop body: fetch the byte at [ESI] zero-extended into EBP. }
    {@F3} {0} movzx ebp,byte ptr [esi]
    { Keep the old EDX in ECX: CL is the shift count, ECX the base of the lea. }
    {1-} mov ecx,edx
    { EDX = sign bit of (old EDX - 25), i.e. 0 or 1 after the shr below. }
    {2} sub edx,(24+1)
    {0} shl ebp,cl
    {1} shr edx,1Fh
    { Advance ESI by that 0/1 value; EDX = old EDX + 8 times that value. }
    {@00} {2} add esi,edx
    {0} lea edx,[ecx+edx*08h]
    { Merge the shifted byte into EBX.                                               }
    { NOTE(review): this OR executes even when the 0/1 value is 0 - confirm that     }
    { the invariants on EDX make this harmless.                                      }
    {1} or ebx,ebp
    { Unsigned 8-bit compare: repeat while DL is below AL. }
    {2**} cmp dl,al
    {0**} jb @read__raw
    { Builds a lookup key in EBP from the two low bytes of EBX, searches the table   }
    { at EXT_DMINMAXCODE downward for the matching 8-byte entry, loads a dword via   }
    { that entry's _MIN field, drops bits from EBX/EDX, and dispatches on AL.        }
    { On entry EAX holds the dword loaded from [esp+_dMAXMINBITS] before @read__raw. }
    @createDISTcode:
    { EBP = BL, ECX = BH (both zero-extended). }
    { } movzx ebp,bl
    { } movzx ecx,bh
    { EAX = upper 16 bits of the _dMAXMINBITS dword; it becomes the start index. }
    {@11} { } shr eax,10h
    { EBP = REVERSE2hi[BL] + REVERSE2lo[BH] (dword tables, ES-relative).             }
    { NOTE(review): the table names suggest a bit reversal of the low 16 bits of     }
    { EBX - confirm against the table initialisation, which is not visible here.     }
    { } { x1 } mov ebp,dword ptr es:[REVERSE2hi+ebp*04h]
    { } { x1 } add ebp,dword ptr es:[REVERSE2lo+ecx*04h]
    { ECX = start index, EAX = table base; entries are addressed with scale 8. }
    {@24} { -} mov ecx,eax
    { } mov eax,offset(EXT_DMINMAXCODE)
    { Search loop, two entries per pass, walking ECX downward. }
    @seekDIST:
    { If the _MAX field of entry ECX is above EBP (unsigned), entry ECX is the match. }
    {0**} cmp [eax+ecx*08h+00h+_MAX],ebp
    {1**} jnbe @foundDIST
    { Otherwise test entry ECX-1; lea lowers ECX by 2 without changing the flags. }
    {@90} {2} cmp [eax+ecx*08h-08h+_MAX],ebp
    {0} lea ecx,[ecx-02h]
    { Loop while that _MAX is below or equal to EBP (unsigned). }
    {1} jbe @seekDIST
    { Fall-through: entry (previous ECX - 1) matched, so undo one of the two steps. }
    { } add ecx,01h
    @foundDIST:
    { EAX = _MIN field of the matched entry; it is used as a base address below. }
    {0} mov eax,[eax+ecx*08h+_MIN]
    { EBP is shifted right by CL (the entry index) and indexes dwords at that base. }
    {@A0} {1} shr ebp,cl
    { NOTE(review): this load has no es: override, unlike the corresponding load at  }
    { @foundABC - confirm the difference is intentional.                             }
    {2} mov eax,[eax+ebp*04h]
    { ECX = 16 - ECX; EDX is reduced by that amount and EBX shifted right by it. }
    {0} neg ecx
    {1} add ecx,10h
    {2} sub edx,ecx
    {0} shr ebx,cl
    { Dispatch on the low byte of the loaded value: negative AL goes to @xxx, zero   }
    { AL goes to @yyy (both targets are outside the visible text); AL in 1..127      }
    { falls through to @DISTbits.                                                    }
    {1**} cmp al,00h
    {@70} {2**} js @xxx
    {0} jz @yyy
    { Refills EBX from [ESI] until EDX is at least 16, then splits the value in EAX  }
    { (loaded at @foundDIST) into a bit count (low 16 bits) and a signed 16-bit      }
    { offset (upper 16 bits), masks bits of EBX with OFF[count], and forms EAX as    }
    { EDI plus the signed offset. Execution then falls through to @copy.             }
    { NOTE(review): presumably EBP ends up holding the extra bits and EAX the        }
    { address to copy from - confirm against @copy, which is elided.                 }
    @DISTbits: { BPOS may be 20h since decreased soon }
    { Loop body: fetch the byte at [ESI] zero-extended into EBP. }
    {0} movzx ebp,byte ptr [esi]
    { Keep the old EDX in ECX: CL is the shift count, ECX the base of the lea. }
    {1-} mov ecx,edx
    { EDX = sign bit of (old EDX - 25), i.e. 0 or 1 after the shr below. }
    {2} sub edx,(24+1)
    {@80} {0} shr edx,1Fh
    { Advance ESI by that 0/1 value; EDX = old EDX + 8 times that value. }
    {1} add esi,edx
    {2} lea edx,[ecx+edx*08h]
    { Shift the fetched byte left by CL (still the old EDX) and merge it into EBX.   }
    { NOTE(review): the OR executes even when the 0/1 value is 0 - confirm that the  }
    { invariants on EDX make this harmless.                                          }
    {0} shl ebp,cl
    {1} or ebx,ebp
    { Unsigned 32-bit compare: repeat while EDX is below 16. }
    {2**} cmp edx,16
    {0**} jb @DISTbits
    @srcposition:
    { ECX = low 16 bits of EAX, zero-extended. }
    {@91} {1} movzx ecx,ax
    { EAX = upper 16 bits of EAX, sign-extended (arithmetic shift). }
    {2} sar eax,10h
    { EBP = dword OFF[ECX]; it is ANDed with EBX below. }
    {0} mov ebp,dword ptr [OFF+ecx*04h]
    { EDX is reduced by ECX. }
    {1} sub edx,ecx
    { EAX = EDI + signed upper field. }
    {@A0} {2} add eax,edi
    { EBP = OFF[ECX] AND EBX; then CL bits are dropped from EBX. }
    {0} and ebp,ebx
    {1} shr ebx,cl
    @copy: ...

Aktív témák