Aktív témák

  • P.H.

    senior tag

    Továbbra is egy szálas program.

    Core2 (2.5 GHz): 60 sec alatt 507000 mátrix
    Sandy Bridge (G1620 2.7 GHz): a K10-es ciklusverzióval 59 sec, a Core2-essel 60 sec

    Akkor is meg fogja oldani 2500 MHz-en bármely Core egy szálon ezt a feladatot; ha kell, fél évig mászok fel hetente 1000 mátrixot, de meg fogja.

    {@04-} { x1 } movsx ecx,byte ptr es:[edx]
    {1-} xor eax,eax
    {2-} mov esi,ebp
    {0} and esi,-8
    @init:
    {@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
    {1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
    {2*} add esi,08h
    {0*} jnz @init { clears ESI register }
    { } add edx,01h
    { -} mov ebx,ebp
    { @@ARGUMENT loop: each pass uses the value already in ECX, then loads the next }
    { sign-extended byte through EDX (EDX advances by 1 per pass). }
    { It stores EBX at [EDI+EAX*8+__0STARROW] with EAX = EBX+ECX*4, and stores EAX }
    { (replaced by ESI when CMP ECX,ESI left the sign flag set) at [EDI+EBP*8+__FIXEDROW]. }
    { EBP steps by 4 and the loop ends when it reaches zero. }
    @@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
    {@20} cmp ecx,esi { 4 AGU + 9 EX uops on Kaveri }
    {1} lea eax,[ebx+ecx*04h] { 3 clk 8 ALU ops on Core 2 }
    {2} movsx ecx,byte ptr [edx] { operand size made explicit: a byte, like the first load before @init }
    {0} lea edx,[edx+01h] { db $8D,$52,$00 }
    {1} mov [edi+eax*08h+__0STARROW],ebx { __0COUNTER <- EBP }
    {2} cmovs eax,esi { flags still come from the CMP: LEA/MOVSX/MOV leave them untouched }
    {0} mov [edi+ebp*08h+__FIXEDROW],eax
    {1*} add ebp,04h
    {2*} jnz @@ARGUMENT { clears EBP register }
    { -} mov eax,edi
    { -} mov ebp,ebx
    { -} xor ecx,ecx
    {@40} add esp,ebx
    { } lea edx,[ebx-04h]
    @@REDUCE_ROWS:
    {@45} mov [edi+edx*08h+__ROWMODIFIER],ecx
    {1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
    {2*} add edx,04h
    {0*} jz @@REDUCE_COLUMNS
    {1} mov [edi+edx*08h+__0STAR],esi
    {2-} xor ecx,ecx
    {0} sub eax,ebp
    {1**} test esi,esi
    {2**} js @@REDUCE_ROWS
    {0-} mov ecx,ebp
    {@60} @findrowmin: { 2 AGU + 5 EX uops on Kaveri }
    {0} mov esi,[eax+ebx] { 2 clk 6 ALU ops on Core 2 }
    {1} or esi,[edi+ebx*08h+__0STARROW]
    {2} cmp esi,ecx
    {0} cmovb ecx,esi
    {1*} add ebx,04h
    {2*} jnz @findrowmin
    {@70-} mov ebx,ebp
    { } neg ecx
    { } jle @@REDUCE_ROWS
    @@ABNORMAL_EXIT:
    {@76} or edx,0FFFFFFFFh
    {1} sub esp,ebp
    {@7E} mov esi,[esp+__MARKS]
    {0} mov [esi+TRESULT.OPTIMUM],edx
    {1} mov ebx,[esi+TRESULT.NEXTIVALUE]
    {2} jmp dword ptr [esp+_INVALIDRESULT]
    { x4 } xor eax,eax; xor edx,edx
    {@90} @initcol:
    {0} mov [edi+__INITCOL],ecx
    {1-} mov esi,ebp
    {2} neg ebp
    {0} push ebp
    {1} or ebx,-1
    {2} jmp @@1ST_STEP { long jump instruction }
    {@A0} { x2 } xor eax,eax
    {@A2} @free0col:
    { } lea ecx,[edx-04h]
    {@A5} @setcolmod:
    { } mov [edi+edx*08h+__COLMODIFIER],esi
    @@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
    {0**} cmp edx,ebp
    {1**} jz @initcol
    {0} sub edx,04h
    {@B0-} xor esi,esi
    {1**} test [edi+edx*08h+__0STARROW],ebp
    {2**} js @setcolmod
    { } lea ebx,[edi+edx]
    { -} mov ecx,ebp
    { -} mov eax,ebp
    { } sub ebx,ebp
    {@C0} @findcolmin:
    {0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
    {1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
    {2} or esi,[edi+ecx*08h+__FIXEDROW]
    {0} jz @test0row
    {1} sub ebx,ebp
    {2} cmp esi,eax
    {@D0} cmovb eax,esi
    {1*} add ecx,04h
    {2*} jnz @findcolmin
    { } lea ecx,[ebp-04h]
    { -} mov esi,eax
    { } lea ebx,[edi+edx]
    {@E0**} test eax,eax { JS/JNS can only fuse with TEST }
    { **} js @@ABNORMAL_EXIT
    {@E4} @seekcol0:
    {0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ecx,04h
    {2*} jz @free0col
    {0} sub ebx,ebp
    {1} add eax,[ebx]
    {@F1**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
    {0**} jnz @seekcol0
    @test0row:
    { **} test [edi+ecx*08h+__0STAR],ebp
    { **} js @seekcol0
    { } mov [edi+edx*08h+__0STARROW],ecx
    {@FE} mov [edi+ecx*08h+__0STAR],edx
    {@02} jns @free0col { forced conditional jump for Sandy Bridge }
    { ----------------------------------------------------------------------------------------------- }
    {@04} { x12 } mov eax,00000000h; mov edx,00000000h; xor ecx,ecx
    {@10} { x2 } xor ebp,ebp
    @@5TH_STEP: { K10:2.5 Core2:2.4 - 2.8 uop/clk - 1900*2+4800
    {@12} mov eax,[edi+__INITCOL] { lea ebx,[ebp-04h] }
    {1} mov esi,[esp+__SIZE]
    {2} add eax,04h
    {0} movsx ebx,word ptr [edi+__MINCOLROW]
    {@20} @DEC5_free_col: { 3 AGU + 6 EX uops on Kaveri }
    {0} mov ebp,[edi+eax*08h+__COLMARK] { 2 clk 5 ALU ops on Core 2 }
    {1} sar ebp,1Fh
    {2} and ebp,edx
    {0} add [edi+eax*08h+__COLMODIFIER],ebp
    {1*} add eax,04h
    {@30*} jnz @DEC5_free_col { clears EAX register }
    { } mov eax,[esp+__SIZE+esi*04h]
    { } movsx ecx,word ptr [edi+__MINCOLROW+02h]
    { } jmp @INC5_marked_row
    { x4 } xor ebp,ebp; xor esi,esi
    {@40} @inc5row:
    {0} add [edi+eax*08h+__ROWMODIFIER],edx { 4 AGU + 4 EX uops on Kaveri }
    {1-} mov eax,ebp
    @INC5_marked_row:
    {2} mov ebp,[esp+esi*04h]
    {0*} sub esi,01h
    {1*} jge @inc5row { sets ESI to 0FFFFFFFFh }
    @@3RD_STEP:
    {@4E*} and esi,[edi+ebx*08h+__0STAR]
    {@52*} jz @4TH_STEP { long jump instruction }
    {@58} @re3start:
    { } mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
    { } { x1 } mov ecx,es:[edi+__INITCOL] { lea ecx,es:[ebp-04h] }
    {@60-} mov edx,ebx
    {@62} @mark3row:
    { } mov [esp+__OFFS+eax*04h],ebx
    { -} xor ebx,ebx
    { } mov [edi+esi*08h+__COLMARK],esi { unmark column with negative }
    { } add dword ptr [esp+__SIZE],01h
    {@71} @chk2col:
    {0*} add ecx,04h
    {1*} jz @@5TH_STEP
    {2**} test [edi+ecx*08h+__COLMARK],ecx { STORE FORWARDED from @mark3row }
    {0**} jns @chk2col
    @@2ND_STEP:
    {12} push dword ptr [edi+ecx*08h+__COLMODIFIER]
    {@80} lea eax,[ecx+edi]
    { } sub ebx,ebp
    { } sal ecx,10h
    { } mov esi,[edi+ebx*08h+__ROWMODIFIER]
    {@8C} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
    {0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
    {@8F} add esi,[eax+ebp]
    {C2D} lea eax,[eax+ebp]
    {2} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
    {0} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
    {1} jz @zero
    {K10}// lea eax,[eax+ebp]
    {0} cmp esi,edx
    {@9F} cmovb edx,esi
    {@A2} cmovb cx,bx
    @over2flow:
    {0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ebx,04h
    {2*} jnz @ZERO2col
    {@AF} @zero:
    {0} pop eax { add esp,04h } { forces ESP handling to AGU/memory pipe on Kaveri/Core }
    {@B0-} mov eax,ecx
    {2} sar ecx,10h
    {0} cmovnc eax,[edi+__MINCOLROW]
    {1} mov [edi+__MINCOLROW],eax
    {2**} test ebx,ebx
    {0**} jz @chk2col
    {@C0*} add esi,[edi+ebx*08h+__0STAR] { zero found -> ESI=0 }
    {2*} jz @4TH_STEP
    {0} mov eax,[esp+__SIZE]
    {1**} cmp word ptr [edi+__MINCOLROW],bx { STORE FORWARDED }
    {2**} jz @re3start
    {@D0} cmp esi,ecx
    {1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
    {2} cmovl ecx,esi
    {0*} sub ecx,04h { never clears ECX register }
    {1*} jnz @mark3row { forced conditional jump for Sandy Bridge }
    { x2 } xor esi,esi
    {@E0} { x4 } lea eax,[ebp+ebp+00h]
    @@4TH_STEP: { 5 AGU + 3 EX uops on Kaveri }
    {@E4-} mov ebx,edx
    @4TH_STEP:
    {@E6} mov edx,[edi+ecx*08h+__0STARROW]
    {2} mov [edi+ebx*08h+__0STAR],ecx
    {0} mov [edi+ecx*08h+__0STARROW],ebx
    {@F0} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
    {2**} cmp edx,00h
    {0**} jnz @@4TH_STEP
    { } sub esi,ebp
    { } sub edx,ebp
    { } lea ecx,[esi-04h] { mov ecx,[edi+__INITCOL] }
    @@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
    {@00} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
    {1} and ebx,eax { 2 clk 6 ALU ops on Core 2 }
    {2} not eax
    {0} mov [edi+esi*08h+__COLMARK],eax
    {1} mov eax,[edi+esi*08h+__FIXEDROW]
    {2} cmovs ecx,esi
    {0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
    {1*} add esi,04h
    {2*} jnz @@1ST_STEP { clears ESI register }
    { } mov [esp+__SIZE],esi
    { -} xor ebx,ebx
    {@21*} add ecx,04h { long jump instruction }
    { *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative ECX:initcol (>= EBP) }
    { } mov esi,[esp+ebp+04h+__MARKS]
    { -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
    @@results:

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    válasz P.H. #101 üzenetére

    Eheti 1000-mátrix-hetente rovat :)

    Core2 (2.5 GHz): 60 sec alatt 510000 mátrix

    {@04-} { x1 } movsx ebx,byte ptr es:[edx]
    {1-} xor eax,eax
    {2-} mov esi,ebp
    {0} and esi,-8
    @init:
    {@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
    {1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
    {2*} add esi,08h
    {0*} jnz @init { clears ESI register }
    { } add edx,01h
    { -} mov ecx,ebp
    { @@ARGUMENT loop: each pass uses the value already in EBX, then loads the next }
    { sign-extended byte through EDX (EDX advances by 1 per pass). }
    { It stores EBP at [EDI+EAX*8+__0STARROW] with EAX = EBP+EBX*4, and stores EAX }
    { (replaced by ESI when CMP EBX,ESI left the sign flag set) at [EDI+ECX*8+__FIXEDROW]. }
    { ECX steps by 4 and the loop ends when it reaches zero. }
    @@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
    {@20} cmp ebx,esi { 4 AGU + 9 EX uops on Kaveri }
    {1} lea eax,[ebp+ebx*04h+00h] { 3 clk 8 ALU ops on Core 2 }
    {2} movsx ebx,byte ptr [edx] { operand size made explicit: a byte, like the first load before @init }
    {0} lea edx,[edx+01h]
    {1} mov [edi+eax*08h+__0STARROW],ebp { __0COUNTER <- EBP }
    {2} cmovs eax,esi { flags still come from the CMP: LEA/MOVSX/MOV leave them untouched }
    {0} mov [edi+ecx*08h+__FIXEDROW],eax
    {1*} add ecx,04h
    {2*} jnz @@ARGUMENT { clears ECX register }
    { } add esp,ebp
    { -} mov eax,edi
    { -} push ebp
    {@40-} lea edx,[ebp-04h]
    @@REDUCE_ROWS:
    {@43} mov [edi+edx*08h+__ROWMODIFIER],ecx
    {1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
    {2*} add edx,04h
    {0*} jz @@REDUCE_COLUMNS
    {@50} mov [edi+edx*08h+__0STAR],esi
    {2-} xor ecx,ecx
    {0} sub eax,ebp
    {1**} test esi,esi { JS/JNS can only fuse with TEST }
    {2**} js @@REDUCE_ROWS
    { -} mov ebx,ebp { EBX < 0 for even minimum }
    { } mov ecx,[eax+ebp]
    {@61} or ecx,[edi+ebp*08h+__0STARROW]
    { } and ebp,04h
    { } add ebp,ebx
    {@69} @findrowmin: { K10:2.8 Core2:2.2 - 2.6 uop/clk - 1100*2+5000 }
    {0} mov esi,[eax+ebp+00h] { 4 AGU + 8 EX uops on Kaveri }
    {1} or esi,[edi+ebp*08h+(00h*08h)+__0STARROW] { 3 clk 10 ALU ops on Core 2 }
    {2} add ebp,08h
    {@72} cmp esi,ebx
    {1} cmovb ebx,esi
    {2} mov esi,[eax+ebp-04h]
    {0} or esi,[edi+ebp*08h-(04h*08h)+__0STARROW]
    {1} cmp esi,ecx
    {@81} cmovb ecx,esi
    {0**} test ebp,ebp
    {1**} jnz @findrowmin
    { } mov ebp,[esp+00h]
    { } cmp ebx,ecx
    { } cmovb ecx,ebx
    {@90} neg ecx
    { } jle @@REDUCE_ROWS
    @@ABNORMAL_EXIT:
    {@94} pop eax
    {1} sub esp,ebp
    {2} mov edx,0FFFFFFFFh
    {0} mov esi,[esp+__MARKS]
    {@A0} mov [esi+TRESULT.OPTIMUM],edx
    {2} mov ebx,[esi+TRESULT.NEXTIVALUE]
    {0} jmp dword ptr [esp+_INVALIDRESULT]
    { } { x6 } test ebp,0FFFFFFFFh
    {@90} @initcol:
    {0} neg dword ptr [esp+00h]
    {1-} mov esi,ebp
    {2} neg ebp
    {0} mov [edi+__INITCOL],ecx
    {1} or ebx,-1
    {2} jmp @@1ST_STEP { long jump instruction }
    { Head of the column loop. @free0col and @setcolmod both fall into @@REDUCE_COLUMNS, }
    { which is also entered directly from the row loop. EDX steps down by 4 per pass and }
    { the loop leaves for @initcol once EDX equals EBP. }
    {@A2} @free0col:
    { } lea ecx,[edx-04h]
    {@A5} @setcolmod:
    { } mov [edi+edx*08h+__COLMODIFIER],esi { store ESI as the modifier of column EDX }
    @@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
    {0**} cmp edx,ebp { restored: without it JZ would test stale flags from the jump into this label }
    {1**} jz @initcol
    {0} sub edx,04h
    {@B0-} xor esi,esi
    {1**} test [edi+edx*08h+__0STARROW],ebp
    {2**} js @setcolmod { sign flag set by the TEST: take the @setcolmod path with ESI = 0 }
    { } lea ebx,[edi+edx]
    { -} mov ecx,ebp
    { -} mov eax,ebp
    { } sub ebx,ebp
    {@C0} @findcolmin: { K10:3.0 Core2:_._ - _._ uop/clk - ____*2+____
    {0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
    {1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
    {2} or esi,[edi+ecx*08h+__FIXEDROW]
    {0} jz @test0row
    {1} sub ebx,ebp
    {2} cmp esi,eax
    {@D0} cmovb eax,esi
    {1*} add ecx,04h
    {2*} jnz @findcolmin
    { } lea ecx,[ebp-04h]
    { -} mov esi,eax
    { } lea ebx,[edi+edx]
    {@E0**} test eax,eax { JS/JNS can only fuse with TEST }
    { **} js @@ABNORMAL_EXIT
    {@E4} @seekcol0:
    {0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ecx,04h
    {2*} jz @free0col
    {0} sub ebx,ebp
    {1} add eax,[ebx]
    {@F1**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
    {0**} jnz @seekcol0
    @test0row:
    { **} test [edi+ecx*08h+__0STAR],ebp
    { **} js @seekcol0
    { } mov [edi+edx*08h+__0STARROW],ecx
    {@FE} mov [edi+ecx*08h+__0STAR],edx
    {@02} jns @free0col { forced conditional jump for Sandy Bridge }
    { ----------------------------------------------------------------------------------------------- }
    {@04} { x12 } mov eax,00000000h; mov edx,00000000h; xor ebp,ebp
    {@10} { x5 } mov ecx,00000000h
    @@5TH_STEP: { K10:2.6 Core2:2.4 - 2.8 uop/clk - 2000*2+5100
    {@15} mov eax,[edi+__INITCOL] { lea eax,[ebp+04h]; neg eax }
    {1} mov esi,[esp+__SIZE]
    {2} movsx ebx,word ptr [edi+__MINCOLROW]
    {@20} @DEC5_free_col: { 3 AGU + 6 EX uops on Kaveri }
    {0} add [edi+eax*08h+__COLMODIFIER],ecx { 2 clk 5 ALU ops on Core 2 }
    {1} mov ecx,[edi+eax*08h+(04h*08h)+__COLMARK]
    {2} sar ecx,1Fh
    {0} and ecx,edx
    {1*} add eax,04h
    {@30*} jnz @DEC5_free_col { clears EAX register [NOT USED] }
    { } mov eax,[esp+__SIZE+esi*04h]
    { } movsx ecx,word ptr [edi+__MINCOLROW+02h]
    { } jmp @INC5_marked_row
    { x4 } xor ebp,ebp; xor esi,esi
    {@40} @inc5row:
    {0} add [edi+eax*08h+__ROWMODIFIER],edx { 4 AGU + 4 EX uops on Kaveri }
    {1-} mov eax,ebp
    @INC5_marked_row:
    {2} mov ebp,[esp+esi*04h]
    {0*} sub esi,01h
    {1*} jge @inc5row { sets ESI to 0FFFFFFFFh }
    @@3RD_STEP:
    {@4E*} and esi,[edi+ebx*08h+__0STAR]
    {@52*} jz @4TH_STEP { long jump instruction }
    {@58} @re3start:
    { } mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
    { } { x1 } mov ecx,es:[edi+__INITCOL] { lea ecx,es:[ebp-04h] }
    {@60-} mov edx,ebx
    {@62} @mark3row:
    { } mov [esp+__OFFS+eax*04h],ebx
    { -} xor ebx,ebx
    { } mov [edi+esi*08h+__COLMARK],esi { unmark column with negative }
    { } inc eax
    { } mov [esp+__SIZE],eax
    {@71} @chk2col:
    {0*} add ecx,04h
    {1*} jz @@5TH_STEP { clears ECX register }
    {2**} test [edi+ecx*08h+__COLMARK],ecx { STORE FORWARDED from @mark3row }
    {0**} jns @chk2col
    @@2ND_STEP:
    {12} push dword ptr [edi+ecx*08h+__COLMODIFIER]
    {@80} lea eax,[ecx+edi]
    { } sub ebx,ebp
    { } sal ecx,10h
    { } mov esi,[edi+ebx*08h+__ROWMODIFIER]
    {@8C} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
    {0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
    {@8F} add esi,[eax+ebp]
    {C2D} lea eax,[eax+ebp] { Core 2, Kaveri }
    {2} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
    {0} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
    {1} jz @zero
    {K10}// lea eax,[eax+ebp] { K10, Sandy Bridge, Ivy Bridge }
    {0} cmp esi,edx
    {@9F} cmovb edx,esi
    {@A2} cmovb cx,bx
    @over2flow:
    {0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ebx,04h
    {2*} jnz @ZERO2col { clears EBX register }
    {@AF} @zero:
    {0} pop eax { add esp,04h } { forces ESP handling to AGU/memory pipe on Kaveri/Core }
    {@B0-} mov eax,ecx
    {2} sar ecx,10h
    {0} cmovnc eax,[edi+__MINCOLROW]
    {1} mov [edi+__MINCOLROW],eax
    {2**} test ebx,ebx
    {0**} jz @chk2col
    {@C0*} add esi,[edi+ebx*08h+__0STAR] { zero found -> ESI=0 }
    {2*} jz @4TH_STEP
    {0} cmp ax,bx
    {1} { x1 } mov eax,ss:[esp+__SIZE]
    {2} jz @re3start
    {@D0} cmp esi,ecx
    {1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
    {2} cmovl ecx,esi
    {0*} sub ecx,04h { never clears ECX register }
    {1*} jnz @mark3row { forced conditional jump for Sandy Bridge }
    { x2 } xor esi,esi
    {@E0} { x4 } lea eax,[ebp+ebp+00h]
    @@4TH_STEP: { 5 AGU + 3 EX uops on Kaveri }
    {@E4-} mov ebx,edx { 2 clk 2 ALU ops on Core 2 }
    @4TH_STEP:
    {@E6} mov edx,[edi+ecx*08h+__0STARROW]
    {2} mov [edi+ebx*08h+__0STAR],ecx
    {0} mov [edi+ecx*08h+__0STARROW],ebx
    {@F0} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
    {2**} cmp edx,00h
    {0**} jnz @@4TH_STEP { clears EDX register }
    { } sub esi,ebp
    { } sub edx,ebp
    { } lea ecx,[esi-04h] { mov ecx,[edi+__INITCOL] }
    @@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
    {@00} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
    {1} and ebx,eax { 3 clk 6 ALU ops on Core 2 }
    {2} not eax
    {0} mov [edi+esi*08h+__COLMARK],eax
    {1} mov eax,[edi+esi*08h+__FIXEDROW]
    {2} cmovs ecx,esi
    {0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
    {1*} add esi,04h
    {2*} jnz @@1ST_STEP { clears ESI register }
    { } mov [esp+__SIZE],esi
    { -} xor ebx,ebx
    {@21*} add ecx,04h { long jump instruction }
    { *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative ECX:initcol (>= EBP) }
    { } mov esi,[esp+ebp+04h+__MARKS]
    { -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
    @@results:
    {@30} mov eax,[edi+edx*08h+__0STAR] { 3 AGU + 8 EX uops on Kaveri }
    {1} add ebx,ebp
    {2} add ecx,[ebx+eax]
    {0} add eax,ebp
    {1} shr eax,02h
    {2} mov [esi],al
    {@40} add esi,01h
    {1*} add edx,04h
    {2*} jnz @@results { clears EDX register ( DL=0 as head, DH=0 as length ) }
    {0} pop eax
    {1} add esp,ebp
    {2} neg ebp
    {0} or eax,-1
    {@50} lea ebx,[edi+ebp*04h]
    {1} sar ebp,02h
    {2} mov [esi+ebp+TRESULT.OPTIMUM],ecx
    {0} add esi,ebp
    {1-} xor ecx,ecx
    {2} jmp @onchain

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    válasz P.H. #102 üzenetére

    Core2 (2.5 GHz): 59 sec alatt megoldja a feladatot
    K10 (2.9 GHz): Core2-nek tetsző ciklusverzióval 46 sec alatt oldja meg a feladatot
    Prescott (2.26 GHz): 60 sec alatt 220000 mátrix

    {@04-} { x1 } movsx ebx,byte ptr es:[edx]
    {1-} xor eax,eax
    {2-} mov esi,ebp
    {0} and esi,-8
    @init:
    {@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
    {1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
    {2*} add esi,08h
    {0*} jnz @init { clears ESI register }
    { } add edx,01h
    { -} mov ecx,ebp
    { @@ARGUMENT loop: each pass uses the value already in EBX, then loads the next }
    { sign-extended byte through EDX (EDX advances by 1 per pass). }
    { It stores EBP at [EDI+EAX*8+__0STARROW] with EAX = EBP+EBX*4, and stores EAX }
    { (replaced by ESI when CMP EBX,ESI left the sign flag set) at [EDI+ECX*8+__FIXEDROW]. }
    { ECX steps by 4 and the loop ends when it reaches zero. }
    @@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
    {@20} cmp ebx,esi { 4 AGU + 9 EX uops on Kaveri }
    {1} lea eax,[ebp+ebx*04h+00h] { 3 clk 8 ALU ops on Core 2 }
    {2} movsx ebx,byte ptr [edx] { operand size made explicit: a byte, like the first load before @init }
    {0} lea edx,[edx+01h]
    {1} mov [edi+eax*08h+__0STARROW],ebp { __0COUNTER <- EBP }
    {2} cmovs eax,esi { flags still come from the CMP: LEA/MOVSX/MOV leave them untouched }
    {0} mov [edi+ecx*08h+__FIXEDROW],eax
    {1*} add ecx,04h
    {2*} jnz @@ARGUMENT { clears ECX register }
    { -} { x2 } xor ecx,ecx
    { -} mov eax,edi
    { -} push ebp
    {@40-} lea edx,[ebp-04h]
    @@REDUCE_ROWS:
    {@43} mov [edi+edx*08h+__ROWMODIFIER],ecx
    {1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
    {2*} add edx,04h
    {0*} jz @@REDUCE_COLUMNS
    {@50} mov [edi+edx*08h+__0STAR],esi
    {2-} xor ecx,ecx
    {0} sub eax,ebp
    {1**} test esi,esi { JS/JNS can only fuse with TEST }
    {2**} js @@REDUCE_ROWS
    { -} mov ebx,ebp { EBX < 0 for even minimum }
    { } mov ecx,[eax+ebp]
    {@61} or ecx,[edi+ebp*08h+__0STARROW]
    { } and ebp,04h
    { } add ebp,ebx
    {@69} @findrowmin: { K10:2.8 Core2:2.2 - 2.6 uop/clk - 1100*2+5000 }
    {0} mov esi,[eax+ebp+00h] { 4 AGU + 8 EX uops on Kaveri }
    {1} or esi,[edi+ebp*08h+(00h*08h)+__0STARROW] { 3 clk 10 ALU ops on Core 2 }
    {2} add ebp,08h
    {@72} cmp esi,ebx
    {1} cmovb ebx,esi
    {2} mov esi,[eax+ebp-04h]
    {0} or esi,[edi+ebp*08h-(04h*08h)+__0STARROW]
    {1} cmp esi,ecx
    {@81} cmovb ecx,esi
    {0**} test ebp,ebp
    {1**} jnz @findrowmin
    { } mov ebp,[esp+00h]
    { } cmp ebx,ecx
    { } cmovb ecx,ebx
    {@90} neg ecx
    { } jle @@REDUCE_ROWS
    { -} nop
    @@ABNORMAL_EXIT:
    {@95} pop eax
    {1} or edx,-1
    {2} mov esi,[esp+__MARKS]
    {0} mov [esi+TRESULT.OPTIMUM],edx
    {@A0} mov ebx,[esi+TRESULT.NEXTIVALUE]
    {2} jmp dword ptr [esp+_INVALIDRESULT]
    { } { x6 } test ebp,0FFFFFFFFh
    {@AD} @init0col:
    {0} mov [edi+__INITCOL],ecx
    {@B0-} mov esi,ebp
    {2} neg ebp
    {0} or ebx,-1
    {1*} sub ecx,04h
    {2*} jnz @@1ST_STEP { long jump instruction } { forced conditional jump for Sandy Bridge }
    {@C0} { x3 } cmp ebp,00h
    {@C3} @free0col:
    { -} mov ecx,edx
    {@C5} @setcolmod:
    { } mov [edi+edx*08h+__COLMODIFIER],esi
    @@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
    {0**} cmp edx,ebp
    {1**} jz @init0col
    {0} sub edx,04h
    {@D0-} xor esi,esi
    {1**} test [edi+edx*08h+__0STARROW],ebp
    {2**} js @setcolmod
    { } lea ebx,[edi+edx]
    { -} mov ecx,ebp
    { -} mov eax,ebp
    { } sub ebx,ebp
    {@E0} @findcolmin: { K10:3.0 Core2:_._ - _._ uop/clk - ____*2+____
    {0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
    {1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
    {2} or esi,[edi+ecx*08h+__FIXEDROW]
    {0} jz @test0row
    {1} sub ebx,ebp
    {2} cmp esi,eax
    {@F0} cmovb eax,esi
    {1*} add ecx,04h
    {2*} jnz @findcolmin
    { } lea ecx,[ebp-04h]
    { -} mov esi,eax
    { } lea ebx,[edi+edx]
    {@00**} test eax,eax { JS/JNS can only fuse with TEST }
    { **} js @@ABNORMAL_EXIT
    {@04} @seekcol0:
    {0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ecx,04h
    {2*} jz @free0col
    {0} sub ebx,ebp
    {1} add eax,[ebx]
    {@11**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
    {0**} jnz @seekcol0
    @test0row:
    { **} test [edi+ecx*08h+__0STAR],ebp
    { **} js @seekcol0
    { } mov [edi+edx*08h+__0STARROW],ecx
    {@1E} mov [edi+ecx*08h+__0STAR],edx
    {@22} jns @free0col { forced conditional jump for Sandy Bridge }
    { ----------------------------------------------------------------------------------------------- }
    {@24} { x12 } test ebp,0FFFFFFFFh; test edi,0FFFFFFFFh
    {@30} { x9 } mov ecx,00000000h; xor esi,esi; xor edi,edi
    @@5TH_STEP: { K10:2.6 Core2:_._ - _._ uop/clk - ____*2+____
    {@39} mov ecx,[edi+__MINCOLROW]
    { } sub ebx,ebp
    { } neg edx
    {@40} @DEC5_free_col: { 5 AGU + 11 EX uops on Kaveri }
    {0} mov eax,[edi+ebx*08h+__COLMARK] { 3 clk 8 ALU ops on Core 2 }
    {1} sar eax,1Fh
    {2} mov [edi+ebx*08h+__COLMARK],eax
    {0} and eax,edx
    {1} sub [edi+ebx*08h+__COLMODIFIER],eax
    {@51} mov eax,[edi+ebx*08h+__0COLON___ROWMARK]
    {0} sar eax,1Fh
    {1} and eax,edx
    {2} sub [edi+ebx*08h+__ROWMODIFIER],eax
    {0*} add ebx,04h
    {@61*} jnz @DEC5_free_col { clears EBX register [NOT USED] }
    {@63} movsx ebx,cx
    {1} sar ecx,10h
    {2} mov esi,[edi+ebx*08h+__0STAR]
    {0**} cmp esi,00h
    {@70**} jz @4TH_STEP { long jump instruction }
    {2} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
    {0} mov dword ptr [edi+esi*08h+__COLMARK],0FFFFFFFFh { unmark column with -1 }
    {1} mov esi,[edi+__INITCOL]
    {@85} @mark3row:
    { -} xor ebx,ebx
    { } lea ecx,[esi-04h]
    { } jmp @chk2col
    @pass2col:
    { } mov [edi+ecx*08h+__COLMARK],ecx { re-mark column with column index <> -1 }
    {@90} @chk2col:
    {0*} add ecx,04h
    {1*} jz @@5TH_STEP { clears ECX register }
    {2**} cmp [edi+ecx*08h+__COLMARK],ecx
    {0**} jbe @chk2col
    @@2ND_STEP:
    { } lea eax,[ecx+edi]
    { } sub ebx,ebp
    @continue:
    {@A0} { x1 } push dword ptr es:[edi+ecx*08h+__COLMODIFIER]
    { } sal ecx,10h
    { } mov esi,[edi+ebx*08h+__ROWMODIFIER]
    {@AC} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
    {0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
    {@AF} add esi,[eax+ebp]
    {C2D} lea eax,[eax+ebp]
    {2} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
    {0} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
    {1} jz @@3RD_STEP
    {K10}// lea eax,[eax+ebp]
    {0} cmp esi,edx
    {@BF} cmovb edx,esi
    {@C2} cmovb cx,bx
    @over2flow:
    {0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ebx,04h
    {2*} jnz @ZERO2col { clears EBX register }
    @@3RD_STEP:
    {@CF} pop esi { add esp,04h } { enforces ESP handling to AGU/load pipe on Kaveri/Core }
    {@D0-} mov esi,ecx
    {2} sar ecx,10h
    {0} cmovnc esi,[edi+__MINCOLROW]
    {1} mov [edi+__MINCOLROW],esi
    {2**} test ebx,ebx
    {0**} jz @pass2col
    {@E0} mov esi,[edi+ebx*08h+__0STAR]
    {2**} test esi,esi
    {0**} jz @4TH_STEP
    {1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
    {2} or dword ptr [edi+esi*08h+__COLMARK],-1 { unmark column with -1 }
    {@F1**} cmp word ptr [edi+__MINCOLROW],bx
    {1**} jz @re2start
    {2**} cmp esi,ecx { jb = jl for 2 negative numbers }
    {0**} jb @mark3row
    {1*} add ebx,04h
    {2*} jnz @continue
    {@00} jmp @pass2col
    {1} { x2 } xor eax,eax
    {@04} @re2start:
    {0} mov ecx,[edi+__INITCOL]
    {1-} mov ebx,ebp
    {2} neg ebx
    @initcol:
    {0} sar dword ptr [edi+ebx*08h+__COLMARK],1Fh
    {@10*} add ebx,04h
    {2*} jnz @initcol { clears EBX register }
    { } or edx,-1
    { *} sub ecx,04h
    { *} jnz @chk2col { long jump instruction }
    {@20} { x4 } lea eax,[ebp+ebp+00h]
    @@4TH_STEP: { 5 AGU + 3 EX uops on Kaveri }
    {@24-} mov ebx,edx { 2 clk 2 ALU ops on Core 2 }
    @4TH_STEP:
    {@26} mov edx,[edi+ecx*08h+__0STARROW]
    {2} mov [edi+ebx*08h+__0STAR],ecx
    {0} mov [edi+ecx*08h+__0STARROW],ebx
    {@30} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
    {2**} cmp edx,00h
    {0**} jnz @@4TH_STEP { clears EDX register }
    { } sub esi,ebp
    { } sub edx,ebp
    { } lea ecx,[esi-04h]
    @@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
    {@40} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
    {1} and ebx,eax { 3 clk 6 ALU ops on Core 2 }
    {2} not eax
    {0} mov [edi+esi*08h+__COLMARK],eax
    {1} mov eax,[edi+esi*08h+__FIXEDROW]
    {2} cmovs ecx,esi
    {0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
    {1*} add esi,04h
    {2*} jnz @@1ST_STEP { clears ESI register }
    { } { x3 } mov ebx,00000000h
    {@60*} add ecx,04h { long jump instruction }
    { *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative = -EBP ECX:initcol (>= EBP) }
    { } { x1 } mov esi,ss:[esp+04h+__MARKS]
    { -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
    @@results:
    {@70} mov eax,[edi+edx*08h+__0STAR] { 3 AGU + 8 EX uops on Kaveri }
    {1} add ebx,ebp
    {2} add ecx,[ebx+eax]
    {0} add eax,ebp
    {1} shr eax,02h
    {2} mov [esi],al
    {@80} add esi,01h
    {1*} add edx,04h
    {2*} jnz @@results { clears EDX register ( DL=0 as head, DH=0 as length ) }

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    Több mint 1 Kbyte-os (1124 karakter) Java utasítás: arg egész pozitív számot szöveggé konvertálja dest tömbbe i pozíciótól kezdődően, legfeljebb size karakter méretben; 0-k helyett space-ekkel felvezetve, ha a szám kevesebb karakterből áll, mint size.

    /**
     * Writes the decimal digits of {@code arg} into {@code dest} as ASCII bytes,
     * right-aligned in a field that starts at index {@code i}, with spaces in
     * place of leading zeros.
     *
     * <p>The sign of {@code arg} is dropped: only its magnitude is written. The
     * field width is {@code size} clamped to the range 1..10, so at least the
     * last digit is always stored. When the number has more digits than the
     * field, only the lowest digits are kept; zeros below a dropped non-zero
     * digit stay {@code '0'} rather than turning into spaces.
     *
     * <p>This replaces the former single-expression version, whose
     * multiply-and-shift division by 100000 rounded down on exact multiples of
     * 100000 (100000 itself came out with a ':' in place of its leading digit)
     * and which mishandled {@code Integer.MIN_VALUE}.
     *
     * @param arg  value to convert; its magnitude is used
     * @param size requested field width in characters (clamped to 1..10)
     * @param i    index in {@code dest} of the first character of the field
     * @param dest destination buffer, modified in place
     * @return {@code dest}, for call chaining
     * @throws ArrayIndexOutOfBoundsException if the field does not fit in {@code dest}
     * @throws NullPointerException if {@code dest} is {@code null}
     */
    public static byte[] inttoSPACEstr(int arg, int size, int i, byte[] dest) {
        final int maxDigits = 10;
        final int width = Math.max(1, Math.min(size, maxDigits));

        // Widen before taking the magnitude so Integer.MIN_VALUE does not overflow.
        long rest = Math.abs((long) arg);

        // Fill the field from its last character backwards. 'rest' always holds
        // every digit not yet emitted, including ones that will not fit, so
        // "rest == 0" means "nothing but leading zeros from here on".
        for (int k = width - 1; k >= 0; k--) {
            final boolean leadingZero = rest == 0 && k != width - 1;
            dest[i + k] = leadingZero ? (byte) ' ' : (byte) ('0' + (int) (rest % 10));
            rest /= 10;
        }
        return dest;
    }

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    Nincs többé külön K10-nek tetsző és külön Core2-nek tetsző verzió: egyetlen van, amely mindkettőn jó, kb. 0.5% veszteséggel.

    Core2 (2.5 GHz): 58 sec alatt megoldja a feladatot
    K10 (2.9 GHz): 44 sec alatt megoldja a feladatot
    Prescott (2.26 GHz): 60 sec alatt 220000 mátrix
    Northwood (2.4 GHz): 60 sec alatt 248000 mátrix

    {@04-} { x1 } movsx ebx,byte ptr es:[edx]
    {1-} mov esi,ebp
    {2-} xor eax,eax
    {0} and esi,-8
    @init:
    {@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
    {1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
    {2*} add esi,08h
    {0*} jnz @init { clears ESI register }
    { } add edx,01h
    { -} mov ecx,ebp
    @@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
    {@20} cmp ebx,esi { 4 AGU + 9 EX uops on Kaveri }
    {1} lea eax,[ebp+ebx*04h+00h] { 3 clk 8 ALU ops on Core 2 }
    {2} movsx ebx,byte ptr [edx]
    {0} lea edx,[edx+01h] { db $8D,$52,$00 }
    {1} mov [edi+eax*08h+__0STARROW],ebp { __0COUNTER <- EBP }
    {2} cmovs eax,esi
    {0} mov [edi+ecx*08h+__FIXEDROW],eax
    {1*} add ecx,04h
    {2*} jnz @@ARGUMENT { clears ECX register }
    { -} { x2 } xor ecx,ecx
    { -} mov eax,edi
    { -} push ebp
    {@40-} lea edx,[ebp-04h]
    @@REDUCE_ROWS:
    {@43} mov [edi+edx*08h+__ROWMODIFIER],ecx
    {1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
    {2*} add edx,04h
    {0*} jz @@REDUCE_COLUMNS
    {@50} mov [edi+edx*08h+__0STAR],esi
    {2-} xor ecx,ecx
    {0} sub eax,ebp
    {1**} test esi,esi { JS/JNS can only fuse with TEST }
    {2**} js @@REDUCE_ROWS
    { -} mov ebx,ebp { EBX < 0 for even minimum }
    { } mov ecx,[eax+ebp]
    {@61} or ecx,[edi+ebp*08h+__0STARROW]
    { } and ebp,04h
    { } add ebp,ebx
    {@69} @findrowmin: { K10:2.8 Core2:2.2 - 2.6 uop/clk - 1100*2+5000 }
    {0} mov esi,[eax+ebp] { 4 AGU + 8 EX uops on Kaveri }
    {1} or esi,[edi+ebp*08h+(00h*08h)+__0STARROW] { 3 clk 10 ALU ops on Core 2 }
    {2} add ebp,08h
    {@72} cmp esi,ebx
    {1} cmovb ebx,esi
    {2} mov esi,[eax+ebp-04h]
    {0} or esi,[edi+ebp*08h-(04h*08h)+__0STARROW]
    {1} cmp esi,ecx
    {@81} cmovb ecx,esi
    {0**} test ebp,ebp
    {1**} jnz @findrowmin
    { } mov ebp,[esp+00h]
    { } cmp ebx,ecx
    { } cmovb ecx,ebx
    {@90} neg ecx
    { } jle @@REDUCE_ROWS
    { -} { x1 } nop
    @@ABNORMAL_EXIT:
    {@95} pop eax
    {1} { x3 } or edx,0FFFFFFFFh
    {2} mov esi,[esp+__MARKS]
    {@A0} mov [esi+TRESULT.OPTIMUM],edx
    {1} mov ebx,[esi+TRESULT.NEXTIVALUE]
    {2} jmp dword ptr [esp+_INVALIDRESULT]
    { x1 } nop
    {@AB} @init0col:
    {0} mov [edi+__INITCOL],ecx
    {1-} mov esi,ebp
    {@B0} neg ebp
    {0} mov ebx,0FFFFFFFFh
    {1*} sub ecx,04h
    {2*} jnz @@1ST_STEP { long jump instruction } { forced conditional jump for Sandy Bridge }
    {@C0} { x2 } xor eax,eax
    {@C2} @free0col:
    { } mov [edi+edx*08h+__COLMODIFIER],esi { no need to initialize __COLMODIFIER of fixed column }
    { -} mov ecx,edx
    {@C8} @next0col:
    { **} cmp edx,ebp
    { **} jz @init0col
    @@REDUCE_COLUMNS: { no need to initialize -initcol in ECX }
    {0} mov eax,[edi+edx*08h-(04h*08h)+__0STARROW]
    {@D0} sub edx,04h
    {2*} sub eax,01h
    {0*} jnc @next0col
    { } { x1 } lea ebx,es:[edi+edx]
    { -} mov ecx,ebp
    { } sub ebx,ebp
    {@E0} @findcolmin: { K10:3.0 Core2:_._ - _._ uop/clk - ____*2+____
    {0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
    {1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
    {2} or esi,[edi+ecx*08h+__FIXEDROW]
    {0} jz @test0row
    {1} sub ebx,ebp
    {2} cmp esi,eax
    {@F0} cmovb eax,esi
    {1*} add ecx,04h
    {2*} jnz @findcolmin
    { } lea ecx,[ebp-04h]
    { -} mov esi,eax
    { } lea ebx,[edi+edx]
    {@00**} test eax,eax { JS/JNS can only fuse with TEST }
    { **} js @@ABNORMAL_EXIT
    {@04} @seekcol0:
    {0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ecx,04h
    {2*} jz @free0col
    {0} sub ebx,ebp
    {1} add eax,[ebx]
    {@11**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
    {0**} jnz @seekcol0
    @test0row:
    { **} test [edi+ecx*08h+__0STAR],ebp { JS/JNS can only fuse with TEST }
    { **} js @seekcol0
    { } mov [edi+edx*08h+__0STARROW],ecx
    {@1E} mov [edi+ecx*08h+__0STAR],edx
    {@22} jns @free0col { forced conditional jump for Sandy Bridge }
    { ----------------------------------------------------------------------------------------------- }
    {@24} { x12 } mov eax,00000000h; mov edx,00000000h; xor ecx,ecx
    {@30} { x5 } mov esi,00000000h
    @@5TH_STEP: { K10:2.2 Core2:2.2 - 2.7 uop/clk - 3050*2+3700 }
    {@35} movsx esi,word ptr [edi+__MINCOLROW+00h]
    { } sub ebx,ebp
    { } movsx eax,word ptr [edi+ebx*08h+__SIGN-(04h*08h)+__COLMARK]
    {@40} @5th_step: { 5 AGU + 11 EX uops on Kaveri }
    {0} movsx ecx,word ptr [edi+ebx*08h+__SIGN+__0COLON___ROWMARK] { 4 clk 6 ALU ops on Core 2 }
    {1} mov [edi+ebx*08h-(04h*08h)+__COLMARK],eax
    {2} and eax,edx
    {0} add [edi+ebx*08h+__COLMODIFIER],eax
    {@4F} and ecx,edx
    {2} movsx eax,word ptr [edi+ebx*08h+__SIGN-(04h*08h)+(04h*08h)+__COLMARK] { __MINCOLROW col }
    {0} add [edi+ebx*08h+__ROWMODIFIER],ecx
    {1*} add ebx,04h
    {2*} jnz @5th_step { clears EBX register }
    {@5F} mov ecx,[edi+__INITCOL]
    {1-} mov edx,esi
    {2} mov esi,[edi+esi*08h+__0STAR]
    {0**} test esi,esi
    {1**} jz @@4TH_STEP { long jump instruction }
    {@70} mov [edi+edx*08h+__0COLON___ROWMARK],eax { set row mark }
    {0} mov dword ptr [edi+esi*08h-(04h*08h)+__COLMARK],-1 { unmark column with -1 }
    {1} jmp @test2col
    { x2 } xor esi,esi
    {@80} { x2 } xor eax,eax
    {@82} @fast2forward:
    { *} add ebx,04h
    { *} jnz @continue
    {@87} @pass2col:
    { } mov [edi+ecx*08h-(04h*08h)+__COLMARK],ecx { re-mark column with its index != -1 }
    {@8B} @next2col:
    {0*} add ecx,04h
    {1*} jz @@5TH_STEP { clears ECX register }
    @test2col:
    {@90**} cmp [edi+ecx*08h-(04h*08h)+__COLMARK],ecx
    {0**} jbe @next2col
    @@2ND_STEP:
    { } sub ebx,ebp { ordered for Core2 }
    { } lea eax,[ecx+edi] { ordered for Core2 }
    {@9B} @continue:
    { } mov esi,[edi+ecx*08h+__COLMODIFIER]
    { } push esi
    {@A0} sal ecx,10h
    { } mov esi,[edi+ebx*08h+__ROWMODIFIER]
    {@A7} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 { 4 AGU + 11 EX uops on Kaveri }
    {0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
    {1} add esi,[eax+ebp]
    {2} lea eax,[eax+ebp]
    {@B0} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
    {1} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
    {2} jz @@3RD_STEP
    {0} cmp esi,edx
    {1} cmovb edx,esi
    {2} cmovb cx,bx
    {@C1} @over2flow:
    {0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
    {1*} add ebx,04h
    {2*} jnz @ZERO2col { clears EBX register }
    @@3RD_STEP:
    {@CA} pop esi { add esp,04h } { enforces ESP tracking to AGU/load pipe on Bulldozer/Core }
    {1-} mov esi,ecx
    {2} sar ecx,10h
    {@D0} cmovnc esi,[edi+__MINCOLROW]
    {1} mov [edi+__MINCOLROW],esi
    {2**} { x1 } cmp ebx,00h
    {0**} jz @pass2col
    {1} mov esi,[edi+ebx*08h+__0STAR]
    {@E0**} test esi,esi
    {0**} jz @4TH_STEP
    {1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
    {2} mov dword ptr [edi+esi*08h-(04h*08h)+__COLMARK],-1 { unmark column with -1 }
    {@F0**} cmp word ptr [edi+__MINCOLROW],bx { STORE FORWARDED }
    {1**} jz @re2start
    {2**} cmp esi,ecx { jb = jl for 2 negative numbers }
    {0**} jae @fast2forward
    {1-} xor ebx,ebx
    {2-} mov ecx,esi
    {0} jmp @@2ND_STEP
    {@00} @re2start:
    {0} mov ecx,[edi+__INITCOL]
    {1-} mov edx,ebx
    {2-} mov ebx,ebp
    {0} neg ebx
    {@09} @init2col:
    {0} movsx eax,word ptr [edi+ebx*08h+__SIGN-(04h*08h)+__COLMARK]
    {1} mov [edi+ebx*08h-(04h*08h)+__COLMARK],eax
    {2*} add ebx,04h
    {0*} jnz @init2col { clears EBX register }
    { } jmp @test2col { long jump instruction }
    { } { x4 } lea eax,[ebp+ebp+00h]
    {@20} { x2 } test edi,edi
    @@4TH_STEP:
    {@22-} mov ecx,eax
    @re4order:
    {@24-} mov ebx,edx
    @4TH_STEP:
    {@26} mov edx,[edi+ecx*08h+__0STARROW]
    {2} mov [edi+ebx*08h+__0STAR],ecx
    {0} mov [edi+ecx*08h+__0STARROW],ebx
    {@30} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
    {2**} { x1 } cmp edx,00h
    {0**} jnz @re4order { clears EDX register }
    { } sub esi,ebp
    { } sub edx,ebp
    { } lea ecx,[esi-04h]
    @@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
    {@40} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
    {1} and ebx,eax { clears EBX at uncomplete calculation } { 3 clk 6 ALU ops on Core 2 }
    {2} not eax
    {0} mov [edi+esi*08h-(04h*08h)+__COLMARK],eax
    {1} mov eax,[edi+esi*08h+__FIXEDROW]
    {2} cmovs ecx,esi
    {0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
    {1*} add esi,04h
    {2*} jnz @@1ST_STEP { clears ESI register [NOT USED] }
    { *} add ecx,04h { long jump instruction }
    { *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative = -EBP ECX:initcol (>= EBP) }
    { -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
    { } mov esi,[esp+04h+__MARKS]
    @@results:
    {@6A} mov eax,[edi+edx*08h+__0STAR] { 3 AGU + 8 EX uops on Kaveri }
    {1} add ebx,ebp
    {@70} add ecx,[ebx+eax]
    {0} add eax,ebp
    {1} shr eax,02h
    {2} mov [esi],al
    {0} add esi,01h
    {1*} add edx,04h
    {@80*} jnz @@results { clears EDX register ( DL=0 as head, DH=0 as length ) }

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    válasz P.H. #92 üzenetére

    Egy Pentium 4 1 nagyságrenddel nagyobb teljesítményre képes, mint egy Java-ban írt program ARM-on (legalábbis mert léteznek pointerek, nincs szigorú típusosság, nincs byte-nál automatikus előjeles kiterjesztés 4 byte-ra, amit le kell küzdeni, stb.); még úgy is, hogy a branch prediction success rate 87% körüli.

    A ciklus ASM-ben:

    @character:
    {@65} { } mov [edi],al
    { } add edi,01h
    @@DECODE:
    {@6A} { } mov eax,[esp+_aMAXMINBITS]
    @read_raw: { BPOS may be 20h since decreased soon }
    { } movzx ebp,byte ptr [esi]
    {@70} { -} mov ecx,edx
    { } sub edx,(24+1)
    { } shl ebp,cl
    { } shr edx,1Fh
    { } add esi,edx
    { } lea edx,[ecx+edx*08h]
    { } or ebx,ebp
    {@81} { **} cmp dl,al
    { **} jb @read_raw
    @createABCcode:
    { } movzx ebp,bl
    { } movzx ecx,bh
    { } shr eax,10h
    { } { x1 } mov ebp,dword ptr es:[REVERSE2hi+ebp*04h]
    { } { x1 } add ebp,dword ptr es:[REVERSE2lo+ecx*04h]
    { -} mov ecx,eax
    {@A0} { } mov eax,offset(EXT_AMINMAXCODE)
    @seekABC:
    {@A5} {0**} cmp [eax+ecx*08h+00h+_MAX],ebp
    {1**} jnbe @foundABC
    {2} cmp [eax+ecx*08h-08h+_MAX],ebp
    {0} lea ecx,[ecx-02h]
    {@B1} {1} jbe @seekABC
    { } add ecx,01h
    @foundABC:
    {@B6} {0} mov eax,[eax+ecx*08h+_MIN]
    {1} shr ebp,cl
    {2} { x1 } mov eax,es:[eax+ebp*04h]
    {@C0} {0} neg ecx
    {1} add ecx,10h
    {2} sub edx,ecx
    {0} shr ebx,cl
    {1**} cmp eax,255
    {2**} jna @character { SHORT jump instruction offset: -6Bh }
    @repeatABC:
    {@D0} {0-} mov ecx,eax
    {1} movzx ebp,ah
    {2} shr eax,10h
    {0} jz @@SECTION
    {1} sub dl,cl
    {@E0} {2} and ebp,ebx
    {0} shr ebx,cl
    {1} add ebp,eax
    {2} { x1 } mov eax,ss:[esp+_dMAXMINBITS]
    {0} add edi,ebp
    {1} neg ebp
    {2} mov [esp+_MOVELEN],ebp
    @read__raw: { BPOS may be 20h since decreased soon }
    {@F3} {0} movzx ebp,byte ptr [esi]
    {1-} mov ecx,edx
    {2} sub edx,(24+1)
    {0} shl ebp,cl
    {1} shr edx,1Fh
    {@00} {2} add esi,edx
    {0} lea edx,[ecx+edx*08h]
    {1} or ebx,ebp
    {2**} cmp dl,al
    {0**} jb @read__raw
    @createDISTcode:
    { } movzx ebp,bl
    { } movzx ecx,bh
    {@11} { } shr eax,10h
    { } { x1 } mov ebp,dword ptr es:[REVERSE2hi+ebp*04h]
    { } { x1 } add ebp,dword ptr es:[REVERSE2lo+ecx*04h]
    {@24} { -} mov ecx,eax
    { } mov eax,offset(EXT_DMINMAXCODE)
    @seekDIST:
    {0**} cmp [eax+ecx*08h+00h+_MAX],ebp
    {1**} jnbe @foundDIST
    {@90} {2} cmp [eax+ecx*08h-08h+_MAX],ebp
    {0} lea ecx,[ecx-02h]
    {1} jbe @seekDIST
    { } add ecx,01h
    @foundDIST:
    {0} mov eax,[eax+ecx*08h+_MIN]
    {@A0} {1} shr ebp,cl
    {2} mov eax,[eax+ebp*04h]
    {0} neg ecx
    {1} add ecx,10h
    {2} sub edx,ecx
    {0} shr ebx,cl
    {1**} cmp al,00h
    {@70} {2**} js @xxx
    {0} jz @yyy
    @DISTbits: { BPOS may be 20h since decreased soon }
    {0} movzx ebp,byte ptr [esi]
    {1-} mov ecx,edx
    {2} sub edx,(24+1)
    {@80} {0} shr edx,1Fh
    {1} add esi,edx
    {2} lea edx,[ecx+edx*08h]
    {0} shl ebp,cl
    {1} or ebx,ebp
    {2**} cmp edx,16
    {0**} jb @DISTbits
    @srcposition:
    {@91} {1} movzx ecx,ax
    {2} sar eax,10h
    {0} mov ebp,dword ptr [OFF+ecx*04h]
    {1} sub edx,ecx
    {@A0} {2} add eax,edi
    {0} and ebp,ebx
    {1} shr ebx,cl
    @copy: ...

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    válasz P.H. #106 üzenetére

    és befele:

    @program:
    {0} movzx ecx,byte ptr [esi+00h]
    @x:
    {0**} cmp edx,ebp
    {1**} jae @init
    {2} add edx,01h
    {0**} cmp cl,[esi+edx]
    {1**} jz @x
    @init:
    { **} cmp edx,(03+01)
    { **} jae @long_jump
    { -} xor ebp,ebp
    { } mov [esp+_Q],edi
    { } mov ch,cl
    @repeat:
    {0-} mov edi,eax
    {1} and eax,(__WINDOW-1)
    {2} sub edi,ebx
    {0} mov eax,[esp+_PREV+eax*04h]
    {1**} cmp edi,-(__WINDOW)
    {2**} jbe @pre_encode { JLE = JBE for 2 negative numbers }
    {0} add edi,esi
    {1**} cmp ch,[edi+ebp]
    {2**} jnz @repeat
    {0**} cmp cl,[edi+00h]
    {1**} jnz @repeat
    {2-} xor edx,edx
    @length:
    {0**} cmp edx,[esp+...]
    {1**} jae @QQ
    {2} movzx ecx,byte ptr [esi+edx+01h]
    {0} cmp cl,[edi+edx+01h]
    {1} lea edx,[edx+01h]
    {2} jz @length
    {0} movzx ecx,byte ptr [esi+00h]
    {1} sub edi,esi
    {2} cmp edx,ebp
    {0} cmova ebp,edx
    {1} mov ch,[esi+ebp]
    {2} jbe @repeat
    {0} mov [esp+...],edi
    {1} jmp @repeat
    @QQ:
    {0} movzx ecx,byte ptr [esi+00h]
    {1} sub edi,esi
    {2} mov ebp,00000028
    {0} mov [esp+...],edi
    {1} mov edi,[esp+_Q]
    {2-} xor eax,eax
    {0**} cmp edx,???
    {1**} jz @QQQ
    {2-} mov ebp,edx
    @pre_encode:
    {0} mov edi,[esp+_Q]
    {1-} xor eax,eax
    {2*} sub ebp,03h
    {0*} jb @encode_alpha
    {1} lea edx,[ebp+03h]
    @QQQ:
    { } mov [esp+...],ebx
    { -} mov ebx,eax
    { } jz @indexed
    {---} bsr ecx,ebp
    {0*} sub ecx,02h
    {1*} jle @indexed
    {2} mov ebx,dword ptr [...+ecx*04h]
    {0-} mov eax,ecx
    {1} and ebx,ebp
    {2} shr ebp,cl
    {0} lea ebp,[ebp+ecx*04h+00h]
    @indexed:
    {0} mov ecx,dword ptr [...+ebp*04h]
    {1} mov ebp,[esp+_ZIPPED]
    {2} add al,cl
    {0} shl ebx,cl
    {1} shr ecx,10h
    {2} or ebx,ecx
    {0-} mov ecx,edi
    {1} and ecx,07h
    {2} shr edi,03h
    {0} shl ebx,cl
    {1} or bl,[edi+ebp]
    {2} add ecx,eax
    {0} mov eax,[esp+...]
    { Flushes the bits accumulated in EBX one byte per pass: stores BL at [edi+ebp], }
    { advances EDI, shifts the stored byte out of EBX, and repeats while ECX-8 is    }
    { still >= 0 (signed). The loop has its own label name so that the label is      }
    { unique within the asm block.                                                   }
    @writelength:
    {0} mov [edi+ebp],bl
    {1} add edi,01h
    {2} shr ebx,08h
    {0*} sub ecx,08h
    {1*} jge @writelength
    { } xor eax,-1
    { } mov ebp,00000005h
    { } lea edi,[edi*08h+ecx]
    { } jz @distindexed
    {---} bsr ecx,eax
    {0*} sub ecx,01h
    {1*} js @distindexed
    {2} mov ebx,dword ptr [...+ecx*04h]
    {0} add ebp,ecx
    {1} and ebx,eax
    {2} shr eax,cl
    {0} lea eax,[ecx*02h+eax]
    {1} shl ebx,05h
    @distindexed:
    {0-} mov ecx,edi
    {1} or ebx,dword ptr [...+eax*04h]
    {2} mov eax,[esp+_ZIPPED]
    {0} shr edi,03h
    {1} and ecx,07h
    {2} shl ebx,cl
    {0} add ecx,ebp
    {1} or bl,[edi+eax+00h]
    {2} mov ebp,[esp+_HASH]
    { Flushes the bits accumulated in EBX one byte per pass: stores BL at [edi+eax], }
    { advances EDI, shifts the stored byte out of EBX, and repeats while ECX-8 is    }
    { still >= 0 (signed). The loop has its own label name so that the label is      }
    { unique within the asm block.                                                   }
    @writedistance:
    {0} mov [edi+eax],bl
    {1} add edi,01h
    {2} shr ebx,08h
    {0*} sub ecx,08h
    {1*} jge @writedistance
    { } add esi,edx
    { } neg edx
    { } mov ebx,[esp+...]
    { -} mov eax,ebp
    { } lea edi,[edi*08h+ecx]
    { } jmp @administration
    @encode_alpha:
    {0} mov ebp,[esp+_ZIPPED]
    {1} movzx edx,cl
    {2-} mov ecx,edi
    {1} mov edx,dword ptr [...+edx*04h]
    {0} shr edi,03h
    {2} and ecx,07h
    {0-} mov eax,edx
    {1} shr edx,10h
    {2} shl edx,cl
    {0} add cl,al
    {1} mov eax,[esp+_HASH]
    {2} or dl,[edi+ebp]
    { Flushes the bits accumulated in EDX one byte per pass: stores DL at [edi+ebp], }
    { advances EDI, shifts the stored byte out of EDX, and repeats while ECX-8 is    }
    { still >= 0 (signed). The loop has its own label name so that the label is      }
    { unique within the asm block.                                                   }
    @writechar:
    {0} mov [edi+ebp],dl
    {1} add edi,01h
    {2} shr edx,08h
    {0*} sub ecx,08h
    {1*} jge @writechar
    { } xor edx,-1
    { } mov ebp,eax
    { } add esi,01h
    { } lea edi,[edi*08h+ecx]
    @administration:
    {0} shl eax,__BITS
    {1} and eax,__LOOKUP-1
    {2} xor al,[esi+edx+03h]
    {0} mov ecx,[esp+_LAST+ebp*04h]
    {1} mov [esp+_LAST+ebp*04h],ebx
    {2-} mov ebp,ebx
    {0} add ebx,01h
    {1} and ebp,(__WINDOW-1)
    {2} mov [esp+_PREV+ebp*04h],ecx
    {1} add edx,01h
    {0-} mov ebp,eax
    {2} jnz @administration
    { } mov [esp+_HASH],eax
    { } mov ebp,[esp+_LEN]
    { } mov eax,[esp+_LAST+eax*04h]
    { *} sub ebp,ebx
    { *} jz @finalize
    { **} cmp ebp,???
    { **} jae @program
    { } mov [esp+...],ebp
    { } jmp @program
    @long_jump:

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    Production code. P4 ready. Core2-n, K10-en, Bulldozeren, P4-en 2.5x gyorsabb, mint a W..R.r.

    @FIX_character:
    {@A1} {0} mov [edi],al
    {1} add edi,01h
    @@FIX_DECODE: { bpos >= 9 }
    {2} mov eax,[esi]
    {0-} mov ecx,edx
    {1} mov ebp,((-1 shl ZFIX_aMAXBITS) xor -1)
    {@AF} {2} shr edx,03h
    {@B2} {0} xor edx,03h
    {1} and ebp,ebx
    {2} shl eax,cl
    {0} add esi,edx
    {1} lea edx,[ecx+edx*08h]
    {2} or ebx,eax
    @FIX_code:
    {@C0} {0} mov eax,dword ptr [ZFIXED_RELOC+ebp*08h+_CODE]
    {1} mov ecx,dword ptr [ZFIXED_RELOC+ebp*08h+_SIZE]
    {2} sub edx,ecx
    {@D0} {0} shr ebx,cl
    {1**} cmp eax,00000100h
    {2**} jb @FIX_character
    @FIX_length:
    {0-} mov ecx,eax
    {1-} movzx ebp,ah
    {2} jz @SECTION { short jump instruction }
    {@E0} {0} and ebp,ebx
    {1} shr eax,10h
    {2} add ebp,eax
    {0-} mov eax,ebx
    {1} add edi,ebp
    @FIX_distance:
    {2} shr eax,cl
    {0} add cl,ZFIX_dBITS
    {@F0} {1} neg ebp
    {2} sub dl,cl
    {0} and eax,((-1 shl ZFIX_dBITS) xor -1)
    {1} shr ebx,cl
    {2} mov eax,dword ptr [ZFIXED_DIST+eax*04h]
    @FIX_load:
    {@00} {0} mov [esp+_MOVELEN],ebp
    {1} mov ebp,[esi]
    {2-} mov ecx,edx
    {0} shr edx,03h
    {1} xor edx,03h
    {2} shl ebp,cl
    {@10} {0} add esi,edx
    {1} lea edx,[ecx+edx*08h]
    {2} or ebx,ebp
    {0} mov ebp,[esp+_MOVELEN]
    {1**} test al,0FFh
    {2**} js @FIX_movechar
    {@20} {0} jz @FIX_moveword
    @FIX_movedword:
    {1} mov ebp,00000001h
    {2-} mov ecx,eax
    {0} sub dl,al
    {1} sar eax,10h
    {2} shl ebp,cl
    {@30} {0} xor ebp,0FFFFFFFFh
    {1} add eax,edi
    {2} and ebp,ebx
    {0} shr ebx,cl
    {1} mov ecx,[esp+_MOVELEN]
    {@40} {2} sub eax,ebp
    { Copy loop in dword units: each pass moves 8 bytes from [eax+ecx] to [edi+ecx].  }
    { ECX is a negative index that counts up by 8; the loop ends as soon as it is no  }
    { longer negative, so the last pass may copy up to 7 bytes past offset 0.         }
    @FIX_move4byte:
    {0} mov ebp,dword ptr [ecx+eax+00h]
    {1} mov [edi+ecx+00h],ebp
    {2} mov ebp,dword ptr [ecx+eax+04h]
    {0} mov [edi+ecx+04h],ebp
    {@50} {1*} add ecx,08h
    {2*} js @FIX_move4byte { sign still set -> more bytes to copy }
    { } jmp @@FIX_DECODE { back to decoding the next symbol }
    { x6 } cmp esi,00h; cmp edx,00h
    { Copy path in word units. EAX becomes (EAX sar 16) + EDI and is then used as the }
    { source base; EBP is a negative index shared by source and destination.          }
    { NOTE(review): presumably chosen for short distances where source and            }
    { destination overlap - confirm against the ZFIXED_DIST table layout.             }
    @FIX_moveword:
    {@60} {0} sar eax,10h
    {1} add eax,edi
    { Each pass copies two words (4 bytes) from [eax+ebp] to [edi+ebp]; the loop ends }
    { when EBP is no longer negative after adding 4.                                  }
    @FIX_move2byte:
    {@65} {0} movzx ecx,word ptr [eax+ebp+00h]
    {1} mov [edi+ebp+00h],cx
    {@6D} {2} movzx ecx,word ptr [eax+ebp+02h]
    {@72} {0} mov [edi+ebp+02h],cx
    {1*} add ebp,04h
    {2*} js @FIX_move2byte
    { } jmp @@FIX_DECODE { back to decoding the next symbol }
    { Byte-run path: loads the single byte located just before [edi+ebp] and          }
    { replicates it.                                                                  }
    @FIX_movechar:
    {@81} {1} movzx eax,byte ptr [ebp+edi-01h]
    { Each pass stores AL twice (2 bytes) at [edi+ebp]; the loop ends when EBP is no  }
    { longer negative after adding 2, so one byte past offset 0 may be written.       }
    @FIX_repeat:
    {@86} {0} mov [edi+ebp+00h],al
    {1} mov [edi+ebp+01h],al
    {2*} add ebp,02h
    {@90} {0*} js @FIX_repeat
    { } jmp @@FIX_DECODE { back to decoding the next symbol }

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    Production code. P4 ready. Core2-n, K10-en, Bulldozeren, P4-en 12%-kal gyorsabb, mint a W..R.r.

    @samechar:
    {@80} {0**} cmp edx,ebp
    {1**} jae @initrepeat
    @compression:
    {2} cmp cl,[esi+edx+01h]
    {0} lea edx,[edx+01h]
    {1} jz @samechar
    @initrepeat:
    { } { x1 } and ebp,00h
    {@90} { **} cmp edx,(03+01)
    { **} jae @special { long jump instruction }
    { } mov ch,cl
    { } { x1 } mov ss:[esp+_ENCODED],edi
    @repeat:
    {@A0} {0-} lea edi,[eax+ebx]
    {1} and eax,(__WINDOW-1)
    {2**} cmp edi,-(__WINDOW)
    {0**} jbe @pre_encode
    {@B0} {1} add edi,esi
    {2} mov eax,[esp+_PREV+eax*04h]
    {0**} cmp ch,[edi+ebp]
    {1**} jnz @repeat
    {2**} cmp cl,[edi+00h]
    {0**} jnz @repeat
    {@C0} {1-} xor edx,edx
    @length:
    {@C2} {0**} cmp edx,[esp+_MAXRANGE]
    {1**} jae @maximum_repeat
    {2} movzx ecx,byte ptr [esi+edx+01h]
    {0} cmp cl,[edi+edx+01h]
    {@D1} {1} lea edx,[edx+01h]
    {2} jz @length
    {0} movzx ecx,byte ptr [esi+00h]
    {1} mov ch,[esi+ebp]
    {2**} cmp edx,ebp
    {0**} jbe @repeat
    {@E0} {1-} mov ebp,edx
    {2} mov ch,[esi+edx]
    {0} mov [esp+_MOVEDIST],edi
    {2} jmp @repeat
    { x6 } test ebx,00000000h
    {@F0} { x4 } xor ebp,ebp; xor esi,esi
    @maximum_repeat:
    {@F4} {0} mov [esp+_MOVEDIST],edi
    {1} mov ebp,00000028
    {2-} xor eax,eax
    {0-} xor ebx,ebx
    {@00} {1} mov edi,[esp+_ENCODED]
    {2**} cmp edx,00000258
    {0**} jz @indexed
    {1} movzx ecx,byte ptr [esi+00h]
    {2-} mov ebp,edx
    @pre_encode:
    {@11} {0} mov edi,[esp+_ENCODED]
    {1-} xor ebx,ebx
    {2*} sub ebp,03h
    {0*} jb @encode_alpha
    {@20} {1-} movzx eax,bl
    {2} lea edx,[ebp+03h]
    { } jz @indexed
    @encode_length:
    {---} bsr ecx,ebp
    {0*} sub ecx,02h
    {1*} jle @indexed
    {@30} {2-} mov ebx,ebp
    {0} shr ebp,cl
    {1-} mov eax,ebp
    {2} lea ebp,[ebp+ecx*04h+00h]
    {0} shl eax,cl
    {1} xor ebx,eax
    {2-} mov eax,ecx
    { Looks up the table entry for index EBP, 257 dwords into ZFIXED_ENCODE, and      }
    { merges it into the bit buffer EBX: CL (low byte of the entry) is the shift      }
    { count and is added to AL, the upper 16 bits of the entry are OR-ed into EBX.    }
    { EDI is then split into a bit offset (ECX = EDI and 7) and a byte index          }
    { (EDI shr 3); EBX is shifted by the bit offset and combined with the byte        }
    { already present at [edi+ebp]. On exit ECX = bit offset + EAX, and EAX is        }
    { reloaded from [esp+_MOVEDIST].                                                  }
    @indexed:
    {@40} {0} mov ecx,dword ptr [ZFIXED_ENCODE+257*04h+ebp*04h] { dword table, index scaled by 4 }
    {1} mov ebp,[esp+_ZIPPED]
    {2} add al,cl
    {0} shl ebx,cl
    {1} shr ecx,10h
    {@52} {2} or ebx,ecx
    {0-} mov ecx,edi
    {1} and ecx,07h
    {2} shr edi,03h
    {0} shl ebx,cl
    {1} or bl,[edi+ebp]
    {@61} {2} add ecx,eax
    {0} mov eax,[esp+_MOVEDIST]
    @writelength:
    {0} mov [edi+ebp],bl
    {1} add edi,01h
    {2} shr ebx,08h
    {0*} sub ecx,08h
    {@72} {1*} jge @writelength
    { Prepares the distance value for table lookup. EAX = not(EAX - ESI); LEA keeps   }
    { the flags, so the JZ below still tests that result. EDI is rebuilt as a bit     }
    { position: byte index * 8 plus the remainder left in ECX (x86 addressing allows  }
    { only one scaled index register, so ECX is used unscaled as the base).           }
    { If the value has a top bit above bit 0, it is split: EAX becomes the table      }
    { index (value shr cl) + 2*cl, EBX keeps the low CL bits that were shifted out,   }
    { moved up by 5 bit positions, and EBP holds CL. Otherwise EBP stays 0.           }
    @encode_distance:
    { } { x3 } mov ebp,00000000h
    { } sub eax,esi
    { } xor eax,-1
    {@80} { } lea edi,[edi*08h+ecx+00h]
    { } jz @distindexed
    {---} bsr ecx,eax
    {0*} sub ecx,01h
    {1*} js @distindexed
    {2-} mov ebx,eax
    {@90} {0} shr eax,cl
    {1-} mov ebp,eax
    {2} lea eax,[ecx*02h+eax]
    {0} shl ebp,cl
    {1} xor ebx,ebp
    {2-} mov ebp,ecx
    {0} shl ebx,05h
    @distindexed:
    {@A0} {0} or ebx,dword ptr [REVERSE2_5+eax*04h]
    {1-} mov ecx,edi
    {0} shr edi,03h
    {2} mov eax,[esp+_ZIPPED]
    {@B0} {1} and ecx,07h
    {2} shl ebx,cl
    {0} or bl,[edi+eax]
    {1} lea ecx,[ecx+ebp+05h]
    {2} mov ebp,[esp+_HASH]
    @writedistance:
    {@C0} {0} mov [edi+eax],bl
    {1} add edi,01h
    {2} shr ebx,08h
    {0*} sub ecx,08h
    {1*} jge @writedistance
    { } add edx,esi
    {@D0} { } lea edi,[edi*08h+ecx]
    { -} mov eax,ebp
    { } jmp @administration
    { x9 } xor eax,eax; xor ebp,ebp; mov edx,00000000h
    {@00} { x4 } xor edx,edx; xor ecx,ecx
    @encode_alpha:
    {@04} {0} mov ebp,[esp+_ZIPPED]
    {1-} movzx edx,cl
    {2-} mov ecx,edi
    {0} shr edi,03h
    {@10} {1} mov edx,dword ptr [ZFIXED_ENCODE+edx*04h]
    {2} and ecx,07h
    {0} movzx eax,dx
    {1} shr edx,10h
    {@20} {2} shl edx,cl
    {0} add ecx,eax
    {1} mov eax,[esp+_HASH]
    {2} or dl,[edi+ebp]
    @writechar:
    {0} mov [edi+ebp],dl
    {1} add edi,01h
    {@31} {2} shr edx,08h
    {0*} sub ecx,08h
    {1*} jge @writechar
    { } lea edx,[esi+01h]
    { } lea edi,[edi*08h+ecx+00h]
    @administration:
    {@20} {0} mov ecx,[esp+_LAST+eax*04h]
    {1} mov [esp+_LAST+eax*04h],esi
    {2} shl eax,__BITS
    {0} and eax,__LOOKUP-1
    {1-} mov ebp,esi
    {2} xor al,[esi+03h]
    {0} add esi,01h
    {1} and ebp,(__WINDOW-1)
    {2} mov [esp+_PREV+ebp*04h],ecx
    {0**} cmp esi,edx
    {1**} jnz @administration
    @nextloop:
    {0} mov [esp+_HASH],eax
    {@70} {1} mov ebp,[esp+_LEN]
    {2} sub ebx,esi
    {0} mov eax,[esp+_LAST+eax*04h]
    {1-} xor edx,edx
    {2} movzx ecx,byte ptr [esi+00h]
    {0*} sub ebp,esi
    {1*} jz @finalize
    {2**} cmp ebp,00000258
    {@90} {0**} jae @compression
    { } mov [esp+_MAXRANGE],ebp
    { } jmp @compression
    { x1 } nop
    {@80} { x1 } nop
    @special:

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    válasz P.H. #108 üzenetére

    Egy program sosincs befejezve, csak abbahagyva van.

    @FIX_character:
    {@A2} {0} mov [edi],al
    {1-} mov ecx,edx
    {2} add edi,01h
    @@FIX_symbol:
    {@A9} {0} mov eax,(($1 shl ZFIX_aMAXBITS)-1)
    {1} shl ebp,cl
    {@B0} {2} xor ecx,(03h shl 03h)
    {0} and ax,bx
    {1} or edx,(03h shl 03h)
    {2} shr ecx,03h
    {0} or ebx,ebp
    {1} add esi,ecx
    @@FIX_code:
    {@C0} {2} mov ecx,dword ptr [ZFIXED_RELOC+eax*08h+_SIZE]
    {0} mov ebp,[esi]
    {1} mov eax,dword ptr [ZFIXED_RELOC+eax*08h+_CODE]
    {@D0} {2} shr ebx,cl
    {0*} sub edx,ecx
    {1*} jnc @FIX_character
    @FIX_length:
    {2} sar ecx,10h
    {0} and al,bl
    {1} sub dl,cl
    {2} shr ebx,cl
    {@FF} {0-} movzx ecx,al
    {@02} {1} sar eax,10h
    {2} jz @NXSECTION
    {0} sub eax,ecx
    {1-} movzx ecx,bl
    @FIX_distance:
    {@10} {2} sub edx,ZFIX_dBITS
    {0} mov [esp+_MOVELEN],eax
    {1} sub edi,eax
    {2} mov eax,dword ptr [ZFIXED_DISTx8+ecx*04h]
    {@20} {0-} movzx ecx,dl
    {1} shr ebx,ZFIX_dBITS
    @FIX_load:
    {2} or edx,(03h shl 03h)
    {0} shl ebp,cl
    {1} xor ecx,(03h shl 03h)
    {2} shr ecx,03h
    {@31} {0} or ebx,ebp
    {1} mov ebp,[esp+_MOVELEN]
    {2} movzx edx,dl
    {0} add esi,ecx
    {1-} mov ecx,eax
    {2} sar eax,10h
    {@41} {0} jns @FIX_movechar
    {1} jc @FIX_moveword
    {2} mov ebp,00000001h
    @FIX_movedword:
    {0} sub dl,cl
    {1} shl ebp,cl
    {2} add eax,edi
    {@50} {0} add ebp,0FFFFFFFFh
    {1} and ebp,ebx
    {2} shr ebx,cl
    {0} mov ecx,[esp+_MOVELEN]
    {1} sub eax,ebp
    @FIX_move4byte:
    {@60} {0} mov ebp,[ecx+eax+00h]
    {1} mov [edi+ecx+00h],ebp
    {2} mov ebp,[ecx+eax+04h]
    {0} mov [edi+ecx+04h],ebp
    {1*} add ecx,08h
    {@71} {2*} js @FIX_move4byte
    {0-} mov ecx,edx
    {1} mov ebp,[esi]
    {2} jmp @@FIX_symbol
    { x4 } lea eax,[edx+edx+01h]
    @FIX_movechar:
    {@60} {0} movzx eax,byte ptr [ebp+edi-01h]
    {0} imul ecx,eax
    {1-} mov eax,ebx
    @FIX_move1byte:
    {0} mov [edi+ebp+00h],ecx
    {@70} {1} mov [edi+ebp+04h],ecx
    {2*} add ebp,08h
    {0*} js @FIX_move1byte
    {1} and eax,(($1 shl ZFIX_aMAXBITS)-1)
    {2} jmp @@FIX_code
    @FIX_moveword:
    {@80} {0} add eax,edi
    @FIX_move2byte:
    {@82} {0} movzx ecx,word ptr [eax+ebp+00h]
    {1} mov [edi+ebp+00h],cx
    {2} movzx ecx,word ptr [eax+ebp+02h]
    {@8F} {0} mov [edi+ebp+02h],cx
    {@94} {1*} add ebp,04h
    {2*} js @FIX_move2byte
    {0-} mov eax,ebx
    {1} and eax,(($1 shl ZFIX_aMAXBITS)-1)
    {@A0} {2} jmp @@FIX_code

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    válasz P.H. #105 üzenetére

    Örömmel jelentem, hogy lassabb nem lett

    .............................

    { } movsx ebx,byte ptr [edx]
    { } xor eax,eax
    { } mov esi,ebp
    { } mov [edi+__N],ebp
    { } and esi,-8
    { } mov ecx,ebp
    @init:
    { } mov [edi+esi*08h+(00h*08h)+__K],eax
    { } mov [edi+esi*08h+(04h*08h)+__K],eax
    { } add esi,08h
    { } jnz @init
    { Per-entry setup loop. EBX holds the current signed input byte and ECX is a      }
    { negative index stepped by 4 until it reaches zero. For each entry:              }
    {   EAX = EBP + EBX*4, and EBP is stored at [edi+eax*08h+__K];                    }
    {   if EBX is negative (flags from the CMP; LEA/MOVSX/MOV leave them intact),     }
    {   EAX is replaced by ESI before it is stored at [edi+ecx*08h+__F].              }
    { The next input byte is fetched from [edx+01h] and EDX is advanced by one.       }
    @argument:
    { } cmp ebx,00h
    { } lea eax,[ebp+ebx*04h]
    { } movsx ebx,byte ptr [edx+01h]
    { } lea edx,[edx+01h]
    { } mov [edi+eax*08h+__K],ebp
    { } cmovs eax,esi
    { } mov [edi+ecx*08h+__F],eax
    { } add ecx,04h
    { } jnz @argument
    { } mov [edi+ebp*08h-(04h*08h)+__K],esi
    { } lea edx,[ebp-04h]
    { } mov ebx,edi
    { } jmp @next0row
    @ROWS:
    { } mov [edi+__LEFT1+edx*08h+__W],esi
    @next0row:
    { } mov eax,[edi+edx*08h+(04h*08h)+__S]
    { } add edx,04h
    { } jz @@COLUMNS
    { } mov [edi+edx*08h+B],eax
    { } xor esi,esi
    { } mov [edi+edx*08h+_R],eax
    { } sub ebx,ebp
    { **} test eax,eax
    { **} jnz @@ROWS
    { } lea ecx,[ebp+04h]
    { } mov esi,[ebx+ebp]
    { } or esi,[edi+ebp*08h+__K]
    { } and ecx,-8
    @findrowmin:
    { } mov eax,[ebx+ecx]
    { } or eax,[edi+ecx*08h+__K]
    { } add ecx,08h
    { } cmp eax,ebp
    { } cmovb ebp,eax
    { } mov eax,[ebx+ecx-04h]
    { } or eax,[edi+ecx*08h-(04h*08h)+__K]
    { } cmp eax,esi
    { } cmovb esi,eax
    { **} cmp ecx,00h
    { **} jnz @findrowmin
    { } cmp ebp,esi
    { } cmovb esi,ebp
    { } mov ebp,[edi+__N]
    { } neg esi
    { } jle @@ROWS
    @@XXX:
    { } mov esi,[esp+_A]
    { } mov ecx,[esp+_I]
    { } mov dword ptr [esi+A.OPTIMUM],?
    { } mov ebx,[esi+TRESULT.NEXT]
    { } jmp ecx
    @free0col:
    { } add dword ptr [edi+__0COUNTER],-1
    { } mov [edi+__CCOLMIN],esi
    { } mov ecx,0FFFFFFFFh
    { } mov [edi+ebp*08h-(04h*08h)+__K],edx
    @0col:
    { } mov [edi+__LEFT1+edx*08h+__COLMOD],esi
    @@COLUMNS:
    { -} mov ebx,ebp
    { Column scan, walking EDX downwards in steps of 4. Stores ECX into the __C slot  }
    { of the current column, then loads the __K entry of the column below it.         }
    { If EDX-4-EBP is negative, control leaves to @@INIT0COL. Otherwise EDX is        }
    { stepped and ECX is inverted; the loop repeats while the inverted value is       }
    { non-negative, i.e. while the loaded __K entry was negative.                     }
    @next0col:
    { } mov [edi+__LEFT1+edx*08h+__C],ecx
    { } mov ecx,[edi+edx*08h-(04h*08h)+__K]
    { } lea eax,[edx-04h]
    { } sub eax,ebp
    { } js @@INIT0COL
    { } sub edx,04h
    { } xor ecx,-1
    { } jns @next0col
    @findcolmin:
    { } mov esi,[eax+edi]
    { } add esi,[edi+__LEFT1+ebx*08h+__U]
    { } or esi,[edi+ebx*08h+__S]
    { } jz @test0row
    { } sub eax,ebp
    { } cmp esi,ecx
    { } cmovb ecx,esi
    { } add ebx,04h
    { } jnz @findcolmin
    { } mov eax,edx
    { } lea ebx,[ebp-04h]
    { } mov esi,ecx
    { } cmp ecx,00h
    { } js @@XXX
    @seek0col:
    { } mov ecx,[edi+__LEFT1+ebx*08h+(04h*08h)+__W]
    { } sub eax,ebp
    { } add ebx,04h
    { } jz @free0col
    { } add ecx,[eax+edi]
    { **} cmp ecx,esi
    { **} jnz @seek0col
    @test0row:
    { } mov ecx,[edi+ebx*08h+B]
    { **} test ecx,ecx
    { **} js @seek0col
    { } mov [edi+ebx*08h+B],edx
    { } mov [edi+edx*08h+__K],ebx
    { } jmp @0col
    @@INIT0COL:
    { } lea eax,[edi+ecx]
    { } mov [edi+__L],ecx
    { } neg ebp
    { } sal ecx,10h
    { } jnz @init2col
    { -} xor esi,esi
    { } jmp @@FINISHED
    @@5TH_STEP:
    { -} mov edx,esi
    { } movsx esi,si
    { } sar eax,10h
    @5TH_STEP:
    { } movsx ecx,byte ptr [edi+__LEFT1+ebx*08h+__SIGN+__C]
    { } and ecx,edx
    { } add [edi+__LEFT1+ebx*08h+__COLMOD],ecx
    { } movsx ecx,byte ptr [edi+ebx*08h+__SIGN+_R]
    { } and ecx,edx
    { } add [edi+__LEFT1+ebx*08h+__W],ecx
    { } add ebx,04h
    { } jnz @5TH_STEP
    { } mov edx,es:[edi+esi*08h+B]
    { } db $8B,$8C,$3B,__L,?,?,?
    { **} test edx,edx
    { **} jz @@4TH_STEP
    { } add dword ptr [edi+__PN],-1
    { } mov [edi+esi*08h+_R],eax
    { } mov [edi+__LEFT1+edx*08h+__C],esi
    { } cmp edx,ecx
    { } cmovb ecx,edx
    { } sub ebx,ebp
    { } mov [edi+__L],ecx
    { } jmp @@9ND_STEP
    @fast6forward:
    { } mov esi,[edi+__LEFT1+ebx*08h+(04h*08h)+__W]
    { } add ebx,04h
    { } jz @pass8col
    { } sal ecx,10h
    { } jmp @loop
    @pass8col:
    { } mov eax,[edi+__N]
    { } sub ebx,ebp
    { } mov [edi+__LEFT1+ecx*08h+__C],eax
    { Steps ECX to the next column (4 per step). When ECX reaches zero, control goes  }
    { to @@5TH_STEP; otherwise the loop continues while the column's __C entry is     }
    { below or equal to EAX (unsigned compare).                                       }
    @next20col:
    { } add ecx,04h
    { } jz @@5TH_STEP { clears ECX register }
    { **} cmp [edi+__LEFT1+ecx*08h+__C],eax
    {0**} jbe @next20col
    @@8ND_STEP:
    { } mov esi,[edi+__LEFT1+ecx*08h+__COLMOD]
    { } lea eax,[edi+ecx]
    { } mov [edi+__CCOL],esi
    { } imul ecx,00010000h
    @init2col:
    { } add eax,ebp
    { } mov esi,[edi+ebx*08h+__W]
    @loop:
    { } sub esi,[edi+__COLMIN]
    { } add esi,[eax]
    { } lea eax,[eax+ebp]
    { } jo @over6flow
    { } or esi,[edi+ebx*08h+_R]
    { } jz @@11D_STEP
    { } cmp esi,edx
    { } cmovb edx,esi
    { } cmovb cx,bx
    @over6flow:
    { } mov esi,[edi+__LEFT1+ebx*08h+(04h*08h)+__W]
    { } add ebx,04h
    { } jnz @loop
    { -} mov eax,ecx
    { } sar ecx,10h
    { } cmovc esi,eax
    { } mov [edi+__MC],esi
    { } jmp @pass8col
    { Reached from the scan loop when the combined value for row EBX is zero.         }
    { ESI takes a copy of ECX, then ECX is shifted right arithmetically by 16; if     }
    { that shift leaves the carry clear, ESI is replaced by [edi+__M]. ESI is stored  }
    { to [edi+__MC]. The B entry of row EBX is then loaded: zero jumps to @4TH_STEP;  }
    { otherwise ECX is stored in the row's _R slot and the __C slot indexed by that   }
    { B entry is set to -1.                                                           }
    { NOTE(review): no @@7ND_STEP label is visible in this listing - confirm the      }
    { intended jump target.                                                           }
    @@11D_STEP:
    { -} mov esi,ecx
    { } sar ecx,10h
    { } cmovnc esi,[edi+__M]
    { } mov [edi+__MC],esi
    { } mov esi,[edi+ebx*08h+B]
    { **} test esi,esi
    { **} jz @4TH_STEP
    { } mov [edi+ebx*08h+_R],ecx
    { } mov dword ptr [edi+__LEFT1+esi*08h+__C],-1 { explicit size for the immediate store }
    { **} cmp word ptr [edi+__MC],bx
    { **} jz @re2start
    { **} cmp esi,ecx
    { **} jae @fast6forward
    { } mov ecx,esi
    { } xor ebx,ebx
    { } mov eax,[edi+__L]
    { } sub ebx,ebp
    { } cmp esi,eax { keep the unsigned minimum of ESI and [edi+__L] }
    { } cmovb eax,esi
    { } mov [edi+__L],eax
    { } jmp @@7ND_STEP
    @re2start:
    { } mov ecx,[edi+__L]
    { } xor ebx,ebx
    { } add dword ptr [edi+__PN],-1
    { } mov edx,esi
    { } cmp esi,ecx
    { } cmovb ecx,esi
    { } sub ebx,ebp
    { } mov [edi+__L],ecx
    { } jmp @@6ND_STEP
    @@4TH_STEP:
    { } mov ecx,eax
    { } mov ebx,esi
    @4TH_STEP:
    { } mov [edi+ebx*08h+BB],ecx
    { } mov edx,[edi+ecx*08h+__K]
    { } mov [edi+ecx*08h+__K],ebx
    { } mov ebx,edx
    { } mov ecx,[edi+edx*08h+_R]
    { } sub edx,ebp
    { } jnc @4TH_STEP
    { } xor esi,esi
    { } mov ebx,edx
    { } sub dword ptr [edi+__0COUNTER],-1
    { } jz @@FINISHED
    @@1ST_STEP:
    { } mov eax,[edi+esi*08h-(04h*08h)+__K]
    { } xor eax,-1
    { } mov [edi+__LEFT1+esi*08h-(04h*08h)+__C],eax
    { } lea esi,[esi-04h]
    { } mov eax,[edi+esi*08h+__S]
    { } cmovs ecx,esi
    { } mov [edi+esi*08h+_R],eax
    { **} cmp edx,esi
    { **} jnz @@1ST_STEP
    { } nop
    { } mov [edi+__L],ecx
    { } jmp @@5ND_STEP

    @@FINISHED:
    { } mov eax,edi
    { } mov ecx,[esp+_KIMENET]
    { } mov ebp,edx
    @@results:
    { } mov ebx,[edi+edx*08h+__B]
    { } sub eax,ebp
    { } add esi,[eax+ebx]
    { } sub ebx,ebp
    { } shr ebx,02h
    { } mov [ecx],bl
    { } add ecx,01h
    { } add edx,04h
    { } jnz @@results
    (CODE)

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

  • P.H.

    senior tag

    Skeleton of the code tuned for Zen (1) and Zen+ (i.e. placeholder code omitted).

    Runs constantly at 3.4 IPC (out of a maximum of 4.0, given the 4 available ALUs) for 50x50 matrices; this means 85% utilization.

    Since more than 90% of the instructions need an ALU, the key question is how the instructions are distributed among the ALUs. This is true for all other microarchitectures as well.

    { } movsx ebx,byte ptr [eax+E.FIELD0+00h]
    { } mov esi,ebp
    { } xor edx,edx
    { } and esi,-8
    { } mov [edi+__A],ebp
    { } mov ecx,ebp
    @init:
    { } mov [edi+esi*08h+(00h*08h)+__B],edx
    { } mov [edi+esi*08h+(04h*08h)+__B],edx
    { } sub esi,-8
    { } jnz @init
    @@a:
    { } cmp ebx,ebp
    { } lea esi,[ebp+ebx*04h]
    { } movsx ebx,byte ptr [eax+E.FIELD0+01h]
    { } lea eax,[eax+01h]
    { } mov [edi+esi*08h+__B],ebp
    { } cmova esi,edx
    { } mov [edi+ecx*08h+__C],esi
    { } add ecx,04h
    { } jnz @@a
    { } mov eax,[edi+ebp*08h+__C]
    { } mov ebx,ebp
    { } mov edx,edi
    { } jmp @b
    { Row pass: for each index ebx the __C entry is copied into __D and R; when that entry }
    { plus ebp produces no carry, the dwords addressed through edx are scanned for their }
    { unsigned minimum and the negated minimum is stored in the __F entry of the index. }
    { NOTE(review): HEAD's fuller listing names the corresponding phase @@REDUCE_ROWS - confirm this is the same step. }
    @@ROWS:
    { } mov eax,[edi+ebx*08h+(04h*08h)+__C]  { eax = __C entry of the next index (ebx+4) }
    { } add ebx,04h
    { } jz @@COLS  { index reached zero: leave the row pass }
    @next0b:
    { } mov [edi+ebx*08h+__D],eax
    { } sub edx,ebp  { move the base used by the element loads below by -ebp bytes }
    { } mov [edi+ebx*08h+R],eax
    { } add eax,ebp  { CF = eax + ebp wrapped; the sum also serves as the first running minimum }
    { } jc @@ROWS  { carry: no minimum search for this index }
    { } lea ecx,[ebp+04h]
    { } mov esi,[edx+ebp]  { second running minimum, seeded with the element at index ebp ... }
    { } or esi,[edi+ebp*08h+__B]  { ... OR-ed with the matching __B entry }
    { } and ecx,-8  { ecx = (ebp+4) rounded down to a multiple of 8; the loop steps by 8 }
    { } mov ebp,[edi+ecx*08h+__B]  { ebp is scratch from here until it is reloaded from __A }
    @findr:
    { } or ebp,[edx+ecx+00h]  { element ecx OR __B entry (entry was preloaded into ebp) }
    { } cmp ebp,eax
    { } cmovb eax,ebp  { eax = unsigned min(eax, candidate) }
    { } mov ebp,[edx+ecx+04h]
    { } or ebp,[edi+ecx*08h+(04h*08h)+__B]  { element ecx+4 OR its __B entry }
    { } cmp ebp,esi
    { } cmovb esi,ebp  { esi = unsigned min(esi, candidate) }
    { } mov ebp,[edi+ecx*08h+(08h*08h)+__B]  { preload the __B entry used by the next pass }
    { } add ecx,08h
    { } jnz @findr
    { } cmp eax,esi
    { } mov ebp,[edi+__A]  { reload ebp from __A, where it was saved before @init }
    { } cmovb esi,eax  { merge the two running minima }
    { } neg esi  { sets the flags tested by jle; the mov in between does not change them }
    { } mov [edi+__L+ebx*08h+__F],esi
    { } jle @ROWS  { NOTE(review): the label defined above is @@ROWS, not @ROWS - confirm the intended target }
    { Exit path: reached by falling through from the row pass and via js @@EXIT after @findc. }
    { Writes 7FFFFFFFh into field E.O of the record whose address is held at [esp+_X]. }
    { NOTE(review): HEAD's @@ABNORMAL_EXIT stores a marker into TRESULT.OPTIMUM in the same place - presumably E.O is that field; confirm. }
    @@EXIT:
    { } mov esi,[esp+_X]
    { } mov [esi+E.O],7FFFFFFFh
    { } jmp @outside
    { Column pass. ebx walks the indices downwards in steps of 4 (@nextc); for a selected }
    { index, @findc walks edx upwards and keeps the unsigned minimum of }
    { (__F entry + dword at eax+edi) OR __D entry in ecx. A zero value goes straight to @testr. }
    { Lines tagged ** are a compare/test immediately followed by the jump that consumes it. }
    { NOTE(review): HEAD's fuller listing jumps to @@REDUCE_COLUMNS at the matching point - confirm this is that phase. }
    @free0col:
    { } add [edi+__0],ebp  { __0 += ebp }
    { } mov [edi+__Y],esi
    { } mov ecx,0FFFFFFFFh
    @@COLS:
    { } mov [edi+ebp*08h-(04h*08h)+__B],ebx  { write ebx into the __B slot just below the first index }
    @mark:
    { } mov [edi+__L+ebx*08h+__CC],esi
    { } mov esi,[edi+__L+ebp*08h+__F]  { preload the __F entry of the first index for @findc }
    { } mov edx,ebp
    @nextc:
    { } lea eax,[ebx-04h]
    { } mov [edi+__L+ebx*08h+__M],ecx
    { } mov ecx,[edi+ebx*08h-(04h*08h)+__B]  { ecx = __B entry of index ebx-4 }
    { } mov ebx,eax  { ebx -= 4 }
    { } sub eax,ebp  { eax = ebx - ebp; borrow means ebx is now below ebp (unsigned) }
    { } jc @@init0  { every index has been visited }
    { } xor ecx,-1  { ecx = not ecx, and sets SF for the jump }
    { } jns @nextc  { inverted value non-negative: store it in __M and try the next index }
    @findc:
    { } add esi,[eax+edi]
    { } or esi,[edi+edx*08h+__D]
    { } lea edx,[edx+04h]  { advance edx while keeping the flags of the or }
    { } jz @testr  { combined value is zero }
    { } sub eax,ebp  { step the element offset by -ebp bytes }
    { } cmp esi,ecx
    { } cmovb ecx,esi  { ecx = unsigned min(ecx, esi) }
    { } mov esi,[edi+__L+edx*08h+__F]  { preload the next __F entry }
    { **} cmp edx,00h
    { **} jnz @findc
    { } mov eax,ebx
    { } mov edx,ebp
    { } mov esi,ecx  { esi = minimum found by @findc }
    { **} cmp ecx,00h
    { **} js @@EXIT  { minimum has its sign bit set }
    @seek0:
    { } mov ecx,[edi+__L+edx*08h+__F]
    { } sub eax,ebp
    { } add edx,04h
    { } jg @free0col  { edx went above zero without a match }
    { } add ecx,[eax+edi]
    { **} cmp ecx,esi
    { **} jnz @seek0  { repeat until __F entry + element equals the minimum in esi }
    @testr:
    { } lea ecx,[edx-04h]  { ecx = index of the entry just examined }
    { **} test [edi+edx*08h-(04h*08h)+__C],ebx
    { **} js @seek0col  { NOTE(review): @seek0col is not defined in this listing - presumably omitted code or @seek0; confirm }
    { } mov [edi+ebx*08h+__B],ecx  { link the two indices: __B entry of ebx = ecx ... }
    { } mov [edi+edx*08h-(04h*08h)+__C],ebx  { ... and __C entry of ecx = ebx }
    { } xor ecx,ecx
    { } jmp @mark
    { Entered from @nextc once ebx has dropped below ebp. Saves ecx in __I, then either }
    { enters the scan loop or leaves the routine. }
    @@init0:
    { } mov [edi+__I],ecx
    { } mov eax,ecx
    { } sal ecx,10h  { move the low word of ecx into the high word; ZF if that word was zero }
    { } jnz @scan
    { } mov eax,edi
    { } mov esi,[esp+_X]
    { } jmp outside  { NOTE(review): spelled without the leading @ used by jmp @outside elsewhere - confirm the target }
    { Adjusts the __CC and __F entries of every index by the value in edx, gated per entry by one }
    { byte taken from the __M entry and from the R entry respectively. eax carries two }
    { 16-bit indices: the low word and the high word. }
    { NOTE(review): the gating only works as a clean select if each tested byte is 00h or FFh - confirm. }
    @@1ST_STEP:
    { } movsx esi,ax  { esi = low word of eax, sign-extended }
    { } mov esi,[edi+esi*08h+__C]  { esi = __C entry of that index, tested after the loop }
    { } neg edx
    @1ST_STEP:
    { } movsx ecx,byte ptr [edi+__L+ebx*08h+__S+__M]  { sign-extend the byte at offset __S of the __M entry }
    { } and ecx,edx  { edx or 0 when the byte is FFh or 00h }
    { } sub [edi+__L+ebx*08h+__CC],ecx
    { } movsx ecx,byte ptr [edi+ebx*08h+__S+R]  { same gating, using the R entry }
    { } and ecx,edx
    { } sub [edi+__L+ebx*08h+__F],ecx
    { } add ebx,04h
    { } jnz @1ST_STEP
    { } mov ecx,[edi+__I]
    { } movsx ebx,ax  { ebx = low word of eax, sign-extended }
    { } sar eax,10h  { eax = high word of eax, sign-extended }
    { **} test esi,esi
    { **} jz @@2ND_STEP  { the __C entry loaded above is zero }
    { } add dword ptr [edi+__A],-1  { decrement __A }
    { } mov [edi+ebx*08h+R],eax
    { } mov [edi+__L+esi*08h+__M],ebx
    { } cmp esi,ecx
    { } cmovb ecx,esi  { ecx = unsigned min(__I, esi) }
    { } mov ebx,ebp
    { } mov [edi+__I],ecx
    { } jmp @@6TH_STEP
    { Reached from the scan loop when ebx equals -4. Updates __W, stores esi into the __M entry }
    { of the index held in the high word of ecx, then looks for the next index whose __M entry is above __A. }
    @pass:
    { } mov eax,ecx
    { } sar ecx,10h  { ecx = its high word, sign-extended; CF = former bit 15 }
    { } cmovnc eax,[edi+__W]  { bit 15 was clear: keep the old __W instead of the packed ecx }
    { } mov [edi+__W],eax
    { } lea ebx,[ebp+00h]  { ebx = ebp }
    { } mov [edi+__L+ecx*08h+__M],esi
    @nx:
    { } mov esi,[edi+__L+ecx*08h+__M +(04h*08h)]  { esi = __M entry of index ecx+4 }
    { } add ecx,04h
    { } jz @@1ST_STEP  { index reached zero }
    { **} cmp esi,[edi+__A]
    { **} jbe @nx  { unsigned: skip entries not above __A }
    { Scan loop, unrolled twice. For the index in ecx it walks ebx upwards in steps of 4 and forms }
    { (__F entry - __Y + dword at edi+eax) OR R entry. A zero result jumps to @@5TH_STEP; }
    { otherwise edx keeps the unsigned minimum and the low word of ecx the index that produced it. }
    { The loop leaves through @pass when ebx equals -4. }
    @@6TH_STEP:
    { } mov esi,[edi+__L+ecx*08h+__CC]
    { } lea ebx,[ebp-04h]  { the scan increments before use, so start one step below ebp }
    { } { x1 } nop
    { } mov eax,ecx
    { } mov [edi+__Y],esi  { __Y = __CC entry of index ecx }
    @ffd:
    { } sal ecx,10h  { index moves to the high word; the low word becomes zero }
    { } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]  { preload the __F entry of index ebx+4 }
    @z:
    { **} cmp ebx,-4
    { **} jz @pass
    @scan:
    { } add ebx,04h
    { } sub eax,ebp  { step the element offset by -ebp bytes }
    { } sub esi,[edi+__Y]
    { } add esi,[edi+eax]
    { } or esi,[edi+ebx*08h+R]
    { } jz @@5TH_STEP
    { } cmp esi,edx
    { } cmovb cx,bx  { new minimum: record ebx in the low word only; the high word of ecx is kept }
    { } cmovb edx,esi  { edx = unsigned min(edx, esi) }
    { } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
    { **} cmp ebx,-4
    { **} jz @pass
    { Second copy of the loop body. }
    { } add ebx,04h
    { } sub eax,ebp
    { } sub esi,[edi+__Y]
    { } add esi,[edi+eax]
    { } or esi,[edi+ebx*08h+R]
    { } jz @@5TH_STEP
    { } cmp esi,edx
    { } cmovb cx,bx
    { } cmovb edx,esi
    { } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
    { } db $66,$66,$66; nop  { three 66h prefix bytes ahead of nop: a 4-byte padding instruction }
    { } jmp @zZ  { NOTE(review): the label defined above is @z, not @zZ - confirm the intended target }
    { Reached when the scan produced a zero at index ebx. Updates __W as @pass does, then }
    { branches on the __C entry of ebx: zero goes to @2ND_STEP, otherwise the R and __M }
    { entries are written and the scan continues from @ffd or @@6TH_STEP. }
    @@5TH_STEP:
    { } mov esi,ecx
    { } sar ecx,10h  { ecx = its high word, sign-extended; CF = former bit 15 }
    { } cmovnc esi,[edi+__W]  { bit 15 was clear: keep the old __W }
    { } mov [edi+__W],esi
    { } mov esi,[edi+ebx*08h+__C]
    { **} test esi,esi
    { **} jz @2ND_STEP  { __C entry of ebx is zero }
    { } mov [edi+ebx*08h+R],ecx
    { } mov [edi+__L+esi*08h+__M],0FFFFFFFFh
    { **} cmp word ptr [edi+__W],bx
    { **} jz @re  { the low word of __W is this very index }
    { **} cmp esi,ecx
    { **} jae @ffd  { unsigned esi >= ecx: resume the scan at @ffd }
    { } mov ecx,esi
    { } mov eax,[edi+__I]
    { } cmp esi,eax
    { } cmovb eax,esi  { __I = unsigned min(__I, esi) }
    { } mov [edi+__I],eax
    { } jmp @@6TH_STEP
    @re:
    { } mov ecx,[edi+__I]
    { } add dword ptr [edi+__A],-1  { decrement __A }
    { } mov edx,esi
    { } cmp esi,ecx
    { } cmovb ecx,esi  { __I = unsigned min(__I, esi) }
    { } mov [edi+__I],ecx
    { } jmp @@6TH_STEP
    { Follows a chain through the R entries, rewriting the __C and __B entries pairwise as it goes, }
    { until adding ebp to the fetched __B value no longer carries. Then subtracts ebp from __0 }
    { and leaves the routine when the result is zero. }
    { NOTE(review): the step names in HEAD suggest this flips the assignments along an alternating path - confirm. }
    @@2ND_STEP:
    { } mov ecx,eax
    @2ND_STEP:
    { } mov [edi+ebx*08h+__C],ecx  { __C entry of ebx = ecx }
    { } mov edx,[edi+ecx*08h+__B]  { remember the previous __B entry of ecx ... }
    { } mov [edi+ecx*08h+__B],ebx  { ... before overwriting it with ebx }
    { } mov ecx,[edi+edx*08h+R]  { next ecx comes from the R entry of the previous partner }
    { } mov ebx,edx
    { } add edx,ebp
    { } jc @2ND_STEP  { carry: continue along the chain }
    { } mov ecx,esi
    { } sub [edi+__0],ebp  { __0 -= ebp; @free0col performs the matching add }
    { } jz @@outside  { NOTE(review): other exits use @outside - confirm the intended target }
    { Re-initialisation loop, unrolled twice. esi walks downwards in steps of 4 until it equals ebp. }
    { Each step stores the inverted __B entry into the __M entry, copies the __D entry into R, }
    { and records esi in ecx whenever the inverted value is negative. The last recorded ecx becomes __I. }
    @@9ST_STEP:
    { } mov eax,[edi+esi*08h-(04h*08h)+__B]  { eax = __B entry of index esi-4 }
    { } xor eax,-1  { eax = not eax; SF feeds cmovs below }
    { } mov [edi+__L+esi*08h-(04h*08h)+__M],eax
    { } lea esi,[esi-04h]  { esi -= 4 without touching the flags }
    { } cmovs ecx,esi  { inverted value negative: remember this index }
    { } mov ebx,[edi+esi*08h+__D]
    { } mov eax,[edi+esi*08h-(04h*08h)+__B]  { preload the __B entry for the second copy }
    { } mov [edi+esi*08h+R],ebx  { R entry = __D entry }
    { **} cmp ebp,esi
    { **} jz @i9
    { Second copy of the loop body; its __B entry is already in eax. }
    { } xor eax,-1
    { } mov [edi+__L+esi*08h-(04h*08h)+__M],eax
    { } lea esi,[esi-04h]
    { } cmovs ecx,esi
    { } mov eax,[edi+esi*08h+__D]
    { } mov [edi+esi*08h+R],eax
    { **} cmp ebp,esi
    { **} jnz @@9ST_STEP
    @i9:
    { } mov [edi+__I],ecx
    {- } jmp @@6TH_STEP

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Aktív témák