Aktív témák

  • P.H.

    senior tag

    Utolsó felvonás, két bevezető ciklus összevonása után.

    Néhány IPC-mérés a lépések ciklusaira (végtelen ciklusban mérve):
    @@REDUCE_ROWS: 1.8 IPC
    @@REDUCE_COLUMNS: 1.6 IPC
    @@2ND_STEP: 0.9 IPC (ez a leggyakrabban lefutó ciklus)
    @@5TH_STEP: 2.2 IPC
    @@1ST_STEP: 1.5 IPC

    Úgy tűnik, az AMD-n (a Bulldozerig bezárólag) a legjobb stratégia az, ha a ciklusokban az utasítások fele [ ] referenciát tartalmaz, azaz a memóriahivatkozások mellett bizonyos ADD reg,imm és MOV reg,reg utasítások helyett azok LEA reg,[reg+imm] vagy LEA reg,[reg] megfelelőit használom; ezek méretre ugyanakkorák, viszont a 3 AGU valamelyikében futnak az ALU-k helyett.

    // Prologue: save all general-purpose registers, then zero the MARKS work area.
    // EBP is read as the row/column count on entry (see the comment at
    // @count_result_STACK); EAX is used by the following code as a pointer to
    // dword cells - presumably the matrix, confirm against the caller.
    pushad
    mov ebx,offset(MARKS)
    // EDX = 2*EBP: number of MARKS bytes to clear.
    lea edx,[ebp+ebp]
    xor ecx,ecx
    // EDI = MARKS + EBP; every later MARKS access is relative to EDI.
    lea edi,[ebx+ebp]
    // From here on EBP holds the negated count, so indices can count up to zero.
    neg ebp
    @mark0:
    // SUB sets the flags; MOV and LEA leave them untouched, so JG still tests
    // the remaining byte count while EBX advances one dword per pass.
    sub edx,04h
    mov [ebx],ecx
    lea ebx,[ebx+04h]
    jg @mark0
    // Row reduction: for each row find the smallest cell, then subtract it from
    // every cell of that row whose top byte ([cell+03h]) is zero.
    // EAX walks the dword cells; EBX is the row index and ECX the column index,
    // both running from the negated count in EBP up to zero.
    @@REDUCE_ROWS:
    // Seed the counter slot at [ESP+__SYS0] with the negated count. It is
    // incremented once per recorded assignment; reaching zero ends the search.
    mov [esp+__SYS0],ebp
    mov ebx,ebp
    // Reserve _SAVE bytes of scratch below the PUSHAD frame.
    sub esp,_SAVE
    @rowmin:
    mov ecx,ebp
    // 01000000h is the starting minimum: the first value above the 24-bit range.
    mov esi,01000000h
    xor edx,edx
    @findrowmin:
    // ESI = running unsigned minimum. EDX = column index of the last cell that
    // compared equal to the running minimum (stays 0 if none did).
    cmp esi,[eax]
    cmovz edx,ecx
    cmova esi,[eax]
    // ADD sets ZF for the JNZ below; LEA advances EAX without touching flags.
    add ecx,01h
    lea eax,[eax+04h]
    jnz @findrowmin
    // ECX is 0 after the loop, so this leaves ECX = positive count.
    sub ecx,ebp
    // Minimum still at its starting value: no cell in the row was below 01000000h.
    cmp esi,01000000h
    jz @specific
    // Rewind EAX to the start of the row just scanned (EBP is negative here).
    lea eax,[eax+ebp*04h]
    @subrow:
    // Subtract ESI from the cell only when its top byte is zero; otherwise subtract 0.
    xor edx,edx
    cmp byte ptr [eax+03h],00h
    cmovz edx,esi
    sub [eax],edx
    // SUB on ECX sets ZF for the JNZ; LEA keeps it.
    sub ecx,01h
    lea eax,[eax+04h]
    jnz @subrow
    jmp @reducenxrow
    @specific:
    // EDX = 0 means no cell was exactly 01000000h either: give up.
    test edx,edx
    jz @@ABNORMAL_EXIT
    // Bit 0 of the mark byte at [EDI+column] already set: that column was
    // recorded for an earlier row, so fall into the failure exit.
    test byte ptr [edi+edx],01h
    jz @mark
    @@ABNORMAL_EXIT:
    // Failure return: EAX = 0, EDX = 7FFFFFFFh, carry flag set.
    // NOTE(review): the stack is unwound with a literal 40h, which equals _SAVE plus
    // the 20h-byte PUSHAD frame only if _SAVE = 20h, and the registers saved by
    // PUSHAD are discarded rather than restored - confirm both are intended.
    add esp,40h
    xor eax,eax
    mov edx,7FFFFFFFh
    stc
    ret
    @mark:
    // Record the assignment: set bit 4 (10h) in the row's mark byte and bit 0 in
    // the column's mark byte, and store the column index (DL) at [EDI + row],
    // where row = ECX + EBX is the zero-based row number.
    or byte ptr [edi+ebx],10h
    add ecx,ebx
    or byte ptr [edi+edx],01h
    // Count the assignment; MOV keeps ZF from the ADD, so JZ fires when the
    // counter reaches zero.
    // NOTE(review): EBP is still negative on this path, while the comment at
    // @count_result_STACK describes EBP as the row/column quantity - confirm.
    add dword ptr [esp+_SAVE+__SYS0],01h
    mov [edi+ecx],dl
    jz @count_result_STACK
    @reducenxrow:
    // Next row; EAX already points at its first cell.
    add ebx,01h
    jnz @rowmin
    // Column reduction: columns are visited from last to first. For each column
    // whose mark byte has bit 0 clear, find the smallest cell, subtract it from
    // every cell whose top byte is zero, and record an assignment for each cell
    // that becomes zero while both its row and its column are still free.
    // On entry EAX points just past the last cell and EBX is 0 (state left by
    // the row-reduction loop).
    @@REDUCE_COLUMNS:
    // Back to a positive count in EBP.
    neg ebp
    @nxcolmin:
    // Step to the previous column: EBX = column index (-1 down to -count),
    // EAX moves one dword back.
    mov edx,ebp
    sub ebx,01h
    sub eax,04h
    // count + index < 0 means every column has been handled.
    add edx,ebx
    js @@2ND_STEP
    // Skip columns whose mark byte already has bit 0 set.
    test byte ptr [edi+ebx],01h
    jnz @nxcolmin
    // Negative EBP again: the scan below walks up the column, one row per pass.
    neg ebp
    mov edx,01000000h
    mov ecx,ebp
    @findcolmin:
    // EDX = running unsigned minimum of the column.
    cmp edx,[eax]
    cmova edx,[eax]
    // ADD sets ZF for the JNZ; LEA moves EAX without touching flags.
    add ecx,01h
    lea eax,[eax+ebp*04h]
    jnz @findcolmin
    // ECX = -count-1 so that the pre-increment in @subcol starts at -count;
    // EBP becomes positive so EAX now steps down the column.
    lea ecx,[ebp-01h]
    neg ebp
    // Minimum still at its starting value: no cell in this column was below 01000000h.
    cmp edx,01000000h
    jz @@ABNORMAL_EXIT
    @subcol:
    xor esi,esi
    add ecx,01h
    jz @nxcolmin
    lea eax,[eax+ebp*04h]
    // Subtract the minimum only from cells whose top byte is zero.
    cmp byte ptr [eax+03h],00h
    cmovz esi,edx
    sub [eax],esi
    // Cell did not become zero: nothing to record.
    jnz @subcol
    // Cell is now zero. BTS tests and sets the bit in one instruction: skip the
    // cell if the row (bit 4) or the column (bit 0) is already taken.
    // NOTE(review): when the row bit was clear but the column bit is already
    // set, the row bit stays set although nothing is recorded for that row -
    // confirm this is acceptable.
    bts dword ptr [edi+ecx],04h
    jc @subcol
    bts dword ptr [edi+ebx],00h
    // ESI = zero-based row number; LEA keeps CF from the BTS above.
    lea esi,[ecx+ebp]
    jc @subcol
    // Record the assignment: count it, tag the cell's top byte with 01h and
    // store the column index (BL) at [EDI + row].
    add dword ptr [esp+_SAVE+__SYS0],01h
    mov byte ptr [eax+03h],01h
    mov [edi+esi],bl
    // ZF still comes from the ADD: zero means the counter has run out.
    jnz @subcol
    jmp @count_result_STACK
    // Entered from @@DECIDE_NEXT_STEP when the row of the zero just found
    // already has a recorded column: EBX = row index, ESI -> the zero cell,
    // EDX = index of that recorded column.
    @@3RD_STEP:
    // Set bit 1 (02h) in the row's mark byte, tag the cell's top byte with 02h
    // and clear bit 0 of the recorded column's mark byte. Execution then falls
    // through into @@2ND_STEP.
    or byte ptr [edi+ebx],02h
    mov byte ptr [esi+03h],02h
    and byte ptr [edi+edx],11111110b
    // Scan every row whose mark byte has bit 1 clear and, within it, every
    // column whose mark byte has bit 0 clear. A zero cell leaves through
    // @@DECIDE_NEXT_STEP; otherwise EDX ends up as the smallest cell value seen
    // and control reaches @@5TH_STEP.
    @@2ND_STEP:
    xor ebx,ebx
    mov esi,[esp+_SAVE+__MTX]
    xor ecx,ecx
    // 00FFFFFFh: largest 24-bit value, starting point for the minimum.
    mov edx,00FFFFFFh
    // EBX = -count (row index counting up to zero).
    sub ebx,ebp
    @free0:
    // ECX is 0 here, so this resets the column index to -count.
    sub ecx,ebp
    @freerow:
    test byte ptr [edi+ebx],02h
    jz @zeroinrow
    // Row is marked: skip it entirely and move ESI one row down.
    add ebx,01h
    lea esi,[esi+ebp*04h]
    jnz @freerow
    jmp @@5TH_STEP
    @zeroinrow:
    xor eax,eax
    // Skip cells in marked columns.
    test byte ptr [edi+ecx],01h
    jnz @nx2col
    // ADD into a zeroed EAX loads the cell and sets ZF in the same instruction.
    add eax,[esi]
    jz @@DECIDE_NEXT_STEP
    cmp edx,eax
    jbe @nx2col
    // New minimum: EDX takes the value and PUSHAD snapshots all registers into
    // the _SAVE area. ESP ends up where it was only if _SAVE equals the 20h
    // bytes PUSHAD writes - TODO confirm. The matching POPAD follows the
    // @@5TH_STEP code.
    add esp,_SAVE
    lea edx,[eax] //mov edx,eax
    pushad
    @nx2col:
    // ADD sets ZF for the JNZ; LEA advances ESI without touching flags.
    add ecx,01h
    lea esi,[esi+04h]
    jnz @zeroinrow
    add ebx,01h
    jnz @free0
    // Step 5: adjust the matrix by the minimum found in @@2ND_STEP (EDX).
    //  - rows whose mark byte has bit 1 clear: subtract EDX from cells in
    //    unmarked columns whose top byte is zero;
    //  - rows whose mark byte has bit 1 set: add EDX to cells in marked columns
    //    whose top byte has no bit above bit 1 set.
    // Afterwards the register snapshot taken at the minimum is restored and
    // execution falls through into @@DECIDE_NEXT_STEP.
    // EBX is 0 on entry (both exits of @@2ND_STEP leave it at zero).
    @@5TH_STEP:
    xor ecx,ecx
    mov esi,[esp+_SAVE+__MTX]
    // EBX = -count (row index counting up to zero).
    sub ebx,ebp
    @nx5row:
    // ECX is 0 here, so this resets the column index to -count.
    sub ecx,ebp
    test byte ptr [edi+ebx],02h
    jnz @increase_double_markeds
    @decrease_row_free:
    // CF = column marked. AL = top byte + CF, so ZF is set when the top byte
    // is 0 and the column is unmarked (an FFh top byte in a marked column
    // would also wrap to zero).
    bt dword ptr [edi+ecx],00h
    mov al,[esi+03h]
    adc al,00h
    // MOV rather than XOR: the flags from ADC must survive for CMOVZ.
    mov eax,00000000h
    cmovz eax,edx
    sub [esi],eax
    add ecx,01h
    lea esi,[esi+04h]
    jnz @decrease_row_free
    jmp @step5row
    @increase_double_markeds:
    // AL = top byte without bits 0-1. SBB subtracts CF (column marked) from it,
    // so the outgoing carry is set only when AL was 0 and the column is marked.
    mov al,[esi+03h]
    and al,11111100b
    bt dword ptr [edi+ecx],00h
    sbb al,00h
    // MOV rather than XOR: the carry from SBB must survive for CMOVC.
    mov eax,00000000h
    cmovc eax,edx
    add [esi],eax
    add ecx,01h
    lea esi,[esi+04h]
    jnz @increase_double_markeds
    @step5row:
    add ebx,01h
    jnz @nx5row
    // All rows done. No jump back to @@5TH_STEP here: that would repeat the
    // adjustment forever and leave the restore below unreachable.
    // POPAD reloads the snapshot stored by the PUSHAD in @@2ND_STEP (row, column
    // and cell pointer of the minimum) and raises ESP by 20h; the SUB puts ESP
    // back on the scratch area.
    popad
    sub esp,20h
    // A zero cell is at hand: EBX = its row index (negative), ECX = its column
    // index, ESI -> the cell. The byte stored at [EDI + zero-based row] decides
    // the next step: nonzero goes to @@3RD_STEP, zero falls into @@4TH_STEP.
    @@DECIDE_NEXT_STEP:
    // Preset the upper 24 bits so that adding the stored byte into DL leaves the
    // sign-extended (negative) column index in EDX for @@3RD_STEP.
    mov edx,0FFFFFF00h
    // EAX = zero-based row number.
    lea eax,[ebx+ebp]
    add dl,[edi+eax]
    jnz @@3RD_STEP
    // Step 4: starting from the zero at ESI (row EBX, column ECX, EAX =
    // zero-based row), alternately tag a cell 01h and remove the 01h tag from
    // another cell of the same column, continuing from the 02h-tagged cell of
    // that other row, until a column holds no other 01h-tagged cell.
    @@4TH_STEP:
    // EDX = matrix base for the rest of this step.
    mov edx,[esp+_SAVE+__MTX]
    @colon_to_star:
    // Record column CL for the row and tag the cell's top byte with 01h.
    mov [edi+eax],cl
    // ECX = zero-based column number.
    add ecx,ebp
    mov byte ptr [esi+03h],01h
    xor eax,eax
    // ESI -> first-row cell of this column; ECX becomes the column's byte offset.
    lea esi,[edx+ecx*04h]
    shl ecx,02h
    // Clear bit 1 of the row's mark byte.
    and byte ptr [edi+ebx],11111101b
    // EAX = -count (row index counting up to zero).
    sub eax,ebp
    @search_star_in_column:
    // Look down the column for a 01h-tagged cell in a row other than EBX.
    test byte ptr [esi+03h],01h
    jz @nxstar
    cmp eax,ebx
    jnz @0_star
    @nxstar:
    add eax,01h
    lea esi,[esi+ebp*04h]
    jnz @search_star_in_column
    // None found: the chain ends here.
    jmp @@1ST_STEP
    @0_star:
    // Found one in row EAX: make that the current row, clear the cell's tag,
    // clear the row's recorded column, and rewind ESI to the start of the row.
    mov ebx,eax
    mov byte ptr [esi+03h],00h
    add eax,ebp
    sub esi,ecx
    xor ecx,ecx
    mov byte ptr [edi+eax],00h
    // ECX = -count (column index counting up to zero).
    sub ecx,ebp
    @search_colon_in_row:
    // Look along the row for a cell tagged 02h and continue the chain from it.
    test byte ptr [esi+03h],02h
    jnz @colon_to_star
    add ecx,01h
    lea esi,[esi+04h]
    jnz @search_colon_in_row
    @error:
    // Reached only if the row holds no 02h-tagged cell; execution simply falls
    // through into @@1ST_STEP.
    nop
    // Step 1: count the new assignment. Unless the counter has run out, clear
    // all row mark bytes and 02h cell tags, rebuild the column marks from each
    // row's recorded column, and return to @@2ND_STEP.
    // EDX is used as the row pointer below - presumably still the matrix base
    // loaded at @@4TH_STEP; confirm.
    @@1ST_STEP:
    xor ebx,ebx
    xor eax,eax
    add dword ptr [esp+_SAVE+__SYS0],01h
    jz @count_result_STACK
    // EBX = -count (row index counting up to zero).
    sub ebx,ebp
    // Keep the byte at [EDI+0]: @markcol writes to [EDI+0] whenever a row's
    // recorded byte is 0, and the value is put back before leaving.
    mov cl,[edi+00h]
    jmp @nxclear
    @clear_colon:
    // Strip bit 1 from the top byte of every cell in the row at ESI.
    and byte ptr [esi+03h],11111101b
    add eax,01h
    lea esi,[esi+04h]
    jnz @clear_colon
    @nxclear:
    // EAX is 0 here, so this resets the column counter to -count.
    sub eax,ebp
    @markedrow:
    // TEST sets ZF and the two MOVs keep it. The mark byte is zeroed either
    // way, so after @clear_colon the same row is tested again, found clear,
    // and stepped over.
    test byte ptr [edi+ebx],02h
    mov esi,edx
    mov byte ptr [edi+ebx],00h
    jnz @clear_colon
    add ebx,01h
    lea edx,[edx+ebp*04h]
    jnz @markedrow
    @markcol:
    // EBX now runs over zero-based rows and EAX counts them from -count. Each
    // row's recorded byte is sign-extended into EDX and the mark byte it indexes
    // is set to 01h. JNZ tests the ADD on EAX; LEA and MOV keep the flags.
    movsx edx,byte ptr [edi+ebx]
    add eax,01h
    lea ebx,[ebx+01h]
    mov byte ptr [edi+edx],01h
    jnz @markcol
    mov [edi+00h],cl
    jmp @@2ND_STEP
    @count_result_STACK:
    // Final pass: for every row, add the cell in that row's recorded column to
    // EAX and write the zero-based column number to the byte array at EBX.
    // EDI = MARKS + count (set in the prologue)
    // EBP: row/column quantity
    // These notes use line comments: a brace comment left unclosed here would
    // swallow all the code below.
    // Drop the scratch area, then fetch two values from the remaining frame.
    // NOTE(review): __SAVE (two underscores) is a different symbol from _SAVE;
    // presumably ESI becomes the matrix to sum over and EBX the output buffer -
    // confirm against the declarations.
    add esp,_SAVE
    xor ecx,ecx
    xor eax,eax
    mov esi,[esp+__SAVE]
    mov ebx,[esp+__MARKS]
    // Discard the 20h-byte PUSHAD frame without restoring the registers.
    add esp,20h
    @results:
    // EDX = recorded column of row ECX, sign-extended, then made zero-based.
    movsx edx,byte ptr [edi+ecx]
    lea ecx,[ecx+01h]
    add edx,ebp
    add eax,[esi+edx*04h]
    // CMP sets ZF for the JNZ; the MOV and both LEAs leave the flags alone.
    cmp ecx,ebp
    mov [ebx],dl
    lea esi,[esi+ebp*04h]
    lea ebx,[ebx+01h]
    jnz @results
    // The code following this loop is not part of this listing.

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Aktív témák