Aktív témák

  • P.H.

    senior tag

    Macro-op fusion

    The Sandy Bridge can fuse two instructions into one µop in more cases than previous processors can [...].

    The decoders will fuse an arithmetic or logic instruction and a subsequent conditional jump instruction into a single compute-and-branch µop in certain cases. The compute-and-branch µop is not split in two at the execution units but executed as a single µop by the branch unit at execution port 5.
    The CMP, ADD and SUB instructions can fuse with signed and unsigned branch instructions. INC and DEC can fuse with signed branch instructions, and TEST and AND instructions can fuse with all branch instructions (including useless combinations), [...]
    The first instruction can have an immediate operand or a memory source operand, but not both. It cannot have a memory destination operand.

    Megcsillagoztam azokat az utasításpárokat, amelyek elméletileg egyesíthetők, nem véve figyelembe a szükséges 3 utasításos teret köztük (egyrészt az elágazás-tévesztések miatt, másrészt nem nagyon lehet előre kiszámolni, mi tartozik decode-nál ugyanabba a 4-es utasításcsoportba):

    The programmer should keep fuseable arithmetic instructions together with a subsequent conditional jump rather than scheduling other instructions in-between; and there should preferably be at least three other instructions between one fuseable pair and the next fuseable pair in order to take advantage of macro-op fusion.

    Az ADD és SUB bevonása a Sandy Bridge-nél a macro-op fusion-be igen hatékony lépés volt az Intel-től.

    mov eax,edi
    pushad
    shl ebp,02h
    xor ecx,ecx
    lea edx,[ebp+ebp*02h]
    lea edi,[ebx+ebp]
    neg ebp
    @mark0:
    sub edx,04h
    mov [ebx+edx],ecx
    jg @mark0
    mov byte ptr [edi+00h],01h
    @@REDUCE_ROWS:
    mov ebx,ebp
    @rowmin:
    mov esi,02000000h
    mov ecx,ebp
    xor edx,edx
    @findrowmin:
    cmp esi,[eax]
    cmovz edx,ecx
    cmova esi,[eax]
    add eax,04h
    * add ecx,04h
    * jnz @findrowmin
    sub ecx,ebp
    * cmp esi,02000000h
    * jz @specific
    add eax,ebp
    @subrow:
    xor edx,edx
    cmp byte ptr [eax+03h],00h
    cmovz edx,esi
    sub [eax],edx
    add eax,04h
    * sub ecx,04h
    * jnz @subrow
    * add ebx,04h
    * jnz @rowmin
    jmp @columns
    @specific:
    cmp byte ptr [edi+edx],00h
    mov byte ptr [edi+edx],01h
    jnz @@ABNORMAL_EXIT
    add ecx,ebx
    sub dword ptr [esp+__SYS0],01h
    mov byte ptr [edi+ebx+02h],01h
    mov [edi+ecx*02h+__0STAR],edx
    jz @count_result_STACK
    * add ebx,04h
    * jnz @rowmin
    @columns:
    mov [edi+00h],bl
    @@RECUDE_COLUMNS:
    sub ebx,04h
    sub eax,04h
    * cmp ebx,ebp
    * jl @@2ND_STEP
    test byte ptr [edi+ebx],01h
    jnz @@RECUDE_COLUMNS
    mov esi,02000000h
    mov ecx,ebp
    @findcolmin:
    cmp esi,[eax]
    cmova esi,[eax]
    add eax,ebp
    * add ecx,04h
    * jnz @findcolmin
    cmp esi,02000000h
    lea ecx,[ebp-04h]
    jz @@ABNORMAL_EXIT
    @subcol:
    xor edx,edx
    * add ecx,04h
    * jz @@RECUDE_COLUMNS
    sub eax,ebp
    cmp [eax+03h],dl
    cmovz edx,esi
    * sub [eax],edx
    * jnz @subcol
    mov dl,[edi+ecx+02h]
    mov byte ptr [edi+ecx+02h],01h
    or dl,[edi+ebx]
    mov edx,ecx
    jnz @subcol
    mov byte ptr [edi+ebx],01h
    sub edx,ebp
    mov byte ptr [edi+ecx+02h],01h
    sub dword ptr [esp+__SYS0],01h
    mov [edi+edx*02h+__0STAR],ebx
    jnz @subcol
    jmp @count_result_STACK
    @@ABNORMAL_EXIT:
    add esp,20h
    xor eax,eax
    mov edx,7FFFFFFFh
    stc
    ret

    @@3RD_STEP:
    mov byte ptr [edi+ebx+03h],0FFh
    mov byte ptr [edi+edx],00h
    mov [edi+eax*02h+__COLON],ecx
    @@2ND_STEP:
    lea ecx,[ebp-04h]
    mov edx,00FFFFFFh
    jmp @c2col
    @zeroincol:
    cmp edx,[esi]
    mov bl,[edi+eax+03h]
    sbb bl,00h
    jz @@DECIDE_NEXT_STEP
    @nx2mtx:
    sub esi,ebp
    * add eax,04h
    * jnz @zeroincol
    @c2col:
    mov esi,ecx
    add esi,[esp+__MTX]
    sub esi,ebp
    @check2col:
    add esi,04h
    * add ecx,04h
    * jz @@5TH_STEP
    cmp byte ptr [edi+ecx],00h
    mov eax,ebp
    jnz @check2col
    jmp @zeroincol
    @@5TH_STEP:
    lea ebx,[ebp+03h]
    mov esi,[esp+__MTX]
    @nx5row:
    mov eax,[edi+ebx-03h]
    sub ecx,edx
    xor eax,edx
    cmovs edx,ecx
    mov ecx,ebp
    @decrease_row_free:
    bt dword ptr [edi+ecx],00h
    mov al,[esi+03h]
    adc al,[edi+ebx]
    mov eax,00000000h
    cmovz eax,edx
    sub [esi],eax
    add esi,04h
    * add ecx,04h
    * jnz @decrease_row_free
    * add ebx,04h
    * js @nx5row
    mov eax,[esp+__FREE0]
    xor edx,edx
    mov esi,eax
    sub eax,[esp+__MTX]
    idiv ebp
    neg eax
    lea ecx,[ebp+edx]
    lea eax,[ebp+eax*04h]
    @@DECIDE_NEXT_STEP:
    xor edx,edx
    mov [esp+__FREE0],esi
    * add edx,[esi]
    * jnz @nx2mtx
    mov ebx,eax
    sub eax,ebp
    * add edx,[edi+eax*02h+__0STAR]
    * jnz @@3RD_STEP
    @@4TH_STEP:
    sub edx,ebp
    jmp @newstar
    @0_star:
    mov [edi+ebx*02h+__0STAR],ecx
    mov ecx,[edi+eax*02h+__COLON]
    @newstar:
    mov ebx,eax
    lea eax,[edx-04h]
    @starincol:
    * cmp [edi+eax*02h+__0STAR],ecx
    * jz @0_star
    * sub eax,04h
    * jns @starincol
    mov [edi+ebx*02h+__0STAR],ecx
    @@1ST_STEP:
    sub dword ptr [esp+__SYS0],01h
    mov ebx,edi
    mov ecx,ebp
    jz @count_result_STACK
    mov edx,[edi]
    @restructure:
    mov esi,[ebx+__0STAR]
    mov byte ptr [edi+ecx+03h],00h
    add ebx,08h
    mov byte ptr [edi+esi],01h
    * add ecx,04h
    * jnz @restructure
    mov [edi],edx
    jmp @@2ND_STEP
    @count_result_STACK:
    xor ecx,ecx
    neg ebp
    xor eax,eax
    mov esi,[esp+__SAVE]
    mov ebx,[esp+__MARKS]
    add esp,20h
    @results:
    mov edx,[edi+ecx*02h+__0STAR]
    add ecx,04h
    add edx,ebp
    add eax,[esi+edx]
    shr edx,02h
    add esi,ebp
    cmp ecx,ebp
    mov [ebx],dl
    lea ebx,[ebx+01h]
    jnz @results

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Aktív témák