Aktív témák

  • P.H.

    senior tag

    válasz P.H. #63 üzenetére

    Még pár tízmillió órajelnyi csökkentés benne volt NetBurst-ön, a @down ciklus kisebbre vételének (20 » 16 utasítás) köszönhetően.

    Utasításadatok a 2 konkrét gépre immár az AIDA64 Instruction Dump-nak megfelelően (Northwood és Prescott):

    // Fragment of a larger routine: the entry/exit code and the @finish and @entry
    // labels it jumps to are not part of this listing.
    // The trailing comment on every instruction carries two groups of figures, one per
    // CPU named in the surrounding post (Northwood, then Prescott). The layout looks
    // like "(uops) latency/throughput port unit" -- TODO confirm against the AIDA64 dump.
    // Many branches and cmovcc below consume flags set several instructions earlier;
    // this relies on mov/movzx/lea/cmovcc/prefetchnta leaving EFLAGS untouched, so the
    // instruction order must not be changed.
    //
    // --- take the next element and test the stop condition ---
    mov eax,[esi-08h+__PRIO] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    mov ebx,[esi+__PACKADAT2] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // edx = -1 (all bits set)
    or edx,-1 // (1) d/d p0 LOGIC (1) 1/d p0 LOGIC
    cmp eax,[esp+__STOPINDEX] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    mov ebp,[esi+edi*08h+__PRIO] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    mov ebx,[ebx+eax*04h] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // flags still from "cmp eax,[esp+__STOPINDEX]": leave when eax equals the stop index
    jz @finish // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    mov [esi+eax*08h+__CONN],eax // (2) 1/2 p0+3 STORE+STA (2) 1/2 p0+3 STORE+STA
    // edx is -1 here, so this adds 1 to edi and sets the flags for the next two branches
    sub edi,edx // (1) d/d p01 ALU (1) 1/d p01 ALU
    jg @finish // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    mov [esi+__RELAXED],eax // (1) 1/2 p0 STORE (1) 1/2 p0 STORE
    // flags still from "sub edi,edx": skip the @down loop when edi became 0
    jz @block // (1) 0/1 p0 BRANCH (1) 0/1 p0 BRANCH
    prefetchnta [ebx] // (6) 6/6 p2 LOAD (1) 1/1 p2 LOAD
    mov ebp,[esi+ebp*08h+__DIST] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // ebx is parked in [esi+__HEADER] because the loop below reuses ebx; reloaded after the loop
    mov [esi+__HEADER],ebx // (1) 1/2 p0 STORE (1) 1/2 p0 STORE
    // --- @down loop (16 instructions): edx = current slot, eax = 2*edx ---
    @down: //
    lea eax,[edx+edx] // (1) d/d p01 ALU (1) 1/d p01 ALU
    mov ecx,[esi+eax*08h-08h+__PRIO] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    cmp eax,edi // (1) d/d p01 ALU (1) 1/d p01 ALU
    // signed eax < edi: go straight to the store sequence with these flags
    jl @insertdown // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    mov ebx,[esi+eax*08h+00h+__PRIO] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    mov ecx,[esi+ecx*08h+__DIST] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // eax == edi (flags still from "cmp eax,edi"): skip the two-way comparison
    jz @child // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    cmp ecx,[esi+ebx*08h+__DIST] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    // eax -= CF: steps to slot eax-1 when ecx was below the other __DIST value (unsigned)
    sbb eax,00h // (3) 7/7 p1 ALU 1 (3) 10/10 p1 ALU 1
    mov ebx,[esi+eax*08h+__PRIO] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    @child: //
    cmp ebp,[esi+ebx*08h+__DIST] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    @insertdown: //
    // Flags come from "cmp eax,edi" on the jl path, otherwise from the cmp at @child.
    // On below-or-equal (CF=1 or ZF=1) ebx is replaced by [esi+edi*08h-08h+__PRIO].
    cmovbe ebx,[esi+edi*08h-08h+__PRIO] // (4) 6/1 p01+2 ALU+LOAD (4) 10/3 p01+2 ALU+LOAD
    // write ebx into slot edx and record slot edx in ebx's __CONN field
    mov [esi+edx*08h+__PRIO],ebx // (2) 1/2 p0+3 STORE+STA (2) 1/2 p0+3 STORE+STA
    mov [esi+ebx*08h+__CONN],edx // (2) 1/2 p0+3 STORE+STA (2) 1/2 p0+3 STORE+STA
    mov edx,eax // (1) d/d p01 ALU (1) 1/d p01 ALU
    // same flags as the cmovbe above: loop only while the compare was "above" (CF=0 and ZF=0)
    jnbe @down // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    mov ebx,[esi+__HEADER] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // --- @block: read the two byte fields of the STRUCT0 record at ebx ---
    @block: //
    movzx eax,byte ptr [ebx+STRUCT0.FIELD0] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    movzx ecx,byte ptr [ebx+STRUCT0.FIELD1] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // bias ebx so that the "add ebx,STRUCT1SIZE" in @connects lands STRUCT0SIZE bytes past the record start
    add ebx,STRUCT0SIZE-STRUCT1SIZE // (1) d/d p01 ALU (1) 1/d p01 ALU
    cmp byte ptr [esp+WORKAREA0+eax],00h // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    jz @entry // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    // [esi+__HEADER] now holds FIELD1 and serves as the countdown for @connects
    // NOTE(review): the second uop count (2) differs from the other "mov [esi+...],reg" stores (1) -- verify
    mov [esi+__HEADER],ecx // (1) 1/2 p0 STORE (2) 1/2 p0 STORE
    // --- @connects loop: one STRUCT1 record per iteration ---
    @connects: //
    // decrement the countdown in memory; a negative result ends the loop
    sub dword ptr [esi+__HEADER],01h // (3) 9/2 p01+2 ALU+LOAD+STORE (3) 5/2 p01+2 ALU+LOAD+STORE
    js @entry // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    mov ecx,[ebx+STRUCT1SIZE+STRUCT1.FIELD0] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    add ebx,STRUCT1SIZE // (1) d/d p01 ALU (1) 1/d p01 ALU
    movzx edx,byte ptr [ebx+STRUCT1.FIELD1] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // records whose target has a positive (signed) __CONN value are skipped
    cmp dword ptr [esi+ecx*08h+__CONN],00h // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    jg @connects // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    // --- @label1: ebp = FIELD3, possibly replaced by an adjusted value ---
    @label1: //
    mov al,[ebx+STRUCT1.FIELD2] // (1) 3/1 p2 LOAD (1) 5/1 p2 LOAD
    mov ebp,[ebx+STRUCT1.FIELD3] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // al = (FIELD2 | FIELD4) & byte at [esp+WORKAREA1+edx]
    or al,[ebx+STRUCT1.FIELD4] // (2) 3/1 p0+2 LOGIC+LOAD (2) 5/1 p0+2 LOGIC+LOAD
    and al,[esp+WORKAREA1+edx] // (2) 3/1 p0+2 LOGIC+LOAD (2) 5/1 p0+2 LOGIC+LOAD
    // edx = ebp + 10000*1000 when FIELD1 == 11, otherwise edx = 2*ebp
    cmp edx,11 // (1) d/d p01 ALU (1) 1/d p01 ALU
    mov edx,10000*1000 // (1) d/d p01 ALU (1) 1/d p01 ALU
    cmovnz edx,ebp // (3) 6/1 p01 ALU (3) 10/d p01 ALU
    add edx,ebp // (1) d/d p01 ALU (1) 1/d p01 ALU
    // the adjusted value replaces ebp only when the masked byte in al is non-zero
    cmp al,00h // (1) d/d p01 ALU (1) 1/d p01 ALU
    movzx eax,byte ptr [ebx+STRUCT1.FIELD5] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    cmovnz ebp,edx // (3) 6/1 p01 ALU (3) 10/d p01 ALU
    @label2: //
    // FIELD5 == 0FFh falls through to @label3; any other value is handled at @label4 first
    cmp al,0FFh // (1) d/d p01 ALU (1) 1/d p01 ALU
    jnz @label4 // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    @label3: //
    // NOTE(review): Northwood latency is 2 here but 3 for the other "mov al,[...]" under @label1 -- verify
    mov al,[ebx+STRUCT1.FIELD6] // (1) 2/1 p2 LOAD (1) 5/1 p2 LOAD
    // edx = ebp + 1000*1000, or plain ebp when FIELD6 is above (unsigned) the low byte of ARGUMENT1
    lea edx,[ebp+1000*1000] // (1) d/d p01 ALU (1) 1/d p01 ALU
    // NOTE(review): byte compare here, dword compare against the same slot under @label4 -- confirm intended
    cmp al,[esp+ARGUMENT1] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    mov eax,[esi+__RELAXED] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    cmova edx,ebp // (3) 6/1 p01 ALU (3) 10/d p01 ALU
    // Branch-free mask: ebp = 0, CF = (__CONN of ecx != 0), ebp = CF, ebp -= 1
    // => ebp = 0 when __CONN is non-zero, -1 when it is zero.
    sub ebp,ebp // (1) d/d p01 ALU (1) 1/d p01 ALU
    cmp ebp,[esi+ecx*08h+__CONN] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    rcl ebp,01h // (1) 4/4 p1 ALU 1 (1) 7/7 p1 ALU 1
    sub ebp,01h // (1) d/d p01 ALU (1) 1/d p01 ALU
    // edx = candidate value = adjusted ebp + __DIST of the element in eax ([esi+__RELAXED])
    add edx,[esi+eax*08h+__DIST] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    // ebp = edx when __CONN was non-zero, all ones (-1) when it was zero
    or ebp,edx // (1) d/d p0 LOGIC (1) 1/d p0 LOGIC
    // signed compare: keep the stored __DIST (next record) when it is <= ebp
    cmp [esi+ecx*08h+__DIST],ebp // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    jle @connects // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    // --- store the new value for ecx and pick the slot to sift up from ---
    mov ebp,[esi+ecx*08h+__CONN] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    mov [esi+ecx*08h+__DIST],edx // (2) 1/2 p0+3 STORE+STA (2) 1/2 p0+3 STORE+STA
    // ecx is negated, so the __PREV store below is addressed with -ecx
    neg ecx // (1) d/d p0 LOGIC (1) 1/d p0 LOGIC
    cmp ebp,00h // (1) d/d p01 ALU (1) 1/d p01 ALU
    mov [esi+ecx*08h+__PREV],eax // (2) 1/2 p0+3 STORE+STA (2) 1/2 p0+3 STORE+STA
    // __CONN non-zero: ebp already holds the starting slot; otherwise edi is decremented and used
    jnz @moveup // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    sub edi,01h // (1) d/d p01 ALU (1) 1/d p01 ALU
    mov ebp,edi // (1) d/d p01 ALU (1) 1/d p01 ALU
    // --- @moveup loop: eax = current slot, ebp = eax >> 1 (arithmetic) ---
    @moveup: //
    mov eax,ebp // (1) d/d p01 ALU (1) 1/d p01 ALU
    sar ebp,01h // (1) 4/1 p1 MMX_SHIFT (1) 1/d p1 SHIFT
    mov ecx,[esi+ebp*08h+__PRIO] // (1) 2/1 p2 LOAD (1) 4/1 p2 LOAD
    // unsigned eax > 0FFFFFFFEh, i.e. eax == -1: place the element here without comparing
    cmp eax,-2 // (1) d/d p01 ALU (1) 1/d p01 ALU
    ja @insertup // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    cmp edx,[esi+ecx*08h+__DIST] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    @insertup: //
    // CF=0 (from whichever cmp ran last): ecx is reloaded from the record's FIELD0
    cmovae ecx,[ebx+STRUCT1.FIELD0] // (4) 6/1 p01+2 ALU+LOAD (4) 10/3 p01+2 ALU+LOAD
    mov [esi+eax*08h+__PRIO],ecx // (2) 1/2 p0+3 STORE+STA (2) 1/2 p0+3 STORE+STA
    mov [esi+ecx*08h+__CONN],eax // (2) 1/2 p0+3 STORE+STA (2) 1/2 p0+3 STORE+STA
    // CF=1 (edx was below the compared __DIST): continue with slot ebp
    jnae @moveup // (1) 0/4 p0 BRANCH (1) 0/4 p0 BRANCH
    jmp @connects // (1) 0/1 p0 BRANCH (1) 0/1 p0 BRANCH
    // --- @label4: FIELD5 != 0FFh; eax holds the zero-extended FIELD5 ---
    @label4: //
    // edx = 10*eax
    lea edx,[eax+eax*04h] // (2) 4/1 p01 ALU (2) 2/1 p01 ALU
    // NOTE(review): second group reads d/d where every other reg-reg ALU op shows 1/d -- verify
    add edx,edx // (1) d/d p01 ALU (1) d/d p01 ALU
    // eax = 10*FIELD5 when FIELD5 <= 100 (unsigned), otherwise FIELD5 - 100
    sub eax,100 // (1) d/d p01 ALU (1) 1/d p01 ALU
    cmovbe eax,edx // (3) 6/1 p01 ALU (3) 10/3 p01 ALU
    // ebp += 1000*1000 when eax is not above (unsigned) the dword at [esp+ARGUMENT1]
    cmp eax,[esp+ARGUMENT1] // (2) 3/1 p01+2 ALU+LOAD (2) 5/1 p01+2 ALU+LOAD
    lea edx,[ebp+1000*1000] // (1) d/d p01 ALU (1) 1/d p01 ALU
    cmovna ebp,edx // (3) 6/1 p01 ALU (3) 10/d p01 ALU
    jmp @label3 // (1) 0/1 p0 BRANCH (1) 0/1 p0 BRANCH

    [ Szerkesztve ]

    Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙

Aktív témák