2012-08-16 84 views
5

为什么 Clang 的优化会破坏我的内联汇编代码?

为了学习 ARM 汇编,我编写了一个简单的测试项目,使用内联汇编和 NEON 指令对图像进行缩小。经过一番努力,我终于让它正常工作了,皆大欢喜。

https://github.com/rmaz/NEON-Image-Downscaling

你可以在上面的链接中看到这个项目。只不过,它只能在低于 -O2 的优化级别下正常工作。我查看了生成的汇编代码,但看不出为什么会这样。谁能提供一些见解?下面是负责内联汇编部分的函数:

// Downscales one pair of source rows by a factor of two in each direction:
// every output pixel is the per-byte halving-add average of a 2x2 block of
// 32-bit source pixels.
//
//   dst          - output row; 4 pixels are stored per loop iteration
//   src          - top source row; the bottom row is taken to start at
//                  src + pixelsPerRow
//   pixelsPerRow - source pixels per row; rounded down to a multiple of 8
//
// The loop body runs before the remaining count is tested, so a count below 8
// (which rounds down to 0) is decremented past zero by the first `subs`
// instead of ending the loop.
static void inline resizeRow(uint32_t *dst, uint32_t *src, uint32_t pixelsPerRow) 
{ 
    // the bottom row of the pair starts pixelsPerRow pixels after the top row
    const uint32_t * rowB = src + pixelsPerRow; 

    // force the number of pixels per row to a multiple of 8,
    // since each loop iteration consumes 8 pixels from each row
    pixelsPerRow = 8 * (pixelsPerRow/8);  

    // Operands: %0 = dst, %1 = src (top row), %2 = rowB (bottom row),
    // %3 = remaining pixel count. Each one is tied to its own input through a
    // matching constraint, so the asm both reads and updates it.
    // NOTE(review): `subs` updates the condition flags and the block reads and
    // writes memory, yet neither "cc" nor "memory" appears in the clobber list.
    __asm__ volatile("Lresizeloop:      \n" // loop entry label
        "vld1.32  {d0-d3}, [%1]!  \n" // q0-q1 = 8 pixels from the top row, advance src
        "vld1.32  {d4-d7}, [%2]!  \n" // q2-q3 = 8 pixels from the bottom row, advance rowB
        "vhadd.u8  q0, q0, q2   \n" // vertical average: top and bottom, byte by byte
        "vhadd.u8  q1, q1, q3   \n" 
        "vtrn.32  q0, q2    \n" // transpose so horizontally adjacent pixels land in different registers
        "vtrn.32  q1, q3    \n" 
        "vhadd.u8  q0, q0, q2   \n" // horizontal average of each adjacent pair
        "vhadd.u8  q1, q1, q3   \n" 
        "vtrn.32  d0, d1    \n" // gather the averaged pixels: two into d0 ...
        "vtrn.32  d2, d3    \n" // ... and two into d2
        "vswp   d1, d2    \n" // move the second pair next to the first, giving 4 pixels in d0-d1
        "vst1.64  {d0-d1}, [%0]!  \n" // store the 4 output pixels, advance dst
        "subs   %3, %3, #8   \n" // 8 source pixels consumed; sets the condition flags
        "bne   Lresizeloop   \n" // repeat until the count reaches exactly zero
        : "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow) 
        : "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow) 
        : "q0", "q1", "q2", "q3" 
        ); 
} 

在 -O1 下可以正常工作的版本中,外层调用函数及其循环所生成的汇编输出如下:

.align 2 
    .code 16      @ @"\01-[BDPViewController downscaleImageNeon:]" 
    .thumb_func "-[BDPViewController downscaleImageNeon:]" 
"-[BDPViewController downscaleImageNeon:]": 
    .cfi_startproc 
Lfunc_begin4: 
    .loc 1 86 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0 
@ BB#0: 
    .loc 1 86 1 prologue_end  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1 
    push {r4, r5, r6, r7, lr} 
    add r7, sp, #12 
    push.w {r8, r10, r11} 
    sub sp, #20 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0 
    .loc 1 88 20     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20 
Ltmp41: 
    movw r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
Ltmp42: 
    mov r6, r2 
Ltmp43: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0 
    movt r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
LPC4_0: 
    add r0, pc 
    ldr.w r11, [r0] 
    mov r0, r6 
    blx _objc_retain 
    mov r4, r0 
    mov r0, r6 
    mov r1, r11 
Ltmp44: 
    blx _objc_msgSend 
    blx _CGImageGetWidth 
    mov r5, r0 
Ltmp45: 
    @DEBUG_VALUE: width <- R5+0 
    .loc 1 89 21     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21 
    mov r0, r6 
    mov r1, r11 
    str r5, [sp, #16]   @ 4-byte Spill 
    blx _objc_msgSend 
    blx _CGImageGetHeight 
    mov r10, r0 
Ltmp46: 
    @DEBUG_VALUE: height <- R10+0 
    .loc 1 90 26     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetBytesPerRow 
    str r0, [sp, #12]   @ 4-byte Spill 
Ltmp47: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    .loc 1 91 35     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetAlphaInfo 
    str r0, [sp, #4]   @ 4-byte Spill 
Ltmp48: 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    mov r6, r0 
Ltmp49: 
    mov r0, r4 
    blx _objc_release 
    mov r0, r6 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    mul r8, r10, r5 
Ltmp50: 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    blx _CGImageGetDataProvider 
    blx _CGDataProviderCopyData 
Ltmp51: 
    @DEBUG_VALUE: data <- R0+0 
    str r0, [sp, #8]   @ 4-byte Spill 
Ltmp52: 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    .loc 1 95 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29 
    blx _CFDataGetBytePtr 
    mov r4, r0 
Ltmp53: 
    @DEBUG_VALUE: buffer <- R4+0 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    lsr.w r0, r8, #2 
    movs r1, #4 
    blx _calloc 
    mov r5, r0 
Ltmp54: 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    mov r0, r10 
Ltmp55: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    cmp r0, #0 
Ltmp56: 
    @DEBUG_VALUE: rowIndex <- 0+0 
    beq LBB4_3 
@ BB#1:         @ %.lr.ph 
Ltmp57: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: buffer <- R4+0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    ldr r1, [sp, #12]   @ 4-byte Reload 
Ltmp58: 
    @DEBUG_VALUE: bytesPerRow <- R1+0 
    mov.w r8, #0 
    lsl.w r11, r1, #1 
    .loc 1 104 74    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74 
Ltmp59: 
    lsr.w r10, r1, #1 
Ltmp60: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
LBB4_2:         @ =>This Inner Loop Header: Depth=1 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    lsr.w r1, r8, #1 
Ltmp61: 
    mov r6, r0 
Ltmp62: 
    @DEBUG_VALUE: height <- R6+0 
    mla r0, r1, r10, r5 
Ltmp63: 
    @DEBUG_VALUE: destRow <- R1+0 
    .loc 1 105 9     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9 
    ldr r2, [sp, #16]   @ 4-byte Reload 
    mov r1, r4 
Ltmp64: 
    bl _resizeRow 
    mov r0, r6 
Ltmp65: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 50    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50 
    add.w r8, r8, #2 
Ltmp66: 
    @DEBUG_VALUE: rowIndex <- R8+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    add r4, r11 
    cmp r8, r0 
    blo LBB4_2 
Ltmp67: 
LBB4_3:         @ %._crit_edge 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    .loc 1 109 28    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28 
    ldr r1, [sp, #4]   @ 4-byte Reload 
Ltmp68: 
    lsrs r2, r0, #1 
    str r1, [sp] 
    mov r6, r5 
Ltmp69: 
    @DEBUG_VALUE: outputBuffer <- R6+0 
    ldr r1, [sp, #16]   @ 4-byte Reload 
    ldr r0, [sp, #12]   @ 4-byte Reload 
Ltmp70: 
    lsrs r1, r1, #1 
    lsrs r3, r0, #1 
    mov r0, r5 
    bl _createBitmapContext 
    mov r4, r0 
Ltmp71: 
    @DEBUG_VALUE: context <- R4+0 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    blx _CGBitmapContextCreateImage 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movw r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    mov r5, r0 
Ltmp72: 
    @DEBUG_VALUE: scaledImage <- R5+0 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movt r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    movw r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
    movt r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
LPC4_1: 
    add r1, pc 
LPC4_2: 
    add r0, pc 
    mov r2, r5 
    ldr r1, [r1] 
    ldr r0, [r0] 
    blx _objc_msgSend 
Ltmp73: 
    @DEBUG_VALUE: returnImage <- R0+0 
    @ InlineAsm Start 
    mov r7, r7  @ marker for objc_retainAutoreleaseReturnValue 
    @ InlineAsm End 
    blx _objc_retainAutoreleasedReturnValue 
Ltmp74: 
    mov r8, r0 
    .loc 1 112 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5 
    mov r0, r5 
    blx _CGImageRelease 
    .loc 1 113 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5 
    mov r0, r4 
    blx _CGContextRelease 
    .loc 1 114 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5 
    ldr r0, [sp, #8]   @ 4-byte Reload 
    blx _CFRelease 
    .loc 1 115 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5 
    mov r0, r6 
    blx _free 
Ltmp75: 
    .loc 1 118 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1 
    mov r0, r8 
    add sp, #20 
    pop.w {r8, r10, r11} 
    pop.w {r4, r5, r6, r7, lr} 
Ltmp76: 
    b.w _objc_autoreleaseReturnValue 
Ltmp77: 
Lfunc_end4: 
    .cfi_endproc 

    .align 2 
    .code 16      @ @resizeRow 
    .thumb_func _resizeRow 
_resizeRow: 
    .cfi_startproc 
Lfunc_begin5: 
    .loc 1 26 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0 
@ BB#0: 
    @DEBUG_VALUE: resizeRow:dst <- R0+0 
    @DEBUG_VALUE: resizeRow:src <- R1+0 
    @DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0 
    .loc 1 27 47 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47 
    add.w r3, r1, r2, lsl #2 
Ltmp78: 
    @DEBUG_VALUE: rowB <- R3+0 
    .loc 1 30 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5 
    bic r2, r2, #7 
Ltmp79: 
    .loc 1 32 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5 
    @ InlineAsm Start 
    Lresizeloop:      
vld1.32  {d0-d3}, [r1]!  
vld1.32  {d4-d7}, [r3]!  
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  q0, q2    
vtrn.32  q1, q3    
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  d0, d1    
vtrn.32  d2, d3    
vswp   d1, d2    
vst1.64  {d0-d1}, [r0]!  
subs   r2, r2, #8   
bne   Lresizeloop   

    @ InlineAsm End 
Ltmp80: 
    .loc 1 51 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1 
    bx lr 
Ltmp81: 
Lfunc_end5: 
    .cfi_endproc 

而在 -O2 下无法正常工作的版本,其汇编输出如下:

.align 2 
    .code 16      @ @"\01-[BDPViewController downscaleImageNeon:]" 
    .thumb_func "-[BDPViewController downscaleImageNeon:]" 
"-[BDPViewController downscaleImageNeon:]": 
    .cfi_startproc 
Lfunc_begin4: 
    .loc 1 86 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0 
@ BB#0: 
    .loc 1 86 1 prologue_end  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1 
    push {r4, r5, r6, r7, lr} 
    add r7, sp, #12 
    push.w {r8, r10, r11} 
    sub sp, #20 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0 
    .loc 1 88 20     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20 
Ltmp41: 
    movw r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
Ltmp42: 
    mov r6, r2 
Ltmp43: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0 
    movt r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
LPC4_0: 
    add r0, pc 
    ldr.w r11, [r0] 
    mov r0, r6 
    blx _objc_retain 
    mov r4, r0 
    mov r0, r6 
    mov r1, r11 
Ltmp44: 
    blx _objc_msgSend 
    blx _CGImageGetWidth 
    mov r5, r0 
Ltmp45: 
    @DEBUG_VALUE: width <- R5+0 
    .loc 1 89 21     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21 
    mov r0, r6 
    mov r1, r11 
    str r5, [sp, #16]   @ 4-byte Spill 
    blx _objc_msgSend 
    blx _CGImageGetHeight 
    mov r10, r0 
Ltmp46: 
    @DEBUG_VALUE: height <- R10+0 
    .loc 1 90 26     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetBytesPerRow 
    str r0, [sp, #12]   @ 4-byte Spill 
Ltmp47: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    .loc 1 91 35     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetAlphaInfo 
    str r0, [sp, #4]   @ 4-byte Spill 
Ltmp48: 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    mov r6, r0 
Ltmp49: 
    mov r0, r4 
    blx _objc_release 
    mov r0, r6 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    mul r8, r10, r5 
Ltmp50: 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    blx _CGImageGetDataProvider 
    blx _CGDataProviderCopyData 
Ltmp51: 
    @DEBUG_VALUE: data <- R0+0 
    str r0, [sp, #8]   @ 4-byte Spill 
Ltmp52: 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    .loc 1 95 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29 
    blx _CFDataGetBytePtr 
    mov r4, r0 
Ltmp53: 
    @DEBUG_VALUE: buffer <- R4+0 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    lsr.w r0, r8, #2 
    movs r1, #4 
    blx _calloc 
    mov r5, r0 
Ltmp54: 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    mov r0, r10 
Ltmp55: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    cmp r0, #0 
Ltmp56: 
    @DEBUG_VALUE: rowIndex <- 0+0 
    beq LBB4_3 
@ BB#1:         @ %.lr.ph 
Ltmp57: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: buffer <- R4+0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    ldr r1, [sp, #12]   @ 4-byte Reload 
Ltmp58: 
    @DEBUG_VALUE: bytesPerRow <- R1+0 
    mov.w r8, #0 
    lsl.w r11, r1, #1 
    .loc 1 104 74    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74 
Ltmp59: 
    lsr.w r10, r1, #1 
Ltmp60: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
LBB4_2:         @ =>This Inner Loop Header: Depth=1 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    lsr.w r1, r8, #1 
Ltmp61: 
    mov r6, r0 
Ltmp62: 
    @DEBUG_VALUE: height <- R6+0 
    mla r0, r1, r10, r5 
Ltmp63: 
    @DEBUG_VALUE: destRow <- R1+0 
    .loc 1 105 9     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9 
    ldr r2, [sp, #16]   @ 4-byte Reload 
    mov r1, r4 
Ltmp64: 
    bl _resizeRow 
    mov r0, r6 
Ltmp65: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 50    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50 
    add.w r8, r8, #2 
Ltmp66: 
    @DEBUG_VALUE: rowIndex <- R8+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    add r4, r11 
    cmp r8, r0 
    blo LBB4_2 
Ltmp67: 
LBB4_3:         @ %._crit_edge 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    .loc 1 109 28    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28 
    ldr r1, [sp, #4]   @ 4-byte Reload 
Ltmp68: 
    lsrs r2, r0, #1 
    str r1, [sp] 
    mov r6, r5 
Ltmp69: 
    @DEBUG_VALUE: outputBuffer <- R6+0 
    ldr r1, [sp, #16]   @ 4-byte Reload 
    ldr r0, [sp, #12]   @ 4-byte Reload 
Ltmp70: 
    lsrs r1, r1, #1 
    lsrs r3, r0, #1 
    mov r0, r5 
    bl _createBitmapContext 
    mov r4, r0 
Ltmp71: 
    @DEBUG_VALUE: context <- R4+0 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    blx _CGBitmapContextCreateImage 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movw r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    mov r5, r0 
Ltmp72: 
    @DEBUG_VALUE: scaledImage <- R5+0 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movt r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    movw r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
    movt r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
LPC4_1: 
    add r1, pc 
LPC4_2: 
    add r0, pc 
    mov r2, r5 
    ldr r1, [r1] 
    ldr r0, [r0] 
    blx _objc_msgSend 
Ltmp73: 
    @DEBUG_VALUE: returnImage <- R0+0 
    @ InlineAsm Start 
    mov r7, r7  @ marker for objc_retainAutoreleaseReturnValue 
    @ InlineAsm End 
    blx _objc_retainAutoreleasedReturnValue 
Ltmp74: 
    mov r8, r0 
    .loc 1 112 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5 
    mov r0, r5 
    blx _CGImageRelease 
    .loc 1 113 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5 
    mov r0, r4 
    blx _CGContextRelease 
    .loc 1 114 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5 
    ldr r0, [sp, #8]   @ 4-byte Reload 
    blx _CFRelease 
    .loc 1 115 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5 
    mov r0, r6 
    blx _free 
Ltmp75: 
    .loc 1 118 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1 
    mov r0, r8 
    add sp, #20 
    pop.w {r8, r10, r11} 
    pop.w {r4, r5, r6, r7, lr} 
Ltmp76: 
    b.w _objc_autoreleaseReturnValue 
Ltmp77: 
Lfunc_end4: 
    .cfi_endproc 

    .align 2 
    .code 16      @ @resizeRow 
    .thumb_func _resizeRow 
_resizeRow: 
    .cfi_startproc 
Lfunc_begin5: 
    .loc 1 26 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0 
@ BB#0: 
    @DEBUG_VALUE: resizeRow:dst <- R0+0 
    @DEBUG_VALUE: resizeRow:src <- R1+0 
    @DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0 
    .loc 1 27 47 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47 
    add.w r3, r1, r2, lsl #2 
Ltmp78: 
    @DEBUG_VALUE: rowB <- R3+0 
    .loc 1 30 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5 
    bic r2, r2, #7 
Ltmp79: 
    .loc 1 32 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5 
    @ InlineAsm Start 
    Lresizeloop:      
vld1.32  {d0-d3}, [r1]!  
vld1.32  {d4-d7}, [r3]!  
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  q0, q2    
vtrn.32  q1, q3    
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  d0, d1    
vtrn.32  d2, d3    
vswp   d1, d2    
vst1.64  {d0-d1}, [r0]!  
subs   r2, r2, #8   
bne   Lresizeloop   

    @ InlineAsm End 
Ltmp80: 
    .loc 1 51 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1 
    bx lr 
Ltmp81: 
Lfunc_end5: 
    .cfi_endproc 
+1

为什么不发布生成的代码? – 2012-08-16 13:20:37

+0

这两段输出看起来完全相同。这真的是编译器的汇编输出吗?请尝试用 objdump 从两个以不同选项编译的二进制文件中分别提取汇编代码。 – auselen 2012-08-17 21:49:39

回答

13

下面是我用 -O2 构建你的 Xcode 项目后得到的汇编代码片段。(用 -O1 构建时,编译器根本不会内联该函数,所以它能正常工作我并不意外。)

Ltmp55: 
    @DEBUG_VALUE: rowIndex <- R3+0 
    .loc 1 101 29    @ /tmp/NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    add r8, r12 
    cmp r3, r11 
    .loc 1 32 5     @ /tmp/NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5 
Ltmp56: 
    @ InlineAsm Start 
    Lresizeloop:      
vld1.32  {d0-d3}, [r4]!  
vld1.32  {d4-d7}, [r5]!  
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  q0, q2    
vtrn.32  q1, q3    
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  d0, d1    
vtrn.32  d2, d3    
vswp   d1, d2    
vst1.64  {d0-d1}, [r6]!  
subs   r2, r2, #8   
bne   Lresizeloop   

    @ InlineAsm End 
Ltmp57: 
    blo LBB2_2 

看到最后一行的 blo(branch if lower,即无符号小于时跳转)指令了吗?它使用的是汇编块上方 cmp r3, r11 设置的条件码。但你的内联汇编代码(其中的 subs 指令)显然已经把条件码寄存器彻底改写了。那么这是编译器的错误吗?……不是!你只是忘了告诉编译器,你的内联汇编代码会破坏条件码。请把下面的第一段代码替换为第二段(即在破坏列表中加入 "cc"):

    : "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow) 
       : "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow) 
       : "q0", "q1", "q2", "q3" 
       ); 

    : "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow) 
       : "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow) 
       : "q0", "q1", "q2", "q3", "cc" 
       ); 

这样修改之后,生成的汇编输出就自行修正了。我没有实际运行该应用,但我敢打赌,你会发现它现在表现好多了。 :)

+0

干得漂亮,quux,这确实解决了问题,给你满分。 – Tark 2012-08-30 21:36:18