| |
| # ============================================================================ |
| # bandwidth 0.23, a benchmark to estimate memory transfer bandwidth. |
| # ARM assembly module. |
| # Copyright (C) 2010 by Zack T Smith. |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation; either version 2 of the License, or |
| # (at your option) any later version. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program; if not, write to the Free Software |
| # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| # |
| # The author may be reached at fbui@comcast.net. |
| # ============================================================================= |
| |
| #-------------- |
| # Version 0.7 |
| #-------------- |
| |
| .arch armv5t |
| .fpu softvfp |
| |
| # All routines in this module are ARM-state (32-bit encoding) code in .text. |
| .text |
| .arm |
| .align 2 |
| |
| # Exported entry points. Each symbol is typed as a function so that the |
| # object file describes it as code rather than as an untyped symbol. |
| .global Writer |
| .type Writer, %function |
| .global RandomWriter |
| .type RandomWriter, %function |
| |
| .global Reader |
| .type Reader, %function |
| .global RandomReader |
| .type RandomReader, %function |
| |
| .global RegisterToRegister |
| .type RegisterToRegister, %function |
| .global StackReader |
| .type StackReader, %function |
| .global StackWriter |
| .type StackWriter, %function |
| |
| #----------------------------------------------------------------------------- |
| # Name: Writer |
| # Purpose: Performs sequential writes into memory, as fast as possible. |
| # Params: |
| # r0 = address |
| # r1 = length in bytes; rounded down to a multiple of 256 |
| # r2 = count (number of passes over the buffer) |
| # r3 = value to write |
| # Notes: |
| # Returns without writing anything when the rounded length or the count |
| # is zero. r0, r1 and r2 are modified; r4-r12 are saved and restored. |
| #----------------------------------------------------------------------------- |
| Writer: |
| stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} |
| |
| # The inner loop consumes exactly 256 bytes per iteration and only stops |
| # when the remaining length is exactly zero, so the length is rounded |
| # down to a multiple of 256. Rounding to 128 would let the counter step |
| # past zero and keep storing beyond the end of the buffer. |
| bics r1, r1, #0xff |
| |
| # An empty buffer or a zero pass count means there is nothing to do; |
| # without this check either counter would wrap around instead. |
| cmpne r2, #0 |
| beq .LWriter_done |
| |
| # r4 = start address, r5 = rounded length; both are reloaded every pass. |
| mov r4, r0 |
| mov r5, r1 |
| |
| # Copy the fill value into seven more registers so that one stmia |
| # stores eight words. |
| mov r6, r3 |
| mov r7, r3 |
| mov r8, r3 |
| mov r9, r3 |
| mov r10, r3 |
| mov r11, r3 |
| mov r12, r3 |
| |
| .LWriter_pass: |
| mov r0, r4 |
| mov r1, r5 |
| |
| .LWriter_chunk: |
| # 8 stmia x 8 registers = 64 word stores = 256 bytes. |
| # The "!" writeback advances r0 past the words just stored. |
| .rept 8 |
| stmia r0!, {r3, r6, r7, r8, r9, r10, r11, r12} |
| .endr |
| |
| # r1 = bytes still to write in this pass. |
| subs r1, r1, #256 |
| bne .LWriter_chunk |
| |
| # r2 = passes still to run. |
| subs r2, r2, #1 |
| bne .LWriter_pass |
| |
| .LWriter_done: |
| # return. |
| ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} |
| |
| #----------------------------------------------------------------------------- |
| # Name: Reader |
| # Purpose: Performs sequential reads from memory, as fast as possible. |
| # Params: |
| # r0 = address |
| # r1 = length in bytes; rounded down to a multiple of 256 |
| # r2 = count (number of passes over the buffer) |
| # Notes: |
| # Returns without reading anything when the rounded length or the count |
| # is zero. The loaded data is discarded. r0-r3 are modified; r4-r12 are |
| # saved and restored. |
| #----------------------------------------------------------------------------- |
| Reader: |
| stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} |
| |
| # The inner loop consumes exactly 256 bytes per iteration and only stops |
| # when the remaining length is exactly zero, so the length is rounded |
| # down to a multiple of 256. Rounding to 128 would let the counter step |
| # past zero and keep loading beyond the end of the buffer. |
| bics r1, r1, #0xff |
| |
| # An empty buffer or a zero pass count means there is nothing to do; |
| # without this check either counter would wrap around instead. |
| cmpne r2, #0 |
| beq .LReader_done |
| |
| # r4 = start address, r5 = rounded length; both are reloaded every pass. |
| mov r4, r0 |
| mov r5, r1 |
| |
| .LReader_pass: |
| mov r0, r4 |
| mov r1, r5 |
| |
| .LReader_chunk: |
| # 8 ldmia x 8 registers = 64 word loads = 256 bytes. |
| # The "!" writeback advances r0 past the words just loaded. |
| # r3 and r6-r12 only receive the data; each ldmia overwrites them. |
| .rept 8 |
| ldmia r0!, {r3, r6, r7, r8, r9, r10, r11, r12} |
| .endr |
| |
| # r1 = bytes still to read in this pass. |
| subs r1, r1, #256 |
| bne .LReader_chunk |
| |
| # r2 = passes still to run. |
| subs r2, r2, #1 |
| bne .LReader_pass |
| |
| .LReader_done: |
| # return. |
| ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} |
| |
| #----------------------------------------------------------------------------- |
| # Name: RandomWriter |
| # Purpose: Performs random writes into memory, as fast as possible. |
| # Params: |
| # r0 = pointer to array of chunk pointers |
| # r1 = # of 256-byte chunks |
| # r2 = # loops to do |
| # r3 = value to write |
| # Notes: |
| # Returns without writing anything when the chunk count or the loop |
| # count is zero. r2 is modified; r4 and r5 are saved and restored. |
| #----------------------------------------------------------------------------- |
| RandomWriter: |
| stmfd sp!, {r4, r5, lr} |
| |
| # With zero chunks the index in r5 would never compare equal to r1, and |
| # with zero loops r2 would wrap around; both cases are rejected here. |
| cmp r1, #0 |
| cmpne r2, #0 |
| beq .LRandomWriter_done |
| |
| .LRandomWriter_pass: |
| # r5 = index of the next entry in the chunk pointer array. |
| mov r5, #0 |
| |
| .LRandomWriter_chunk: |
| # r4 = chunk pointer number r5 (entries are 4 bytes, hence LSL #2). |
| ldr r4, [r0, r5, LSL #2] |
| |
| # 64 word stores = 256 bytes. Every word offset from 0 to 252 is |
| # written exactly once, in a fixed non-sequential order. |
| .irp off, 160, 232, 224, 96, 164, 76, 100, 220 |
| str r3, [r4, #\off] |
| .endr |
| .irp off, 248, 104, 4, 136, 112, 200, 12, 128 |
| str r3, [r4, #\off] |
| .endr |
| .irp off, 148, 196, 216, 0, 84, 140, 204, 184 |
| str r3, [r4, #\off] |
| .endr |
| .irp off, 124, 48, 64, 212, 240, 236, 24, 252 |
| str r3, [r4, #\off] |
| .endr |
| .irp off, 68, 20, 72, 32, 28, 52, 244, 180 |
| str r3, [r4, #\off] |
| .endr |
| .irp off, 80, 60, 8, 56, 208, 228, 40, 172 |
| str r3, [r4, #\off] |
| .endr |
| .irp off, 120, 176, 108, 132, 16, 44, 92, 168 |
| str r3, [r4, #\off] |
| .endr |
| .irp off, 152, 156, 188, 36, 88, 116, 192, 144 |
| str r3, [r4, #\off] |
| .endr |
| |
| # Move on to the next chunk until all r1 chunks have been written. |
| add r5, r5, #1 |
| cmp r5, r1 |
| bne .LRandomWriter_chunk |
| |
| # r2 = loops still to run. |
| subs r2, r2, #1 |
| bne .LRandomWriter_pass |
| |
| .LRandomWriter_done: |
| # return. |
| ldmfd sp!, {r4, r5, pc} |
| |
| #----------------------------------------------------------------------------- |
| # Name: RandomReader |
| # Purpose: Performs random reads from memory, as fast as possible. |
| # Params: |
| # r0 = pointer to array of chunk pointers |
| # r1 = # of 256-byte chunks |
| # r2 = # loops to do |
| # Notes: |
| # Returns without reading anything when the chunk count or the loop |
| # count is zero. The loaded data is discarded. r2 and r3 are modified; |
| # r4 and r5 are saved and restored. |
| #----------------------------------------------------------------------------- |
| RandomReader: |
| stmfd sp!, {r4, r5, lr} |
| |
| # With zero chunks the index in r5 would never compare equal to r1, and |
| # with zero loops r2 would wrap around; both cases are rejected here. |
| cmp r1, #0 |
| cmpne r2, #0 |
| beq .LRandomReader_done |
| |
| .LRandomReader_pass: |
| # r5 = index of the next entry in the chunk pointer array. |
| mov r5, #0 |
| |
| .LRandomReader_chunk: |
| # r4 = chunk pointer number r5 (entries are 4 bytes, hence LSL #2). |
| ldr r4, [r0, r5, LSL #2] |
| |
| # 64 word loads = 256 bytes. Every word offset from 0 to 252 is read |
| # exactly once, in a fixed non-sequential order. r3 only receives the |
| # data and is overwritten by every load. |
| .irp off, 160, 232, 224, 96, 164, 76, 100, 220 |
| ldr r3, [r4, #\off] |
| .endr |
| .irp off, 248, 104, 4, 136, 112, 200, 12, 128 |
| ldr r3, [r4, #\off] |
| .endr |
| .irp off, 148, 196, 216, 0, 84, 140, 204, 184 |
| ldr r3, [r4, #\off] |
| .endr |
| .irp off, 124, 48, 64, 212, 240, 236, 24, 252 |
| ldr r3, [r4, #\off] |
| .endr |
| .irp off, 68, 20, 72, 32, 28, 52, 244, 180 |
| ldr r3, [r4, #\off] |
| .endr |
| .irp off, 80, 60, 8, 56, 208, 228, 40, 172 |
| ldr r3, [r4, #\off] |
| .endr |
| .irp off, 120, 176, 108, 132, 16, 44, 92, 168 |
| ldr r3, [r4, #\off] |
| .endr |
| .irp off, 152, 156, 188, 36, 88, 116, 192, 144 |
| ldr r3, [r4, #\off] |
| .endr |
| |
| # Move on to the next chunk until all r1 chunks have been read. |
| add r5, r5, #1 |
| cmp r5, r1 |
| bne .LRandomReader_chunk |
| |
| # r2 = loops still to run. |
| subs r2, r2, #1 |
| bne .LRandomReader_pass |
| |
| .LRandomReader_done: |
| # return. |
| ldmfd sp!, {r4, r5, pc} |
| |
| #----------------------------------------------------------------------------- |
| # Name: RegisterToRegister |
| # Purpose: Performs register-to-register transfers. |
| # Params: |
| # r0 = count (number of 64-move iterations) |
| # Notes: |
| # Returns immediately when the count is zero. r0, r1 and r2 are |
| # modified; r3-r9 are only read. Nothing is placed on the stack. |
| #----------------------------------------------------------------------------- |
| RegisterToRegister: |
| # A zero count would wrap around in the loop counter below. |
| cmp r0, #0 |
| bxeq lr |
| |
| .LRegisterToRegister_loop: |
| # Does 64 transfers, 4 bytes each = 256 bytes total. |
| # The values moved are irrelevant; r1 and r2 are plain scratch targets. |
| |
| mov r1, r2 |
| mov r1, r3 |
| mov r1, r4 |
| mov r1, r5 |
| mov r1, r6 |
| mov r1, r7 |
| mov r1, r8 |
| mov r1, r9 |
| |
| # The second group of eight targets r2 instead of r1. |
| mov r2, r1 |
| mov r2, r3 |
| mov r2, r4 |
| mov r2, r5 |
| mov r2, r6 |
| mov r2, r7 |
| mov r2, r8 |
| mov r2, r9 |
| |
| # The remaining six groups of eight all target r1. |
| .rept 6 |
| mov r1, r2 |
| mov r1, r3 |
| mov r1, r4 |
| mov r1, r5 |
| mov r1, r6 |
| mov r1, r7 |
| mov r1, r8 |
| mov r1, r9 |
| .endr |
| |
| # r0 = iterations still to run. |
| subs r0, r0, #1 |
| bne .LRegisterToRegister_loop |
| |
| # return. |
| bx lr |
| |
| #----------------------------------------------------------------------------- |
| # Name: StackReader |
| # Purpose: Performs stack-to-register transfers. |
| # Params: |
| # r0 = count (number of 64-load iterations) |
| # Notes: |
| # Returns immediately when the count is zero. r0 and r1 are modified. |
| # Uses 32 bytes of stack, released again before returning. |
| #----------------------------------------------------------------------------- |
| StackReader: |
| # A zero count would wrap around in the loop counter below. |
| cmp r0, #0 |
| bxeq lr |
| |
| # Reserve a 32-byte scratch area. It is never initialised: only the |
| # loads themselves matter, the data read into r1 is discarded. |
| sub sp, sp, #32 |
| |
| .LStackReader_loop: |
| # Does 64 transfers, 4 bytes each = 256 bytes total: |
| # eight sweeps over the eight words of the scratch area. |
| .rept 8 |
| ldr r1, [sp] |
| ldr r1, [sp, #4] |
| ldr r1, [sp, #8] |
| ldr r1, [sp, #12] |
| ldr r1, [sp, #16] |
| ldr r1, [sp, #20] |
| ldr r1, [sp, #24] |
| ldr r1, [sp, #28] |
| .endr |
| |
| # r0 = iterations still to run. |
| subs r0, r0, #1 |
| bne .LStackReader_loop |
| |
| # Release the scratch area. |
| add sp, sp, #32 |
| |
| # return. |
| bx lr |
| |
| #----------------------------------------------------------------------------- |
| # Name: StackWriter |
| # Purpose: Performs register-to-stack transfers. |
| # Params: |
| # r0 = count (number of 64-store iterations) |
| # Notes: |
| # Returns immediately when the count is zero. r0 is modified. |
| # Uses 32 bytes of stack, released again before returning. |
| #----------------------------------------------------------------------------- |
| StackWriter: |
| # A zero count would wrap around in the loop counter below. |
| cmp r0, #0 |
| bxeq lr |
| |
| # Reserve a 32-byte scratch area that receives the stores. |
| sub sp, sp, #32 |
| |
| .LStackWriter_loop: |
| # Does 64 transfers, 4 bytes each = 256 bytes total: |
| # eight sweeps over the eight words of the scratch area. |
| # r1 is never set by this routine, so the value stored is whatever the |
| # caller left in it; only the stores themselves matter. |
| .rept 8 |
| str r1, [sp] |
| str r1, [sp, #4] |
| str r1, [sp, #8] |
| str r1, [sp, #12] |
| str r1, [sp, #16] |
| str r1, [sp, #20] |
| str r1, [sp, #24] |
| str r1, [sp, #28] |
| .endr |
| |
| # r0 = iterations still to run. |
| subs r0, r0, #1 |
| bne .LStackWriter_loop |
| |
| # Release the scratch area. |
| add sp, sp, #32 |
| |
| # return. |
| bx lr |
| |