| /* |
| * vr5xxx.S -- CPU specific support routines |
| * |
| * Copyright (c) 1999 Cygnus Solutions |
| * |
| * The authors hereby grant permission to use, copy, modify, distribute, |
| * and license this software and its documentation for any purpose, provided |
| * that existing copyright notices are retained in all copies and that this |
| * notice is included verbatim in any distributions. No written agreement, |
| * license, or royalty fee is required for any of the authorized uses. |
| * Modifications to this software may be copyrighted by their authors |
| * and need not follow the licensing terms described here, provided that |
| * the new terms are clearly indicated on the first page of each file where |
| * they apply. |
| */ |
| |
| /* This file cloned from vr4300.S by dlindsay@cygnus.com |
| * and recoded to suit Vr5432 and Vr5000. |
| * Should be no worse for Vr43{00,05,10}. |
| * Specifically, __cpu_flush() has been changed (a) to allow for the hardware |
| * difference (in set associativity) between the Vr5432 and Vr5000, |
| * and (b) to flush the optional secondary cache of the Vr5000. |
| */ |
| |
| /* Processor Revision Identifier (PRID) Register: Implementation Numbers */ |
| /* __cpu_flush compares the PRID implementation field against this value */ |
| /* to select its Vr5432-specific flush loop. */ |
| #define IMPL_VR5432 0x54 |
| |
| /* Cache Constants not determinable dynamically */ |
| /* (sizes are in bytes; VR5432_SIZE is the span walked by the Vr5432 flush */ |
| /* loop, which touches set 0 and set 1 at each index.) */ |
| #define VR5000_2NDLINE 32 /* secondary cache line size */ |
| #define VR5432_LINE 32 /* I,Dcache line sizes */ |
| #define VR5432_SIZE (16*1024) /* I,Dcache half-size */ |
| |
| |
| /* When not already building for mips64, select the MIPS III ISA: the code */ |
| /* below uses cache, eret, branch-likely, sd/ld and daddiu. */ |
| #ifndef __mips64 |
| .set mips3 |
| #endif |
| #ifdef __mips16 |
| /* This file contains 32 bit assembly code. */ |
| .set nomips16 |
| #endif |
| |
| #include "regs.S" |
| |
| .text |
| .align 2 |
| |
| # __cpu_timer_poll: busy-wait (polled delay) using the CP0 Count register. |
| # |
| # Taken from "R4300 Preliminary RISC Processor Specification |
| # Revision 2.0 January 1995" page 39: "The Count |
| # register... increments at a constant rate... at one-half the |
| # PClock speed." |
| # We can use this fact to provide small polled delays. |
| .globl __cpu_timer_poll |
| .ent __cpu_timer_poll |
| __cpu_timer_poll: |
| .set noreorder |
| # in: a0 = (unsigned int) number of PClock ticks to wait for |
| # out: void |
| # clobbers: a0, v0 |
| |
| # The Vr4300 counter updates at half PClock, so divide by 2 to |
| # get counter delta. bnezl is a branch-likely: its delay slot (the srl) |
| # is only executed when the branch is taken, i.e. when a0 != 0. |
| bnezl a0, 1f # continue if delta non-zero |
| srl a0, a0, 1 # divide ticks by 2 {DELAY SLOT} |
| # perform a quick return to the caller: |
| j ra |
| nop # {DELAY SLOT} |
| 1: |
| mfc0 v0, C0_COUNT # get current counter value |
| nop |
| nop |
| # We cannot just do the simple test, of adding our delta onto |
| # the current value (ignoring overflow) and then checking for |
| # equality. The counter is incrementing every two PClocks, |
| # which means the counter value can change between |
| # instructions, making it hard to sample at the exact value |
| # desired. |
| |
| # However, we do know that our entry delta value is less than |
| # half the number space (since we divide by 2 on entry). This |
| # means we can use a difference in signs to indicate timer |
| # overflow. |
| addu a0, v0, a0 # unsigned add (ignore overflow) |
| # We now have our end value in a0 (which will have been |
| # sign-extended to fill the 64bit register value). |
| 2: |
| # get current counter value: |
| mfc0 v0, C0_COUNT |
| nop |
| nop |
| # This is an unsigned 32bit subtraction; the result is then tested as a |
| # signed value, so the loop continues while (end - now) > 0: |
| subu v0, a0, v0 # delta = (end - now) |
| bgtzl v0, 2b # looping back is most likely |
| nop # {DELAY SLOT} |
| # We have now been delayed (in the foreground) for AT LEAST |
| # the required number of counter ticks. |
| j ra # return to caller |
| nop # {DELAY SLOT} |
| .set reorder |
| .end __cpu_timer_poll |
| |
| # Flush the processor caches to memory: |
| # in: void |
| # out: void |
| # clobbers: a0-a3, t0-t3 |
| |
| .globl __cpu_flush |
| .ent __cpu_flush |
| __cpu_flush: |
| .set noreorder |
| # NOTE: The Vr4300 and Vr5432 *CANNOT* have any secondary cache. |
| # On those, SC (bit 17 of CONFIG register) is hard-wired to 1, |
| # except that email from Dennis_Han@el.nec.com says that old |
| # versions of the Vr5432 incorrectly hard-wired this bit to 0. |
| # The Vr5000 has an optional direct-mapped secondary cache, |
| # and the SC bit correctly indicates this. |
| |
| # So, for the 4300 and 5432 we want to just |
| # flush the primary Data and Instruction caches. |
| # For the 5000 it is desired to flush the secondary cache too. |
| # There is an operation difference worth noting. |
| # The 4300 and 5000 primary caches use VA bit 14 to choose cache set, |
| # whereas 5432 primary caches use VA bit 0. |
| |
| # This code interprets the relevant Config register bits as |
| # much as possible, except for the 5432. |
| # The code therefore has some portability. |
| # However, the associativity issues mean you should not just assume |
| # that this code works anywhere. Also, the secondary cache set |
| # size is hardwired, since the 5000 series does not define codes |
| # for variant sizes. |
| |
| # Note: this version of the code flushes D$ before I$. |
| # It is difficult to construct a case where that matters, |
| # but it cannot hurt. |
| |
| mfc0 a0, C0_PRID # a0 = Processor Revision register |
| nop # dlindsay: unclear why the nops, but |
| nop # vr4300.S had such so I do too. |
| srl a2, a0, PR_IMP # want bits 8..15 |
| # The Implementation field is 8 bits wide: keep exactly those 8 bits. |
| # A partial mask would let other implementation numbers alias to |
| # IMPL_VR5432 and wrongly select the Vr5432 flush loop. |
| andi a2, a2, 0xff # mask: now a2 = Implementation # field |
| li a1, IMPL_VR5432 |
| beq a1, a2, 8f # use Vr5432-specific flush algorithm |
| nop # (delay slot) |
| |
| # Non-Vr5432 version of the code. |
| # (The distinctions being: CONFIG is truthful about secondary cache, |
| # and we act as if the primary Icache and Dcache are direct mapped.) |
| |
| mfc0 t0, C0_CONFIG # t0 = CONFIG register |
| nop |
| nop |
| li a1, 1 # a1=1, a useful constant |
| |
| srl a2, t0, CR_IC # want IC field of CONFIG |
| andi a2, a2, 0x7 # mask: now a2= code for Icache size |
| add a2, a2, 12 # +12 |
| sllv a2, a1, a2 # a2=primary instruction cache size in bytes |
| |
| srl a3, t0, CR_DC # DC field of CONFIG |
| andi a3, a3, 0x7 # mask: now a3= code for Dcache size |
| add a3, a3, 12 # +12 |
| sllv a3, a1, a3 # a3=primary data cache size in bytes |
| |
| li t2, (1 << CR_IB) # t2=mask over IB boolean |
| and t2, t2, t0 # test IB field of CONFIG register value |
| beqz t2, 1f # |
| li a1, 16 # 16 bytes (branch shadow: always loaded.) |
| li a1, 32 # non-zero, then 32bytes |
| 1: |
| |
| li t2, (1 << CR_DB) # t2=mask over DB boolean |
| and t2, t2, t0 # test DB field of CONFIG register value |
| beqz t2, 2f # |
| li a0, 16 # 16bytes (branch shadow: always loaded.) |
| li a0, 32 # non-zero, then 32bytes |
| 2: |
| lui t1, ((K0BASE >> 16) & 0xFFFF) |
| ori t1, t1, (K0BASE & 0xFFFF) |
| |
| # At this point, |
| # a0 = primary Dcache line size in bytes |
| # a1 = primary Icache line size in bytes |
| # a2 = primary Icache size in bytes |
| # a3 = primary Dcache size in bytes |
| # t0 = CONFIG value |
| # t1 = a round unmapped cached base address (we are in kernel mode) |
| # t2,t3 scratch |
| |
| addi t3, t1, 0 # t3=t1=start address for any cache |
| add t2, t3, a3 # t2=end address+1 of Dcache |
| sub t2, t2, a0 # t2=address of last line in Dcache |
| 3: |
| cache INDEX_WRITEBACK_INVALIDATE_D,0(t3) |
| bne t3, t2, 3b # |
| addu t3, a0 # (delay slot) increment by Dcache line size |
| |
| |
| # Now check CONFIG to see if there is a secondary cache |
| lui t2, (1 << (CR_SC-16)) # t2=mask over SC boolean |
| and t2, t2, t0 # test SC in CONFIG |
| # SC set means no secondary cache: go straight to the Icache. |
| # There is no explicit nop here: under noreorder the srl below sits in |
| # this branch's delay slot. That is harmless on the taken path because |
| # t3 is not read again from label 6 onwards. |
| bnez t2, 6f |
| |
| # There is a secondary cache. Find out its sizes. |
| |
| srl t3, t0, CR_SS # want SS field of CONFIG |
| andi t3, t3, 0x3 # mask: now t3= code for cache size. |
| beqz t3, 4f |
| lui a3, ((512*1024)>>16) # a3= 512K, code was 0 |
| addu t3, -1 # decrement code |
| beqz t3, 4f |
| lui a3, ((1024*1024)>>16) # a3= 1 M, code 1 |
| addu t3, -1 # decrement code |
| beqz t3, 4f |
| lui a3, ((2*1024*1024)>>16) # a3= 2 M, code 2 |
| # As above, no explicit nop: the li at label 4 executes in this jump's |
| # delay slot. That is harmless because a0 is not read from label 6 onwards. |
| j 6f # no secondary cache, code 3 |
| |
| 4: # a3 = secondary cache size in bytes |
| li a0, VR5000_2NDLINE # no codes assigned for other than 32 |
| |
| # At this point, |
| # a0 = secondary cache line size in bytes |
| # a1 = primary Icache line size in bytes |
| # a2 = primary Icache size in bytes |
| # a3 = secondary cache size in bytes |
| # t1 = a round unmapped cached base address (we are in kernel mode) |
| # t2,t3 scratch |
| |
| addi t3, t1, 0 # t3=t1=start address for any cache |
| add t2, t3, a3 # t2=end address+1 of secondary cache |
| sub t2, t2, a0 # t2=address of last line in secondary cache |
| 5: |
| cache INDEX_WRITEBACK_INVALIDATE_SD,0(t3) |
| bne t3, t2, 5b |
| addu t3, a0 # (delay slot) increment by line size |
| |
| |
| 6: # Any optional secondary cache done. Now do I-cache and return. |
| |
| # At this point, |
| # a1 = primary Icache line size in bytes |
| # a2 = primary Icache size in bytes |
| # t1 = a round unmapped cached base address (we are in kernel mode) |
| # t2,t3 scratch |
| |
| add t2, t1, a2 # t2=end address+1 of Icache |
| sub t2, t2, a1 # t2=address of last line in Icache |
| 7: |
| cache INDEX_INVALIDATE_I,0(t1) |
| bne t1, t2, 7b |
| addu t1, a1 # (delay slot) increment by Icache line size |
| |
| j ra # return to the caller |
| nop # (delay slot) |
| |
| 8: |
| |
| # Vr5432 version of the cpu_flush code. |
| # (The distinctions being: CONFIG can not be trusted about secondary |
| # cache (which does not exist). The primary caches use Virtual Address Bit 0 |
| # to control set selection.) |
| |
| # Code does not consult CONFIG about cache sizes: knows the hardwired sizes. |
| # Since both I and D have the same size and line size, uses a merged loop. |
| |
| li a0, VR5432_LINE |
| li a1, VR5432_SIZE |
| lui t1, ((K0BASE >> 16) & 0xFFFF) |
| ori t1, t1, (K0BASE & 0xFFFF) |
| |
| # a0 = cache line size in bytes |
| # a1 = 1/2 cache size in bytes |
| # t1 = a round unmapped cached base address (we are in kernel mode) |
| |
| add t2, t1, a1 # t2=end address+1 |
| sub t2, t2, a0 # t2=address of last line (same for Icache and Dcache) |
| |
| 9: |
| cache INDEX_WRITEBACK_INVALIDATE_D,0(t1) # set 0 |
| cache INDEX_WRITEBACK_INVALIDATE_D,1(t1) # set 1 |
| cache INDEX_INVALIDATE_I,0(t1) # set 0 |
| cache INDEX_INVALIDATE_I,1(t1) # set 1 |
| bne t1, t2, 9b |
| addu t1, a0 # (delay slot) increment by cache line size |
| |
| j ra # return to the caller |
| nop # (delay slot) |
| .set reorder |
| .end __cpu_flush |
| |
| # Storage used by the bus error handler (__buserr). |
| # NOTE: This variable should *NOT* be addressed relative to |
| # the $gp register since this code is executed before $gp is |
| # initialised... hence we leave it in the text area. This will |
| # cause problems if this routine is ever ROMmed: |
| |
| .globl __buserr_cnt |
| # 32-bit counter incremented by __buserr and read by __buserr_count. |
| __buserr_cnt: |
| .word 0 |
| # 8-byte aligned, two-word slot: __buserr saves/restores k1 here with sd/ld. |
| .align 3 |
| __k1_save: |
| .word 0 |
| .word 0 |
| .align 2 |
| |
| # __buserr: exception handler body reached via the __exception_code stub. |
| # If the Cause register's exception code is 7, count the event in |
| # __buserr_cnt and resume after the faulting instruction; otherwise jump to |
| # the vector code that was saved in __previous. |
| .ent __buserr |
| .globl __buserr |
| __buserr: |
| .set noat |
| .set noreorder |
| # k0 and k1 available for use: |
| mfc0 k0,C0_CAUSE |
| nop |
| nop |
| andi k0,k0,0x7c # isolate the exception code field (Cause bits 2..6) |
| sub k0,k0,7 << 2 # k0 == 0 iff the exception code is 7 |
| beq k0,$0,__buserr_do |
| nop # (delay slot) |
| # call the previous handler |
| la k0,__previous |
| jr k0 |
| nop # (delay slot) |
| # |
| __buserr_do: |
| # Reached only when the exception code test above matched code 7. |
| # k1 is preserved across the counter update via __k1_save. |
| la k0,__k1_save |
| sd k1,0(k0) |
| # |
| la k1,__buserr_cnt |
| lw k0,0(k1) # increment counter |
| addu k0,1 |
| sw k0,0(k1) |
| # |
| la k0,__k1_save |
| ld k1,0(k0) |
| # |
| # NOTE(review): EPC is always advanced by one instruction (4 bytes); a fault |
| # taken in a branch delay slot does not appear to be special-cased - confirm. |
| mfc0 k0,C0_EPC |
| nop |
| nop |
| addu k0,k0,4 # skip offending instruction |
| mtc0 k0,C0_EPC # update EPC |
| nop |
| nop |
| eret |
| # j k0 |
| # rfe |
| .set reorder |
| .set at |
| .end __buserr |
| |
| # Template stub that __default_buserr_handler copies word-by-word into the |
| # exception vector: it loads the absolute address of __buserr into k0 and |
| # jumps there. The __exception_code_end label marks its end so that the |
| # copy routines can compute its size. |
| __exception_code: |
| .set noreorder |
| lui k0,%hi(__buserr) |
| daddiu k0,k0,%lo(__buserr) |
| jr k0 |
| nop # (delay slot) |
| .set reorder |
| __exception_code_end: |
| |
| .data |
| # Buffer, the same size as the __exception_code stub, that receives the |
| # original contents of the exception vector. __buserr jumps here for |
| # exceptions it does not handle, and __restore_buserr_handler copies it back. |
| __previous: |
| .space (__exception_code_end - __exception_code) |
| # This subtraction of two addresses works at present, |
| # but is not guaranteed to continue working. |
| # The assembler reserves the right to put these |
| # two labels into different frags, and then |
| # cannot take their difference. |
| |
| .text |
| |
| # __default_buserr_handler: install __exception_code at the exception vector, |
| # saving the vector's previous contents in __previous, and zero __buserr_cnt. |
| # clobbers: a0-a3, v0 |
| .ent __default_buserr_handler |
| .globl __default_buserr_handler |
| __default_buserr_handler: |
| .set noreorder |
| # attach our simple bus error handler: |
| # in: void |
| # out: void |
| mfc0 a0,C0_SR |
| nop |
| li a1,SR_BEV |
| and a1,a1,a0 |
| # Choose the vector base from the SR_BEV bit: |
| # BEV clear -> 0x80000000 + 0x180 |
| # BEV set -> 0xbfc00000 + 0x200 + 0x180 |
| beq a1,$0,baseaddr |
| lui a0,0x8000 # delay slot |
| lui a0,0xbfc0 |
| daddiu a0,a0,0x0200 |
| baseaddr: |
| daddiu a0,a0,0x0180 |
| # a0 = base vector table address |
| la a1,__exception_code_end |
| la a2,__exception_code |
| subu a1,a1,a2 # a1 = stub size in bytes (loop counter) |
| la a3,__previous |
| # there must be a better way of doing this???? |
| # Per word: save the existing vector word to __previous, then overwrite it |
| # with the corresponding word of __exception_code. The loop ends when the |
| # byte count reaches exactly zero, so the stub size must be a non-zero |
| # multiple of 4. |
| # NOTE(review): no cache writeback/invalidate follows these stores of |
| # instructions - confirm whether callers are expected to call __cpu_flush. |
| copyloop: |
| lw v0,0(a0) |
| sw v0,0(a3) |
| lw v0,0(a2) |
| sw v0,0(a0) |
| daddiu a0,a0,4 |
| daddiu a2,a2,4 |
| daddiu a3,a3,4 |
| subu a1,a1,4 |
| bne a1,$0,copyloop |
| nop # (delay slot) |
| la a0,__buserr_cnt |
| sw $0,0(a0) # reset the bus error counter |
| j ra |
| nop # (delay slot) |
| .set reorder |
| .end __default_buserr_handler |
| |
| # __restore_buserr_handler: copy the vector contents saved in __previous |
| # back over the exception vector, undoing __default_buserr_handler. |
| # clobbers: a0, a1, a3, v0 |
| .ent __restore_buserr_handler |
| .globl __restore_buserr_handler |
| __restore_buserr_handler: |
| .set noreorder |
| # restore original (monitor) bus error handler |
| # in: void |
| # out: void |
| mfc0 a0,C0_SR |
| nop |
| li a1,SR_BEV |
| and a1,a1,a0 |
| # Choose the vector base from the SR_BEV bit: |
| # BEV clear -> 0x80000000 + 0x180 |
| # BEV set -> 0xbfc00000 + 0x200 + 0x180 |
| beq a1,$0,res_baseaddr |
| lui a0,0x8000 # delay slot |
| lui a0,0xbfc0 |
| daddiu a0,a0,0x0200 |
| res_baseaddr: |
| daddiu a0,a0,0x0180 |
| # a0 = base vector table address |
| la a1,__exception_code_end |
| la a3,__exception_code |
| subu a1,a1,a3 # a1 = stub size in bytes (loop counter) |
| la a3,__previous # a3 is reused: now the source of the saved words |
| # there must be a better way of doing this???? |
| # Copy one word per iteration from __previous to the vector until the |
| # byte count reaches exactly zero. |
| res_copyloop: |
| lw v0,0(a3) |
| sw v0,0(a0) |
| daddiu a0,a0,4 |
| daddiu a3,a3,4 |
| subu a1,a1,4 |
| bne a1,$0,res_copyloop |
| nop # (delay slot) |
| j ra |
| nop # (delay slot) |
| .set reorder |
| .end __restore_buserr_handler |
| |
| # __buserr_count: return the current value of the __buserr_cnt counter. |
| .ent __buserr_count |
| .globl __buserr_count |
| __buserr_count: |
| .set noreorder |
| # read the bus error counter maintained by __buserr |
| # in: void |
| # out: unsigned int __buserr_cnt |
| la v0,__buserr_cnt |
| lw v0,0(v0) # v0 = *(&__buserr_cnt) |
| j ra |
| nop # (delay slot) |
| .set reorder |
| .end __buserr_count |
| |
| /* EOF vr5xxx.S */ |