| /* This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| |
| #include "mpi.h" |
| #include "prtypes.h" |
| |
| /* |
| * This file implements a single function: s_mpi_getProcessorLineSize(); |
| * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line |
| * if a cache exists, or zero if there is no cache. If more than one |
| * cache line exists, it should return the smallest line size (which is |
| * usually the L1 cache). |
| * |
| * mp_modexp uses this information to make sure that private key information |
| * isn't being leaked through the cache. |
| * |
| * Currently the file returns good data for most modern x86 processors, and |
| * reasonable data on 64-bit ppc processors. All other processors are assumed |
| * to have a cache line size of 32 bytes. |
| * |
| */ |
| |
| #if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
| /* X86 processors have special instructions that tell us about the cache */ |
| #include "string.h" |
| |
| #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) |
| #define AMD_64 1 |
| #endif |
| |
| /* Generic CPUID function */ |
| #if defined(AMD_64) |
| |
| #if defined(__GNUC__) |
| |
| void |
| freebl_cpuid(unsigned long op, unsigned long *eax, |
| unsigned long *ebx, unsigned long *ecx, |
| unsigned long *edx) |
| { |
| __asm__("xor %%ecx, %%ecx\n\t" |
| "cpuid\n\t" |
| : "=a"(*eax), |
| "=b"(*ebx), |
| "=c"(*ecx), |
| "=d"(*edx) |
| : "0"(op)); |
| } |
| |
| #elif defined(_MSC_VER) |
| |
| #include <intrin.h> |
| |
| void |
| freebl_cpuid(unsigned long op, unsigned long *eax, |
| unsigned long *ebx, unsigned long *ecx, |
| unsigned long *edx) |
| { |
| int intrinsic_out[4]; |
| |
| __cpuid(intrinsic_out, op); |
| *eax = intrinsic_out[0]; |
| *ebx = intrinsic_out[1]; |
| *ecx = intrinsic_out[2]; |
| *edx = intrinsic_out[3]; |
| } |
| |
| #endif |
| |
| #else /* !defined(AMD_64) */ |
| |
| /* x86 */ |
| |
| #if defined(__GNUC__) |
| void |
| freebl_cpuid(unsigned long op, unsigned long *eax, |
| unsigned long *ebx, unsigned long *ecx, |
| unsigned long *edx) |
| { |
| /* Some older processors don't fill the ecx register with cpuid, so clobber it |
| * before calling cpuid, so that there's no risk of picking random bits that |
| * erroneously indicate that absent CPU features are present. |
| * Also, GCC isn't smart enough to save the ebx PIC register on its own |
| * in this case, so do it by hand. Use edi to store ebx and pass the |
| * value returned in ebx from cpuid through edi. */ |
| __asm__("xor %%ecx, %%ecx\n\t" |
| "mov %%ebx,%%edi\n\t" |
| "cpuid\n\t" |
| "xchgl %%ebx,%%edi\n\t" |
| : "=a"(*eax), |
| "=D"(*ebx), |
| "=c"(*ecx), |
| "=d"(*edx) |
| : "0"(op)); |
| } |
| |
| /* |
| * try flipping a processor flag to determine CPU type |
| */ |
| static unsigned long |
| changeFlag(unsigned long flag) |
| { |
| unsigned long changedFlags, originalFlags; |
| __asm__("pushfl\n\t" /* get the flags */ |
| "popl %0\n\t" |
| "movl %0,%1\n\t" /* save the original flags */ |
| "xorl %2,%0\n\t" /* flip the bit */ |
| "pushl %0\n\t" /* set the flags */ |
| "popfl\n\t" |
| "pushfl\n\t" /* get the flags again (for return) */ |
| "popl %0\n\t" |
| "pushl %1\n\t" /* restore the original flags */ |
| "popfl\n\t" |
| : "=r"(changedFlags), |
| "=r"(originalFlags), |
| "=r"(flag) |
| : "2"(flag)); |
| return changedFlags ^ originalFlags; |
| } |
| |
| #elif defined(_MSC_VER) |
| |
| /* |
| * windows versions of the above assembler |
| */ |
| #define wcpuid __asm __emit 0fh __asm __emit 0a2h |
| void |
| freebl_cpuid(unsigned long op, unsigned long *Reax, |
| unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) |
| { |
| unsigned long Leax, Lebx, Lecx, Ledx; |
| __asm { |
| pushad |
| xor ecx,ecx |
| mov eax,op |
| wcpuid |
| mov Leax,eax |
| mov Lebx,ebx |
| mov Lecx,ecx |
| mov Ledx,edx |
| popad |
| } |
| *Reax = Leax; |
| *Rebx = Lebx; |
| *Recx = Lecx; |
| *Redx = Ledx; |
| } |
| |
| static unsigned long |
| changeFlag(unsigned long flag) |
| { |
| unsigned long changedFlags, originalFlags; |
| __asm { |
| push eax |
| push ebx |
| pushfd /* get the flags */ |
| pop eax |
| push eax /* save the flags on the stack */ |
| mov originalFlags,eax /* save the original flags */ |
| mov ebx,flag |
| xor eax,ebx /* flip the bit */ |
| push eax /* set the flags */ |
| popfd |
| pushfd /* get the flags again (for return) */ |
| pop eax |
| popfd /* restore the original flags */ |
| mov changedFlags,eax |
| pop ebx |
| pop eax |
| } |
| return changedFlags ^ originalFlags; |
| } |
| #endif |
| |
| #endif |
| |
| #if !defined(AMD_64) |
| #define AC_FLAG 0x40000 |
| #define ID_FLAG 0x200000 |
| |
| /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ |
| static int |
| is386() |
| { |
| return changeFlag(AC_FLAG) == 0; |
| } |
| |
| /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ |
| static int |
| is486() |
| { |
| return changeFlag(ID_FLAG) == 0; |
| } |
| #endif |
| |
| /* |
| * table for Intel Cache. |
| * See Intel Application Note AP-485 for more information |
| */ |
| |
| typedef unsigned char CacheTypeEntry; |
| |
| typedef enum { |
| Cache_NONE = 0, |
| Cache_UNKNOWN = 1, |
| Cache_TLB = 2, |
| Cache_TLBi = 3, |
| Cache_TLBd = 4, |
| Cache_Trace = 5, |
| Cache_L1 = 6, |
| Cache_L1i = 7, |
| Cache_L1d = 8, |
| Cache_L2 = 9, |
| Cache_L2i = 10, |
| Cache_L2d = 11, |
| Cache_L3 = 12, |
| Cache_L3i = 13, |
| Cache_L3d = 14 |
| } CacheType; |
| |
| struct _cache { |
| CacheTypeEntry type; |
| unsigned char lineSize; |
| }; |
| static const struct _cache CacheMap[256] = { |
| /* 00 */ { Cache_NONE, 0 }, |
| /* 01 */ { Cache_TLBi, 0 }, |
| /* 02 */ { Cache_TLBi, 0 }, |
| /* 03 */ { Cache_TLBd, 0 }, |
| /* 04 */ { |
| Cache_TLBd, |
| }, |
| /* 05 */ { Cache_UNKNOWN, 0 }, |
| /* 06 */ { Cache_L1i, 32 }, |
| /* 07 */ { Cache_UNKNOWN, 0 }, |
| /* 08 */ { Cache_L1i, 32 }, |
| /* 09 */ { Cache_UNKNOWN, 0 }, |
| /* 0a */ { Cache_L1d, 32 }, |
| /* 0b */ { Cache_UNKNOWN, 0 }, |
| /* 0c */ { Cache_L1d, 32 }, |
| /* 0d */ { Cache_UNKNOWN, 0 }, |
| /* 0e */ { Cache_UNKNOWN, 0 }, |
| /* 0f */ { Cache_UNKNOWN, 0 }, |
| /* 10 */ { Cache_UNKNOWN, 0 }, |
| /* 11 */ { Cache_UNKNOWN, 0 }, |
| /* 12 */ { Cache_UNKNOWN, 0 }, |
| /* 13 */ { Cache_UNKNOWN, 0 }, |
| /* 14 */ { Cache_UNKNOWN, 0 }, |
| /* 15 */ { Cache_UNKNOWN, 0 }, |
| /* 16 */ { Cache_UNKNOWN, 0 }, |
| /* 17 */ { Cache_UNKNOWN, 0 }, |
| /* 18 */ { Cache_UNKNOWN, 0 }, |
| /* 19 */ { Cache_UNKNOWN, 0 }, |
| /* 1a */ { Cache_UNKNOWN, 0 }, |
| /* 1b */ { Cache_UNKNOWN, 0 }, |
| /* 1c */ { Cache_UNKNOWN, 0 }, |
| /* 1d */ { Cache_UNKNOWN, 0 }, |
| /* 1e */ { Cache_UNKNOWN, 0 }, |
| /* 1f */ { Cache_UNKNOWN, 0 }, |
| /* 20 */ { Cache_UNKNOWN, 0 }, |
| /* 21 */ { Cache_UNKNOWN, 0 }, |
| /* 22 */ { Cache_L3, 64 }, |
| /* 23 */ { Cache_L3, 64 }, |
| /* 24 */ { Cache_UNKNOWN, 0 }, |
| /* 25 */ { Cache_L3, 64 }, |
| /* 26 */ { Cache_UNKNOWN, 0 }, |
| /* 27 */ { Cache_UNKNOWN, 0 }, |
| /* 28 */ { Cache_UNKNOWN, 0 }, |
| /* 29 */ { Cache_L3, 64 }, |
| /* 2a */ { Cache_UNKNOWN, 0 }, |
| /* 2b */ { Cache_UNKNOWN, 0 }, |
| /* 2c */ { Cache_L1d, 64 }, |
| /* 2d */ { Cache_UNKNOWN, 0 }, |
| /* 2e */ { Cache_UNKNOWN, 0 }, |
| /* 2f */ { Cache_UNKNOWN, 0 }, |
| /* 30 */ { Cache_L1i, 64 }, |
| /* 31 */ { Cache_UNKNOWN, 0 }, |
| /* 32 */ { Cache_UNKNOWN, 0 }, |
| /* 33 */ { Cache_UNKNOWN, 0 }, |
| /* 34 */ { Cache_UNKNOWN, 0 }, |
| /* 35 */ { Cache_UNKNOWN, 0 }, |
| /* 36 */ { Cache_UNKNOWN, 0 }, |
| /* 37 */ { Cache_UNKNOWN, 0 }, |
| /* 38 */ { Cache_UNKNOWN, 0 }, |
| /* 39 */ { Cache_L2, 64 }, |
| /* 3a */ { Cache_UNKNOWN, 0 }, |
| /* 3b */ { Cache_L2, 64 }, |
| /* 3c */ { Cache_L2, 64 }, |
| /* 3d */ { Cache_UNKNOWN, 0 }, |
| /* 3e */ { Cache_UNKNOWN, 0 }, |
| /* 3f */ { Cache_UNKNOWN, 0 }, |
| /* 40 */ { Cache_L2, 0 }, |
| /* 41 */ { Cache_L2, 32 }, |
| /* 42 */ { Cache_L2, 32 }, |
| /* 43 */ { Cache_L2, 32 }, |
| /* 44 */ { Cache_L2, 32 }, |
| /* 45 */ { Cache_L2, 32 }, |
| /* 46 */ { Cache_UNKNOWN, 0 }, |
| /* 47 */ { Cache_UNKNOWN, 0 }, |
| /* 48 */ { Cache_UNKNOWN, 0 }, |
| /* 49 */ { Cache_UNKNOWN, 0 }, |
| /* 4a */ { Cache_UNKNOWN, 0 }, |
| /* 4b */ { Cache_UNKNOWN, 0 }, |
| /* 4c */ { Cache_UNKNOWN, 0 }, |
| /* 4d */ { Cache_UNKNOWN, 0 }, |
| /* 4e */ { Cache_UNKNOWN, 0 }, |
| /* 4f */ { Cache_UNKNOWN, 0 }, |
| /* 50 */ { Cache_TLBi, 0 }, |
| /* 51 */ { Cache_TLBi, 0 }, |
| /* 52 */ { Cache_TLBi, 0 }, |
| /* 53 */ { Cache_UNKNOWN, 0 }, |
| /* 54 */ { Cache_UNKNOWN, 0 }, |
| /* 55 */ { Cache_UNKNOWN, 0 }, |
| /* 56 */ { Cache_UNKNOWN, 0 }, |
| /* 57 */ { Cache_UNKNOWN, 0 }, |
| /* 58 */ { Cache_UNKNOWN, 0 }, |
| /* 59 */ { Cache_UNKNOWN, 0 }, |
| /* 5a */ { Cache_UNKNOWN, 0 }, |
| /* 5b */ { Cache_TLBd, 0 }, |
| /* 5c */ { Cache_TLBd, 0 }, |
| /* 5d */ { Cache_TLBd, 0 }, |
| /* 5e */ { Cache_UNKNOWN, 0 }, |
| /* 5f */ { Cache_UNKNOWN, 0 }, |
| /* 60 */ { Cache_UNKNOWN, 0 }, |
| /* 61 */ { Cache_UNKNOWN, 0 }, |
| /* 62 */ { Cache_UNKNOWN, 0 }, |
| /* 63 */ { Cache_UNKNOWN, 0 }, |
| /* 64 */ { Cache_UNKNOWN, 0 }, |
| /* 65 */ { Cache_UNKNOWN, 0 }, |
| /* 66 */ { Cache_L1d, 64 }, |
| /* 67 */ { Cache_L1d, 64 }, |
| /* 68 */ { Cache_L1d, 64 }, |
| /* 69 */ { Cache_UNKNOWN, 0 }, |
| /* 6a */ { Cache_UNKNOWN, 0 }, |
| /* 6b */ { Cache_UNKNOWN, 0 }, |
| /* 6c */ { Cache_UNKNOWN, 0 }, |
| /* 6d */ { Cache_UNKNOWN, 0 }, |
| /* 6e */ { Cache_UNKNOWN, 0 }, |
| /* 6f */ { Cache_UNKNOWN, 0 }, |
| /* 70 */ { Cache_Trace, 1 }, |
| /* 71 */ { Cache_Trace, 1 }, |
| /* 72 */ { Cache_Trace, 1 }, |
| /* 73 */ { Cache_UNKNOWN, 0 }, |
| /* 74 */ { Cache_UNKNOWN, 0 }, |
| /* 75 */ { Cache_UNKNOWN, 0 }, |
| /* 76 */ { Cache_UNKNOWN, 0 }, |
| /* 77 */ { Cache_UNKNOWN, 0 }, |
| /* 78 */ { Cache_UNKNOWN, 0 }, |
| /* 79 */ { Cache_L2, 64 }, |
| /* 7a */ { Cache_L2, 64 }, |
| /* 7b */ { Cache_L2, 64 }, |
| /* 7c */ { Cache_L2, 64 }, |
| /* 7d */ { Cache_UNKNOWN, 0 }, |
| /* 7e */ { Cache_UNKNOWN, 0 }, |
| /* 7f */ { Cache_UNKNOWN, 0 }, |
| /* 80 */ { Cache_UNKNOWN, 0 }, |
| /* 81 */ { Cache_UNKNOWN, 0 }, |
| /* 82 */ { Cache_L2, 32 }, |
| /* 83 */ { Cache_L2, 32 }, |
| /* 84 */ { Cache_L2, 32 }, |
| /* 85 */ { Cache_L2, 32 }, |
| /* 86 */ { Cache_L2, 64 }, |
| /* 87 */ { Cache_L2, 64 }, |
| /* 88 */ { Cache_UNKNOWN, 0 }, |
| /* 89 */ { Cache_UNKNOWN, 0 }, |
| /* 8a */ { Cache_UNKNOWN, 0 }, |
| /* 8b */ { Cache_UNKNOWN, 0 }, |
| /* 8c */ { Cache_UNKNOWN, 0 }, |
| /* 8d */ { Cache_UNKNOWN, 0 }, |
| /* 8e */ { Cache_UNKNOWN, 0 }, |
| /* 8f */ { Cache_UNKNOWN, 0 }, |
| /* 90 */ { Cache_UNKNOWN, 0 }, |
| /* 91 */ { Cache_UNKNOWN, 0 }, |
| /* 92 */ { Cache_UNKNOWN, 0 }, |
| /* 93 */ { Cache_UNKNOWN, 0 }, |
| /* 94 */ { Cache_UNKNOWN, 0 }, |
| /* 95 */ { Cache_UNKNOWN, 0 }, |
| /* 96 */ { Cache_UNKNOWN, 0 }, |
| /* 97 */ { Cache_UNKNOWN, 0 }, |
| /* 98 */ { Cache_UNKNOWN, 0 }, |
| /* 99 */ { Cache_UNKNOWN, 0 }, |
| /* 9a */ { Cache_UNKNOWN, 0 }, |
| /* 9b */ { Cache_UNKNOWN, 0 }, |
| /* 9c */ { Cache_UNKNOWN, 0 }, |
| /* 9d */ { Cache_UNKNOWN, 0 }, |
| /* 9e */ { Cache_UNKNOWN, 0 }, |
| /* 9f */ { Cache_UNKNOWN, 0 }, |
| /* a0 */ { Cache_UNKNOWN, 0 }, |
| /* a1 */ { Cache_UNKNOWN, 0 }, |
| /* a2 */ { Cache_UNKNOWN, 0 }, |
| /* a3 */ { Cache_UNKNOWN, 0 }, |
| /* a4 */ { Cache_UNKNOWN, 0 }, |
| /* a5 */ { Cache_UNKNOWN, 0 }, |
| /* a6 */ { Cache_UNKNOWN, 0 }, |
| /* a7 */ { Cache_UNKNOWN, 0 }, |
| /* a8 */ { Cache_UNKNOWN, 0 }, |
| /* a9 */ { Cache_UNKNOWN, 0 }, |
| /* aa */ { Cache_UNKNOWN, 0 }, |
| /* ab */ { Cache_UNKNOWN, 0 }, |
| /* ac */ { Cache_UNKNOWN, 0 }, |
| /* ad */ { Cache_UNKNOWN, 0 }, |
| /* ae */ { Cache_UNKNOWN, 0 }, |
| /* af */ { Cache_UNKNOWN, 0 }, |
| /* b0 */ { Cache_TLBi, 0 }, |
| /* b1 */ { Cache_UNKNOWN, 0 }, |
| /* b2 */ { Cache_UNKNOWN, 0 }, |
| /* b3 */ { Cache_TLBd, 0 }, |
| /* b4 */ { Cache_UNKNOWN, 0 }, |
| /* b5 */ { Cache_UNKNOWN, 0 }, |
| /* b6 */ { Cache_UNKNOWN, 0 }, |
| /* b7 */ { Cache_UNKNOWN, 0 }, |
| /* b8 */ { Cache_UNKNOWN, 0 }, |
| /* b9 */ { Cache_UNKNOWN, 0 }, |
| /* ba */ { Cache_UNKNOWN, 0 }, |
| /* bb */ { Cache_UNKNOWN, 0 }, |
| /* bc */ { Cache_UNKNOWN, 0 }, |
| /* bd */ { Cache_UNKNOWN, 0 }, |
| /* be */ { Cache_UNKNOWN, 0 }, |
| /* bf */ { Cache_UNKNOWN, 0 }, |
| /* c0 */ { Cache_UNKNOWN, 0 }, |
| /* c1 */ { Cache_UNKNOWN, 0 }, |
| /* c2 */ { Cache_UNKNOWN, 0 }, |
| /* c3 */ { Cache_UNKNOWN, 0 }, |
| /* c4 */ { Cache_UNKNOWN, 0 }, |
| /* c5 */ { Cache_UNKNOWN, 0 }, |
| /* c6 */ { Cache_UNKNOWN, 0 }, |
| /* c7 */ { Cache_UNKNOWN, 0 }, |
| /* c8 */ { Cache_UNKNOWN, 0 }, |
| /* c9 */ { Cache_UNKNOWN, 0 }, |
| /* ca */ { Cache_UNKNOWN, 0 }, |
| /* cb */ { Cache_UNKNOWN, 0 }, |
| /* cc */ { Cache_UNKNOWN, 0 }, |
| /* cd */ { Cache_UNKNOWN, 0 }, |
| /* ce */ { Cache_UNKNOWN, 0 }, |
| /* cf */ { Cache_UNKNOWN, 0 }, |
| /* d0 */ { Cache_UNKNOWN, 0 }, |
| /* d1 */ { Cache_UNKNOWN, 0 }, |
| /* d2 */ { Cache_UNKNOWN, 0 }, |
| /* d3 */ { Cache_UNKNOWN, 0 }, |
| /* d4 */ { Cache_UNKNOWN, 0 }, |
| /* d5 */ { Cache_UNKNOWN, 0 }, |
| /* d6 */ { Cache_UNKNOWN, 0 }, |
| /* d7 */ { Cache_UNKNOWN, 0 }, |
| /* d8 */ { Cache_UNKNOWN, 0 }, |
| /* d9 */ { Cache_UNKNOWN, 0 }, |
| /* da */ { Cache_UNKNOWN, 0 }, |
| /* db */ { Cache_UNKNOWN, 0 }, |
| /* dc */ { Cache_UNKNOWN, 0 }, |
| /* dd */ { Cache_UNKNOWN, 0 }, |
| /* de */ { Cache_UNKNOWN, 0 }, |
| /* df */ { Cache_UNKNOWN, 0 }, |
| /* e0 */ { Cache_UNKNOWN, 0 }, |
| /* e1 */ { Cache_UNKNOWN, 0 }, |
| /* e2 */ { Cache_UNKNOWN, 0 }, |
| /* e3 */ { Cache_UNKNOWN, 0 }, |
| /* e4 */ { Cache_UNKNOWN, 0 }, |
| /* e5 */ { Cache_UNKNOWN, 0 }, |
| /* e6 */ { Cache_UNKNOWN, 0 }, |
| /* e7 */ { Cache_UNKNOWN, 0 }, |
| /* e8 */ { Cache_UNKNOWN, 0 }, |
| /* e9 */ { Cache_UNKNOWN, 0 }, |
| /* ea */ { Cache_UNKNOWN, 0 }, |
| /* eb */ { Cache_UNKNOWN, 0 }, |
| /* ec */ { Cache_UNKNOWN, 0 }, |
| /* ed */ { Cache_UNKNOWN, 0 }, |
| /* ee */ { Cache_UNKNOWN, 0 }, |
| /* ef */ { Cache_UNKNOWN, 0 }, |
| /* f0 */ { Cache_UNKNOWN, 0 }, |
| /* f1 */ { Cache_UNKNOWN, 0 }, |
| /* f2 */ { Cache_UNKNOWN, 0 }, |
| /* f3 */ { Cache_UNKNOWN, 0 }, |
| /* f4 */ { Cache_UNKNOWN, 0 }, |
| /* f5 */ { Cache_UNKNOWN, 0 }, |
| /* f6 */ { Cache_UNKNOWN, 0 }, |
| /* f7 */ { Cache_UNKNOWN, 0 }, |
| /* f8 */ { Cache_UNKNOWN, 0 }, |
| /* f9 */ { Cache_UNKNOWN, 0 }, |
| /* fa */ { Cache_UNKNOWN, 0 }, |
| /* fb */ { Cache_UNKNOWN, 0 }, |
| /* fc */ { Cache_UNKNOWN, 0 }, |
| /* fd */ { Cache_UNKNOWN, 0 }, |
| /* fe */ { Cache_UNKNOWN, 0 }, |
| /* ff */ { Cache_UNKNOWN, 0 } |
| }; |
| |
| /* |
| * use the above table to determine the CacheEntryLineSize. |
| */ |
| static void |
| getIntelCacheEntryLineSize(unsigned long val, int *level, |
| unsigned long *lineSize) |
| { |
| CacheType type; |
| |
| type = CacheMap[val].type; |
| /* only interested in data caches */ |
| /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. |
| * this data check has the side effect of rejecting that entry. If |
| * that wasn't the case, we could have to reject it explicitly */ |
| if (CacheMap[val].lineSize == 0) { |
| return; |
| } |
| /* look at the caches, skip types we aren't interested in. |
| * if we already have a value for a lower level cache, skip the |
| * current entry */ |
| if ((type == Cache_L1) || (type == Cache_L1d)) { |
| *level = 1; |
| *lineSize = CacheMap[val].lineSize; |
| } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { |
| *level = 2; |
| *lineSize = CacheMap[val].lineSize; |
| } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { |
| *level = 3; |
| *lineSize = CacheMap[val].lineSize; |
| } |
| return; |
| } |
| |
| static void |
| getIntelRegisterCacheLineSize(unsigned long val, |
| int *level, unsigned long *lineSize) |
| { |
| getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize); |
| getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize); |
| getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize); |
| getIntelCacheEntryLineSize(val & 0xff, level, lineSize); |
| } |
| |
| /* |
| * returns '0' if no recognized cache is found, or if the cache |
| * information is supported by this processor |
| */ |
| static unsigned long |
| getIntelCacheLineSize(int cpuidLevel) |
| { |
| int level = 4; |
| unsigned long lineSize = 0; |
| unsigned long eax, ebx, ecx, edx; |
| int repeat, count; |
| |
| if (cpuidLevel < 2) { |
| return 0; |
| } |
| |
| /* command '2' of the cpuid is intel's cache info call. Each byte of the |
| * 4 registers contain a potential descriptor for the cache. The CacheMap |
| * table maps the cache entry with the processor cache. Register 'al' |
| * contains a count value that cpuid '2' needs to be called in order to |
| * find all the cache descriptors. Only registers with the high bit set |
| * to 'zero' have valid descriptors. This code loops through all the |
| * required calls to cpuid '2' and passes any valid descriptors it finds |
| * to the getIntelRegisterCacheLineSize code, which breaks the registers |
| * down into their component descriptors. In the end the lineSize of the |
| * lowest level cache data cache is returned. */ |
| freebl_cpuid(2, &eax, &ebx, &ecx, &edx); |
| repeat = eax & 0xf; |
| for (count = 0; count < repeat; count++) { |
| if ((eax & 0x80000000) == 0) { |
| getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); |
| } |
| if ((ebx & 0x80000000) == 0) { |
| getIntelRegisterCacheLineSize(ebx, &level, &lineSize); |
| } |
| if ((ecx & 0x80000000) == 0) { |
| getIntelRegisterCacheLineSize(ecx, &level, &lineSize); |
| } |
| if ((edx & 0x80000000) == 0) { |
| getIntelRegisterCacheLineSize(edx, &level, &lineSize); |
| } |
| if (count + 1 != repeat) { |
| freebl_cpuid(2, &eax, &ebx, &ecx, &edx); |
| } |
| } |
| return lineSize; |
| } |
| |
| /* |
| * returns '0' if the cache info is not supported by this processor. |
| * This is based on the AMD extended cache commands for cpuid. |
| * (see "AMD Processor Recognition Application Note" Publication 20734). |
| * Some other processors use the identical scheme. |
| * (see "Processor Recognition, Transmeta Corporation"). |
| */ |
| static unsigned long |
| getOtherCacheLineSize(unsigned long cpuidLevel) |
| { |
| unsigned long lineSize = 0; |
| unsigned long eax, ebx, ecx, edx; |
| |
| /* get the Extended CPUID level */ |
| freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); |
| cpuidLevel = eax; |
| |
| if (cpuidLevel >= 0x80000005) { |
| freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); |
| lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ |
| } |
| return lineSize; |
| } |
| |
| static const char *const manMap[] = { |
| #define INTEL 0 |
| "GenuineIntel", |
| #define AMD 1 |
| "AuthenticAMD", |
| #define CYRIX 2 |
| "CyrixInstead", |
| #define CENTAUR 2 |
| "CentaurHauls", |
| #define NEXGEN 3 |
| "NexGenDriven", |
| #define TRANSMETA 4 |
| "GenuineTMx86", |
| #define RISE 5 |
| "RiseRiseRise", |
| #define UMC 6 |
| "UMC UMC UMC ", |
| #define SIS 7 |
| "Sis Sis Sis ", |
| #define NATIONAL 8 |
| "Geode by NSC", |
| }; |
| |
| static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]); |
| |
| #define MAN_UNKNOWN 9 |
| |
| #if !defined(AMD_64) |
| #define SSE2_FLAG (1 << 26) |
| unsigned long |
| s_mpi_is_sse2() |
| { |
| unsigned long eax, ebx, ecx, edx; |
| |
| if (is386() || is486()) { |
| return 0; |
| } |
| freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
| |
| /* has no SSE2 extensions */ |
| if (eax == 0) { |
| return 0; |
| } |
| |
| freebl_cpuid(1, &eax, &ebx, &ecx, &edx); |
| return (edx & SSE2_FLAG) == SSE2_FLAG; |
| } |
| #endif |
| |
| unsigned long |
| s_mpi_getProcessorLineSize() |
| { |
| unsigned long eax, ebx, ecx, edx; |
| PRUint32 cpuid[3]; |
| unsigned long cpuidLevel; |
| unsigned long cacheLineSize = 0; |
| int manufacturer = MAN_UNKNOWN; |
| int i; |
| char string[13]; |
| |
| #if !defined(AMD_64) |
| if (is386()) { |
| return 0; /* 386 had no cache */ |
| } |
| if (is486()) { |
| return 32; /* really? need more info */ |
| } |
| #endif |
| |
| /* Pentium, cpuid command is available */ |
| freebl_cpuid(0, &eax, &ebx, &ecx, &edx); |
| cpuidLevel = eax; |
| /* string holds the CPU's manufacturer ID string - a twelve |
| * character ASCII string stored in ebx, edx, ecx, and |
| * the 32-bit extended feature flags are in edx, ecx. |
| */ |
| cpuid[0] = ebx; |
| cpuid[1] = ecx; |
| cpuid[2] = edx; |
| memcpy(string, cpuid, sizeof(cpuid)); |
| string[12] = 0; |
| |
| manufacturer = MAN_UNKNOWN; |
| for (i = 0; i < n_manufacturers; i++) { |
| if (strcmp(manMap[i], string) == 0) { |
| manufacturer = i; |
| } |
| } |
| |
| if (manufacturer == INTEL) { |
| cacheLineSize = getIntelCacheLineSize(cpuidLevel); |
| } else { |
| cacheLineSize = getOtherCacheLineSize(cpuidLevel); |
| } |
| /* doesn't support cache info based on cpuid. This means |
| * an old pentium class processor, which have cache lines of |
| * 32. If we learn differently, we can use a switch based on |
| * the Manufacturer id */ |
| if (cacheLineSize == 0) { |
| cacheLineSize = 32; |
| } |
| return cacheLineSize; |
| } |
| #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
| #endif |
| |
| #if defined(__ppc64__) |
| /* |
| * Sigh, The PPC has some really nice features to help us determine cache |
| * size, since it had lots of direct control functions to do so. The POWER |
| * processor even has an instruction to do this, but it was dropped in |
| * PowerPC. Unfortunately most of them are not available in user mode. |
| * |
| * The dcbz function would be a great way to determine cache line size except |
| * 1) it only works on write-back memory (it throws an exception otherwise), |
| * and 2) because so many mac programs 'knew' the processor cache size was |
| * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new |
| * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep |
| * these programs happy. dcbzl work if 64 bit instructions are supported. |
| * If you know 64 bit instructions are supported, and that stack is |
| * write-back, you can use this code. |
| */ |
| #include "memory.h" |
| |
| /* clear the cache line that contains 'array' */ |
| static inline void |
| dcbzl(char *array) |
| { |
| register char *a asm("r2") = array; |
| __asm__ __volatile__("dcbzl %0,0" |
| : "=r"(a) |
| : "0"(a)); |
| } |
| |
| #define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1))) |
| |
| #define PPC_MAX_LINE_SIZE 256 |
| unsigned long |
| s_mpi_getProcessorLineSize() |
| { |
| char testArray[2 * PPC_MAX_LINE_SIZE + 1]; |
| char *test; |
| int i; |
| |
| /* align the array on a maximum line size boundary, so we |
| * know we are starting to clear from the first address */ |
| test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); |
| /* set all the values to 1's */ |
| memset(test, 0xff, PPC_MAX_LINE_SIZE); |
| /* clear one cache block starting at 'test' */ |
| dcbzl(test); |
| |
| /* find the size of the cleared area, that's our block size */ |
| for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) { |
| if (test[i - 1] == 0) { |
| return i; |
| } |
| } |
| return 0; |
| } |
| |
| #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 |
| #endif |
| |
| /* |
| * put other processor and platform specific cache code here |
| * return the smallest cache line size in bytes on the processor |
| * (usually the L1 cache). If the OS has a call, this would be |
| * a greate place to put it. |
| * |
| * If there is no cache, return 0; |
| * |
| * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions |
| * below aren't compiled. |
| * |
| */ |
| |
| /* If no way to get the processor cache line size has been defined, assume |
| * it's 32 bytes (most common value, does not significantly impact performance) |
| */ |
| #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED |
| unsigned long |
| s_mpi_getProcessorLineSize() |
| { |
| return 32; |
| } |
| #endif |