| .file "exp10f.s" |
| |
| |
| // Copyright (c) 2000 - 2005, Intel Corporation |
| // All rights reserved. |
| // |
| // Contributed 2000 by the Intel Numerics Group, Intel Corporation |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // |
| // * Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // |
| // * The name of Intel Corporation may not be used to endorse or promote |
| // products derived from this software without specific prior written |
| // permission. |
| |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
| // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING |
| // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| // |
| // Intel Corporation is the author of this code, and requests that all |
| // problem reports or change requests be submitted to it directly at |
| // http://www.intel.com/software/products/opensource/libraries/num.htm. |
| // |
| // History |
| //============================================================== |
| // 08/25/00 Initial version |
| // 05/20/02 Cleaned up namespace and sf0 syntax |
| // 09/06/02 Improved performance and accuracy; no inexact flags on exact cases |
| // 01/29/03 Added missing } to bundle templates |
| // 12/16/04 Call error handling on underflow. |
| // 03/31/05 Reformatted delimiters between data tables |
| // |
| // API |
| //============================================================== |
| // float exp10f(float) |
| // |
| // Overview of operation |
| //============================================================== |
| // Background |
| // |
| // Implementation |
| // |
| // Let x= (K + fh + fl + r)/log2(10), where |
| // K is an integer, fh= 0.b1 b2 b3 b4 b5, |
| // fl= 2^{-5}* 0.b6 b7 b8 b9 b10 (fh, fl >= 0), |
| // and |r|<2^{-11} |
| // Th is a table that stores 2^fh (32 entries) rounded to |
| // double extended precision (only mantissa is stored) |
| // Tl is a table that stores 2^fl (32 entries) rounded to |
| // double extended precision (only mantissa is stored) |
| // |
| // 10^x is approximated as |
| // 2^K * Th [ fh ] * Tl [ fl ] * (1+c1*r+c2*r^2) |
| |
| // Note there are only 10 non-zero arguments x that produce an exact result: |
| // x = 1.0, 2.0, ... 10.0. |
| // We test for these cases and use s1 to avoid setting the inexact flag. |
| |
| // Special values |
| //============================================================== |
| // exp10(0)= 1 |
| // exp10(+inf)= inf |
| // exp10(-inf)= 0 |
| // |
| |
| // Registers used |
| //============================================================== |
| // r2-r3, r14-r40 |
| // f6-f15, f32-f52 |
| // p6-p12 |
| // |
| |
| |
// Symbolic register names used by exp10f and __libm_error_region.
//
// General registers
//   r2-r3, r14-r31 hold addresses, integer copies of thresholds, masks and
//   the pieces of K + f extracted from the rounded product.
//   r33-r36 (GR_SAVE_*) and r37-r40 (GR_Parameter_*) are the 4 locals and
//   4 outputs of the frame requested by exp10f's
//   "alloc r32= ar.pfs, 1, 4, 4, 0".
//
// Aliases to be aware of
//   GR_ROUNDVAL and GR_SNORM_LIMIT are both r26: exp10f moves the rounding
//   constant into FR_ROUNDVAL before r26 is reloaded with the
//   smallest-normal threshold.
//   FR_X and FR_LOG2_10 are both f10: while FR_LOG2_10 is live, f10 holds
//   the scaled log2(10) constant, not the function argument. Anything that
//   hands FR_X to the error handler has to take that into account.
//   FR_Y is f1, the register the code uses as the constant 1.0; exp10f has
//   no second argument.
//
// FR_2P53, FR_COEFF3, FR_COEFF4, FR_DX_L210 and FR_E are defined here but
// not referenced by the code visible in this file.
| GR_TBL_START = r2 |
| GR_LOG_TBL = r3 |
| |
| GR_OF_LIMIT = r14 |
| GR_UF_LIMIT = r15 |
| GR_EXP_CORR = r16 |
| GR_F_low = r17 |
| GR_F_high = r18 |
| GR_K = r19 |
| GR_Flow_ADDR = r20 |
| |
| GR_BIAS = r21 |
| GR_Fh = r22 |
| GR_Fh_ADDR = r23 |
| GR_EXPMAX = r24 |
| |
// r26 is shared: rounding constant first, smallest-normal threshold later.
| GR_ROUNDVAL = r26 |
| GR_SNORM_LIMIT = r26 |
| GR_MASK = r27 |
| GR_KF0 = r28 |
| GR_MASK_low = r29 |
| GR_COEFF_START = r30 |
| GR_exact_limit = r31 |
| |
// Locals of the register frame: state saved around the error-support call.
| GR_SAVE_B0 = r33 |
| GR_SAVE_PFS = r34 |
| GR_SAVE_GP = r35 |
| GR_SAVE_SP = r36 |
| |
// Outputs of the register frame: arguments of __libm_error_support.
| GR_Parameter_X = r37 |
| GR_Parameter_Y = r38 |
| GR_Parameter_RESULT = r39 |
| GR_Parameter_TAG = r40 |
| |
| |
// Values stored to the stack by __libm_error_region.
| FR_X = f10 |
| FR_Y = f1 |
| FR_RESULT = f8 |
| |
| |
| FR_COEFF1 = f6 |
| FR_COEFF2 = f7 |
| FR_R = f9 |
// Same register as FR_X above.
| FR_LOG2_10 = f10 |
| |
| FR_2P53 = f11 |
| FR_KF0 = f12 |
| FR_COEFF3 = f13 |
| FR_COEFF4 = f14 |
| FR_UF_LIMIT = f15 |
| |
| FR_OF_LIMIT = f32 |
| FR_DX_L210 = f33 |
| FR_ROUNDVAL = f34 |
| FR_KF = f35 |
| |
| FR_2_TO_K = f36 |
| FR_T_low = f37 |
| FR_T_high = f38 |
| |
| FR_P12 = f41 |
| FR_T_low_K = f42 |
| FR_T = f44 |
| FR_P = f45 |
| |
| FR_E = f49 |
| FR_exact_limit = f50 |
| |
| FR_int_x = f51 |
| FR_SNORM_LIMIT = f52 |
| |
| |
| // Data tables |
| //============================================================== |
| |
// Read-only constants for exp10f.
//
// Layout relied on by the code:
//   poly_coeffs  3 entries of 16 bytes (significand, then sign/exponent
//                word), read with ldfe and a post-increment of 16.
//   T_table      reached by stepping the same pointer past poly_coeffs, so
//                it has to start right after the third coefficient.
//                +0    32 significands of 2^(j/1024), j = 0..31 (low table)
//                +256  32 significands of 2^(j/32),   j = 0..31 (high table)
//                Entries are 8 bytes, indexed with a shift by 3 and read
//                with ldf8, i.e. only the 64-bit significand is stored.
// Do not insert, remove or reorder entries without updating the address
// arithmetic in exp10f.
| RODATA |
| |
| .align 16 |
| |
| LOCAL_OBJECT_START(poly_coeffs) |
| |
| data8 0xd49a784bcd1b8afe, 0x00003fcb // log2(10)*2^(10-63) |
| data8 0xb17217f7d1cf79ab, 0x00004033 // C_1 * 2^53 |
| data8 0xf5fdeffc162c7541, 0x00004066 // C_2 * 2^106 |
| LOCAL_OBJECT_END(poly_coeffs) |
| |
| |
| LOCAL_OBJECT_START(T_table) |
| |
| // Low table: 2^{0.00000 b6 b7 b8 b9 b10}, index = 5-bit value b6..b10 |
| data8 0x8000000000000000, 0x8016302f17467628 |
| data8 0x802c6436d0e04f50, 0x80429c17d77c18ed |
| data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af |
| data8 0x80855ad965e88b83, 0x809ba2264dada76a |
| data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f |
| data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e |
| data8 0x810b40a1d81406d4, 0x81219f24a5baa59d |
| data8 0x813801881d886f7b, 0x814e67cceb90502c |
| data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47 |
| data8 0x8191b1ea15813bfd, 0x81a827baf7838b78 |
| data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c |
| data8 0x81eba08c8ad4536f, 0x820225f44b55b33b |
| data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a |
| data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c |
| data8 0x8272fb97b2a5894c, 0x828998760d01faf3 |
| data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906 |
| // |
| // High table (T_table+256): 2^{0.b1 b2 b3 b4 b5}, index = 5-bit value b1..b5 |
| data8 0x8000000000000000, 0x82cd8698ac2ba1d7 |
| data8 0x85aac367cc487b14, 0x88980e8092da8527 |
| data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0 |
| data8 0x91c3d373ab11c336, 0x94f4efa8fef70961 |
| data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538 |
| data8 0x9ef5326091a111ad, 0xa27043030c496818 |
| data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8 |
| data8 0xad583eea42a14ac6, 0xb123f581d2ac258f |
| data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9 |
| data8 0xbd08a39f580c36be, 0xc12c4cca66709456 |
| data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2 |
| data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a |
| data8 0xd744fccad69d6af4, 0xdbfbb797daf23755 |
| data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8 |
| data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb |
| data8 0xf5257d152486cc2c, 0xfa83b2db722a033a |
| LOCAL_OBJECT_END(T_table) |
| |
| |
| |
| .section .text |
| |
| //============================================================== |
| // float exp10f(float x) (also exported as pow10f via weak_alias) |
| // |
| // In: f8 = x |
| // Out: f8 = 10^x rounded to single precision |
| // |
| // Paths |
| // main x finite and non-zero: table lookup + degree-2 polynomial. |
| // Returns directly when x >= smallest-normal threshold (p11). |
| // Otherwise the (denormal) result is reported with tag 266. |
| // SPECIAL_exp10 x is NaN, +/-Inf or +/-0 (no error call). |
| // OUT_RANGE_exp10 x beyond the overflow/underflow thresholds: an |
| // overflowing/underflowing product is generated and |
| // __libm_error_region is entered with tag 167/266. |
| // |
| // Predicates |
| // p6 x < 0 p8 not (x < 0) |
| // p12 take SPECIAL / OUT_RANGE branch |
| // p11 result is in the normal range |
| // p7 result inexact (final fma uses s0) |
| // p9 result exact (final fma uses s1, inexact flag untouched) |
| // |
| // Error-handler argument |
| // __libm_error_region stores FR_X (f10) as the function argument. f10 is |
| // also FR_LOG2_10, so x is copied into FR_X as soon as the last use of |
| // FR_LOG2_10 has issued and before any branch to the error region. |
| //============================================================== |
| GLOBAL_IEEE754_ENTRY(exp10f) |
| |
| |
| {.mfi |
| alloc r32= ar.pfs, 1, 4, 4, 0 |
| // will continue only for non-zero normal/denormal numbers |
| fclass.nm.unc p12, p7= f8, 0x1b |
| nop.i 0 |
| } |
| {.mlx |
| // GR_TBL_START= pointer to log2(10), C_1, C_2 followed by T_table |
| addl GR_TBL_START= @ltoff(poly_coeffs), gp |
| movl GR_ROUNDVAL= 0x3fc00000 // 1.5 (SP) |
| } |
| ;; |
| |
| {.mfi |
| ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table |
| fcmp.lt.s1 p6, p8= f8, f0 // X<0 ? |
| nop.i 0 |
| } |
| ;; |
| |
| {.mlx |
| nop.m 0 |
| movl GR_UF_LIMIT= 0xc2349e35 // (-2^7-22) / log2(10) |
| } |
| {.mlx |
| setf.s FR_ROUNDVAL= GR_ROUNDVAL // r26 is reused below for SNORM limit |
| movl GR_OF_LIMIT= 0x421a209a // Overflow threshold |
| } |
| ;; |
| |
| {.mlx |
| ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63) |
| movl GR_SNORM_LIMIT= 0xc217b818 // Smallest normal threshold |
| } |
| {.mib |
| nop.m 0 |
| nop.i 0 |
| (p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero |
| } |
| ;; |
| |
| {.mfi |
| setf.s FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit |
| fma.s0 f8= f8, f1, f0 // normalize x |
| nop.i 0 |
| } |
| ;; |
| |
| {.mfi |
| setf.s FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit |
| (p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer |
| nop.i 0 |
| } |
| {.mfi |
| setf.s FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit |
| fma.s1 FR_KF0= f8, FR_LOG2_10, FR_ROUNDVAL // y= (x*log2(10)*2^10 + |
| // 1.5*2^63) * 2^(-63) |
| mov GR_EXP_CORR= 0xffff-126 |
| } |
| ;; |
| |
| {.mfi |
| ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1 |
| fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)*2^(10-63) |
| mov GR_MASK= 1023 |
| } |
| ;; |
| |
| {.mfi |
| ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2; pointer now at T_table |
| nop.f 0 |
| mov GR_MASK_low= 31 |
| } |
| ;; |
| |
| {.mlx |
| getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y) |
| (p8) movl GR_exact_limit= 0x41200000 // Largest x for exact result, |
| // +10.0 |
| } |
| ;; |
| |
| {.mfi |
| add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table |
| fcmp.gt.s1 p12, p7= f8, FR_OF_LIMIT // x>overflow threshold ? |
| nop.i 0 |
| } |
| ;; |
| |
| {.mfi |
| (p8) setf.s FR_exact_limit = GR_exact_limit // Largest x for exact result |
| (p8) fcvt.xf FR_int_x = FR_int_x // Integral part of x |
| shr GR_K= GR_KF0, 10 // K |
| } |
| {.mfi |
| and GR_F_high= GR_MASK, GR_KF0 // f_high*32 + f_low (10 fraction bits) |
| // Last use of FR_LOG2_10 (f10): |
| fms.s1 FR_R= f8, FR_LOG2_10, FR_KF // r*2^(-53)= [ x*log2(10)- |
| // (K+f) ] *2^{10-63} |
| and GR_F_low= GR_KF0, GR_MASK_low // f_low |
| } |
| ;; |
| |
| {.mmi |
| shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low} |
| add GR_BIAS= GR_K, GR_EXP_CORR // biased exponent of 2^{K-126} |
| shr GR_Fh= GR_F_high, 5 // f_high |
| } |
| ;; |
| |
| {.mfi |
| setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126} |
| (p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x<underflow threshold ? |
| shladd GR_Fh_ADDR= GR_Fh, 3, GR_LOG_TBL // address of 2^{f_high} |
| } |
| {.mfi |
| ldf8 FR_T_low= [ GR_Flow_ADDR ] // load T_low= 2^{f_low} |
| // FR_LOG2_10 is dead from here on, so f10 can take the argument. This |
| // runs before the OUT_RANGE branch and before f8 is overwritten with the |
| // result, so both error paths hand the real x to __libm_error_region. |
| mov FR_X= f8 // Save x for error handling |
| nop.i 0 |
| } |
| ;; |
| |
| {.mfb |
| ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high} |
| fcmp.ge.s1 p11, p0= f8, FR_SNORM_LIMIT // Test x for normal range |
| (p12) br.cond.spnt OUT_RANGE_exp10 |
| } |
| ;; |
| |
| {.mfi |
| nop.m 0 |
| fma.s1 FR_P12= FR_COEFF2, FR_R, FR_COEFF1 // P12= C_1+C_2*r |
| cmp.eq p7,p9= r0,r0 // Assume inexact result |
| } |
| ;; |
| |
| {.mfi |
| nop.m 0 |
| (p8) fcmp.eq.s1 p9,p7= FR_int_x, f8 // Test x positive integer |
| nop.i 0 |
| } |
| {.mfi |
| nop.m 0 |
| fma.s1 FR_T_low_K= FR_T_low, FR_2_TO_K, f0 // T= 2^{K-126}*T_low |
| nop.i 0 |
| } |
| ;; |
| |
| {.mfi |
| nop.m 0 |
| fma.s1 FR_P= FR_P12, FR_R, f0 // P= P12*r |
| nop.i 0 |
| } |
| ;; |
| |
| // If x a positive integer, will it produce an exact result? |
| // p7 result will be inexact |
| // p9 result will be exact |
| {.mfi |
| nop.m 0 |
| (p9) fcmp.le.s1 p9,p7= f8, FR_exact_limit // Test x gives exact result |
| nop.i 0 |
| } |
| {.mfi |
| nop.m 0 |
| fma.s1 FR_T= FR_T_low_K, FR_T_high, f0 // T= T*T_high |
| nop.i 0 |
| } |
| ;; |
| |
| .pred.rel "mutex",p7,p9 |
| {.mfi |
| nop.m 0 |
| (p7) fma.s.s0 f8= FR_P, FR_T, FR_T // result= T+T*P, inexact set |
| nop.i 0 |
| } |
| {.mfb |
| nop.m 0 |
| (p9) fma.s.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1 |
| (p11) br.ret.sptk b0 // return, if result normal |
| } |
| ;; |
| |
| // Here if result in denormal range (and not zero) |
| // f8 = result, FR_X = x (saved above) |
| {.mib |
| nop.m 0 |
| mov GR_Parameter_TAG= 266 |
| br.cond.sptk __libm_error_region // Branch to error handling |
| } |
| ;; |
| |
| // x is NaN, +/-Infinity or +/-0 (branched here before x was normalized) |
| SPECIAL_exp10: |
| {.mfi |
| nop.m 0 |
| fclass.m p6, p0= f8, 0x22 // x= -Infinity ? |
| nop.i 0 |
| } |
| ;; |
| |
| {.mfi |
| nop.m 0 |
| fclass.m p7, p0= f8, 0x21 // x= +Infinity ? |
| nop.i 0 |
| } |
| ;; |
| |
| {.mfi |
| nop.m 0 |
| fclass.m p8, p0= f8, 0x7 // x= +/-Zero ? |
| nop.i 0 |
| } |
| {.mfb |
| nop.m 0 |
| (p6) mov f8= f0 // exp10(-Infinity)= 0 |
| (p6) br.ret.spnt b0 |
| } |
| ;; |
| |
| {.mfb |
| nop.m 0 |
| nop.f 0 |
| (p7) br.ret.spnt b0 // exp10(+Infinity)= +Infinity |
| } |
| ;; |
| |
| {.mfb |
| nop.m 0 |
| (p8) mov f8= f1 // exp10(+/-0)= 1 |
| (p8) br.ret.spnt b0 |
| } |
| ;; |
| |
| {.mfb |
| nop.m 0 |
| fma.s.s0 f8= f8, f1, f0 // Remaining cases: NaNs |
| br.ret.sptk b0 |
| } |
| ;; |
| |
| |
| // x above the overflow threshold or below the underflow threshold. |
| // p6/p8 still hold the sign test done at entry; FR_X already holds x. |
| OUT_RANGE_exp10: |
| |
| // underflow: p6= 1 |
| // overflow: p8= 1 |
| |
| .pred.rel "mutex",p6,p8 |
| {.mmi |
| (p8) mov GR_EXPMAX= 0x1fffe // largest finite register exponent |
| (p6) mov GR_EXPMAX= 1 // smallest register exponent |
| nop.i 0 |
| } |
| ;; |
| |
| {.mii |
| setf.exp FR_R= GR_EXPMAX |
| (p8) mov GR_Parameter_TAG= 167 |
| (p6) mov GR_Parameter_TAG= 266 |
| } |
| ;; |
| |
| {.mfb |
| nop.m 0 |
| fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow |
| br.cond.sptk __libm_error_region // Branch to error handling |
| } |
| ;; |
| |
| GLOBAL_IEEE754_END(exp10f) |
| weak_alias (exp10f, pow10f) |
| |
| |
// __libm_error_region
//
// Shared tail for the exp10f error paths. Entered by branch (not call) with
//   GR_Parameter_TAG  error tag already set by the branching code
//   FR_X, FR_Y        values to report as the function arguments
//   FR_RESULT (f8)    the result computed so far
//
// Builds a 64-byte frame and stores the three values as single-precision
// floats (offsets relative to the new sp):
//   sp+16  FR_X        address passed in GR_Parameter_X
//   sp+32  FR_Y        address passed in GR_Parameter_Y
//   sp+48  FR_RESULT   address passed in GR_Parameter_RESULT
// then calls __libm_error_support, reloads f8 from sp+48, restores
// sp, b0, gp and ar.pfs, and returns to exp10f's caller.
| LOCAL_LIBM_ENTRY(__libm_error_region) |
| |
| .prologue |
| {.mfi |
| add GR_Parameter_Y= -32, sp // Parameter 2 address (new sp+32) |
| nop.f 0 |
| .save ar.pfs, GR_SAVE_PFS |
| mov GR_SAVE_PFS= ar.pfs // Save ar.pfs |
| } |
| |
| {.mfi |
| .fframe 64 |
| add sp= -64, sp // Create new stack |
| nop.f 0 |
| mov GR_SAVE_GP= gp // Save gp |
| } |
| ;; |
| |
| {.mmi |
| stfs [ GR_Parameter_Y ]= FR_Y, 16 // STORE Parameter 2 on stack; pointer -> sp+48 |
| add GR_Parameter_X= 16, sp // Parameter 1 address |
| .save b0, GR_SAVE_B0 |
| mov GR_SAVE_B0= b0 // Save b0 |
| } |
| ;; |
| |
| .body |
| {.mib |
| stfs [ GR_Parameter_X ]= FR_X // STORE Parameter 1 on stack |
| add GR_Parameter_RESULT= 0, GR_Parameter_Y // Parameter 3 address (sp+48) |
| nop.b 0 |
| } |
| {.mib |
| stfs [ GR_Parameter_Y ]= FR_RESULT // STORE Parameter 3 on stack |
| add GR_Parameter_Y= -16, GR_Parameter_Y // Back to Parameter 2 address (sp+32) |
| br.call.sptk b0= __libm_error_support# // Call error handling function |
| } |
| ;; |
| |
| {.mmi |
| add GR_Parameter_RESULT= 48, sp // Recompute result address after the call |
| nop.m 0 |
| nop.i 0 |
| } |
| ;; |
| |
| {.mmi |
| ldfs f8= [ GR_Parameter_RESULT ] // Get return result off stack |
| .restore sp |
| add sp= 64, sp // Restore stack pointer |
| mov b0= GR_SAVE_B0 // Restore return address |
| } |
| ;; |
| |
| {.mib |
| mov gp= GR_SAVE_GP // Restore gp |
| mov ar.pfs= GR_SAVE_PFS // Restore ar.pfs |
| br.ret.sptk b0 // Return |
| } |
| ;; |
| |
| |
| LOCAL_LIBM_END(__libm_error_region) |
| |
// External error-support routine called above.
| .type __libm_error_support#, @function |
| .global __libm_error_support# |