| /* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. |
| |
| Permission is hereby granted, free of charge, to any person obtaining |
| a copy of this software and associated documentation files (the |
| ``Software''), to deal in the Software without restriction, including |
| without limitation the rights to use, copy, modify, merge, publish, |
| distribute, sublicense, and/or sell copies of the Software, and to |
| permit persons to whom the Software is furnished to do so, subject to |
| the following conditions: |
| |
| The above copyright notice and this permission notice shall be |
| included in all copies or substantial portions of the Software. |
| |
| THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, |
| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
| |
| #include <stdio.h> |
| |
| #include <ffi.h> |
| #include <ffi_common.h> |
| |
| #include <stdlib.h> |
| |
| /* Stack alignment requirement in bytes */ |
| #if defined (__APPLE__) |
| #define AARCH64_STACK_ALIGN 1 |
| #else |
| #define AARCH64_STACK_ALIGN 16 |
| #endif |
| |
| #define N_X_ARG_REG 8 |
| #define N_V_ARG_REG 8 |
| |
| #define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT) |
| |
| union _d |
| { |
| UINT64 d; |
| UINT32 s[2]; |
| }; |
| |
| struct call_context |
| { |
| UINT64 x [AARCH64_N_XREG]; |
| struct |
| { |
| union _d d[2]; |
| } v [AARCH64_N_VREG]; |
| }; |
| |
| #if defined (__clang__) && defined (__APPLE__) |
| extern void |
| sys_icache_invalidate (void *start, size_t len); |
| #endif |
| |
| static inline void |
| ffi_clear_cache (void *start, void *end) |
| { |
| #if defined (__clang__) && defined (__APPLE__) |
| sys_icache_invalidate (start, (char *)end - (char *)start); |
| #elif defined (__GNUC__) |
| __builtin___clear_cache (start, end); |
| #else |
| #error "Missing builtin to flush instruction cache" |
| #endif |
| } |
| |
| static void * |
| get_x_addr (struct call_context *context, unsigned n) |
| { |
| return &context->x[n]; |
| } |
| |
| static void * |
| get_s_addr (struct call_context *context, unsigned n) |
| { |
| #if defined __AARCH64EB__ |
| return &context->v[n].d[1].s[1]; |
| #else |
| return &context->v[n].d[0].s[0]; |
| #endif |
| } |
| |
| static void * |
| get_d_addr (struct call_context *context, unsigned n) |
| { |
| #if defined __AARCH64EB__ |
| return &context->v[n].d[1]; |
| #else |
| return &context->v[n].d[0]; |
| #endif |
| } |
| |
| static void * |
| get_v_addr (struct call_context *context, unsigned n) |
| { |
| return &context->v[n]; |
| } |
| |
| /* Return the memory location at which a basic type would reside |
| were it to have been stored in register n. */ |
| |
| static void * |
| get_basic_type_addr (unsigned short type, struct call_context *context, |
| unsigned n) |
| { |
| switch (type) |
| { |
| case FFI_TYPE_FLOAT: |
| return get_s_addr (context, n); |
| case FFI_TYPE_DOUBLE: |
| return get_d_addr (context, n); |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| return get_v_addr (context, n); |
| #endif |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_SINT8: |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_SINT16: |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_SINT32: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_SINT64: |
| return get_x_addr (context, n); |
| case FFI_TYPE_VOID: |
| return NULL; |
| default: |
| FFI_ASSERT (0); |
| return NULL; |
| } |
| } |
| |
| /* Return the alignment width for each of the basic types. */ |
| |
| static size_t |
| get_basic_type_alignment (unsigned short type) |
| { |
| switch (type) |
| { |
| case FFI_TYPE_FLOAT: |
| #if defined (__APPLE__) |
| return sizeof (UINT32); |
| #endif |
| case FFI_TYPE_DOUBLE: |
| return sizeof (UINT64); |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| return sizeof (long double); |
| #endif |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_SINT8: |
| #if defined (__APPLE__) |
| return sizeof (UINT8); |
| #endif |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_SINT16: |
| #if defined (__APPLE__) |
| return sizeof (UINT16); |
| #endif |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_SINT32: |
| #if defined (__APPLE__) |
| return sizeof (UINT32); |
| #endif |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_SINT64: |
| return sizeof (UINT64); |
| |
| default: |
| FFI_ASSERT (0); |
| return 0; |
| } |
| } |
| |
| /* Return the size in bytes for each of the basic types. */ |
| |
| static size_t |
| get_basic_type_size (unsigned short type) |
| { |
| switch (type) |
| { |
| case FFI_TYPE_FLOAT: |
| return sizeof (UINT32); |
| case FFI_TYPE_DOUBLE: |
| return sizeof (UINT64); |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| return sizeof (long double); |
| #endif |
| case FFI_TYPE_UINT8: |
| return sizeof (UINT8); |
| case FFI_TYPE_SINT8: |
| return sizeof (SINT8); |
| case FFI_TYPE_UINT16: |
| return sizeof (UINT16); |
| case FFI_TYPE_SINT16: |
| return sizeof (SINT16); |
| case FFI_TYPE_UINT32: |
| return sizeof (UINT32); |
| case FFI_TYPE_INT: |
| case FFI_TYPE_SINT32: |
| return sizeof (SINT32); |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| return sizeof (UINT64); |
| case FFI_TYPE_SINT64: |
| return sizeof (SINT64); |
| |
| default: |
| FFI_ASSERT (0); |
| return 0; |
| } |
| } |
| |
| extern void |
| ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *, |
| extended_cif *), |
| struct call_context *context, |
| extended_cif *, |
| size_t, |
| void (*fn)(void)); |
| |
| extern void |
| ffi_closure_SYSV (ffi_closure *); |
| |
| /* Test for an FFI floating point representation. */ |
| |
| static unsigned |
| is_floating_type (unsigned short type) |
| { |
| return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE |
| || type == FFI_TYPE_LONGDOUBLE); |
| } |
| |
| /* Test for a homogeneous structure. */ |
| |
| static unsigned short |
| get_homogeneous_type (ffi_type *ty) |
| { |
| if (ty->type == FFI_TYPE_STRUCT && ty->elements) |
| { |
| unsigned i; |
| unsigned short candidate_type |
| = get_homogeneous_type (ty->elements[0]); |
| for (i =1; ty->elements[i]; i++) |
| { |
| unsigned short iteration_type = 0; |
| /* If we have a nested struct, we must find its homogeneous type. |
| If that fits with our candidate type, we are still |
| homogeneous. */ |
| if (ty->elements[i]->type == FFI_TYPE_STRUCT |
| && ty->elements[i]->elements) |
| { |
| iteration_type = get_homogeneous_type (ty->elements[i]); |
| } |
| else |
| { |
| iteration_type = ty->elements[i]->type; |
| } |
| |
| /* If we are not homogeneous, return FFI_TYPE_STRUCT. */ |
| if (candidate_type != iteration_type) |
| return FFI_TYPE_STRUCT; |
| } |
| return candidate_type; |
| } |
| |
| /* Base case, we have no more levels of nesting, so we |
| are a basic type, and so, trivially homogeneous in that type. */ |
| return ty->type; |
| } |
| |
| /* Determine the number of elements within a STRUCT. |
| |
| Note, we must handle nested structs. |
| |
| If ty is not a STRUCT this function will return 0. */ |
| |
| static unsigned |
| element_count (ffi_type *ty) |
| { |
| if (ty->type == FFI_TYPE_STRUCT && ty->elements) |
| { |
| unsigned n; |
| unsigned elems = 0; |
| for (n = 0; ty->elements[n]; n++) |
| { |
| if (ty->elements[n]->type == FFI_TYPE_STRUCT |
| && ty->elements[n]->elements) |
| elems += element_count (ty->elements[n]); |
| else |
| elems++; |
| } |
| return elems; |
| } |
| return 0; |
| } |
| |
| /* Test for a homogeneous floating point aggregate. |
| |
| A homogeneous floating point aggregate is a homogeneous aggregate of |
| a half- single- or double- precision floating point type with one |
| to four elements. Note that this includes nested structs of the |
| basic type. */ |
| |
| static int |
| is_hfa (ffi_type *ty) |
| { |
| if (ty->type == FFI_TYPE_STRUCT |
| && ty->elements[0] |
| && is_floating_type (get_homogeneous_type (ty))) |
| { |
| unsigned n = element_count (ty); |
| return n >= 1 && n <= 4; |
| } |
| return 0; |
| } |
| |
| /* Test if an ffi_type is a candidate for passing in a register. |
| |
| This test does not check that sufficient registers of the |
| appropriate class are actually available, merely that IFF |
| sufficient registers are available then the argument will be passed |
| in register(s). |
| |
| Note that an ffi_type that is deemed to be a register candidate |
| will always be returned in registers. |
| |
| Returns 1 if a register candidate else 0. */ |
| |
| static int |
| is_register_candidate (ffi_type *ty) |
| { |
| switch (ty->type) |
| { |
| case FFI_TYPE_VOID: |
| case FFI_TYPE_FLOAT: |
| case FFI_TYPE_DOUBLE: |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| #endif |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_SINT8: |
| case FFI_TYPE_SINT16: |
| case FFI_TYPE_SINT32: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_SINT64: |
| return 1; |
| |
| case FFI_TYPE_STRUCT: |
| if (is_hfa (ty)) |
| { |
| return 1; |
| } |
| else if (ty->size > 16) |
| { |
| /* Too large. Will be replaced with a pointer to memory. The |
| pointer MAY be passed in a register, but the value will |
| not. This test specifically fails since the argument will |
| never be passed by value in registers. */ |
| return 0; |
| } |
| else |
| { |
| /* Might be passed in registers depending on the number of |
| registers required. */ |
| return (ty->size + 7) / 8 < N_X_ARG_REG; |
| } |
| break; |
| |
| default: |
| FFI_ASSERT (0); |
| break; |
| } |
| |
| return 0; |
| } |
| |
| /* Test if an ffi_type argument or result is a candidate for a vector |
| register. */ |
| |
| static int |
| is_v_register_candidate (ffi_type *ty) |
| { |
| return is_floating_type (ty->type) |
| || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty)); |
| } |
| |
| /* Representation of the procedure call argument marshalling |
| state. |
| |
| The terse state variable names match the names used in the AARCH64 |
| PCS. */ |
| |
| struct arg_state |
| { |
| unsigned ngrn; /* Next general-purpose register number. */ |
| unsigned nsrn; /* Next vector register number. */ |
| size_t nsaa; /* Next stack offset. */ |
| |
| #if defined (__APPLE__) |
| unsigned allocating_variadic; |
| #endif |
| }; |
| |
| /* Initialize a procedure call argument marshalling state. */ |
| static void |
| arg_init (struct arg_state *state, size_t call_frame_size) |
| { |
| state->ngrn = 0; |
| state->nsrn = 0; |
| state->nsaa = 0; |
| |
| #if defined (__APPLE__) |
| state->allocating_variadic = 0; |
| #endif |
| } |
| |
| /* Return the number of available consecutive core argument |
| registers. */ |
| |
| static unsigned |
| available_x (struct arg_state *state) |
| { |
| return N_X_ARG_REG - state->ngrn; |
| } |
| |
| /* Return the number of available consecutive vector argument |
| registers. */ |
| |
| static unsigned |
| available_v (struct arg_state *state) |
| { |
| return N_V_ARG_REG - state->nsrn; |
| } |
| |
| static void * |
| allocate_to_x (struct call_context *context, struct arg_state *state) |
| { |
| FFI_ASSERT (state->ngrn < N_X_ARG_REG); |
| return get_x_addr (context, (state->ngrn)++); |
| } |
| |
| static void * |
| allocate_to_s (struct call_context *context, struct arg_state *state) |
| { |
| FFI_ASSERT (state->nsrn < N_V_ARG_REG); |
| return get_s_addr (context, (state->nsrn)++); |
| } |
| |
| static void * |
| allocate_to_d (struct call_context *context, struct arg_state *state) |
| { |
| FFI_ASSERT (state->nsrn < N_V_ARG_REG); |
| return get_d_addr (context, (state->nsrn)++); |
| } |
| |
| static void * |
| allocate_to_v (struct call_context *context, struct arg_state *state) |
| { |
| FFI_ASSERT (state->nsrn < N_V_ARG_REG); |
| return get_v_addr (context, (state->nsrn)++); |
| } |
| |
| /* Allocate an aligned slot on the stack and return a pointer to it. */ |
| static void * |
| allocate_to_stack (struct arg_state *state, void *stack, size_t alignment, |
| size_t size) |
| { |
| void *allocation; |
| |
| /* Round up the NSAA to the larger of 8 or the natural |
| alignment of the argument's type. */ |
| state->nsaa = ALIGN (state->nsaa, alignment); |
| state->nsaa = ALIGN (state->nsaa, alignment); |
| #if defined (__APPLE__) |
| if (state->allocating_variadic) |
| state->nsaa = ALIGN (state->nsaa, 8); |
| #else |
| state->nsaa = ALIGN (state->nsaa, 8); |
| #endif |
| |
| allocation = stack + state->nsaa; |
| |
| state->nsaa += size; |
| return allocation; |
| } |
| |
| static void |
| copy_basic_type (void *dest, void *source, unsigned short type) |
| { |
| /* This is necessary to ensure that basic types are copied |
| sign extended to 64-bits as libffi expects. */ |
| switch (type) |
| { |
| case FFI_TYPE_FLOAT: |
| *(float *) dest = *(float *) source; |
| break; |
| case FFI_TYPE_DOUBLE: |
| *(double *) dest = *(double *) source; |
| break; |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| *(long double *) dest = *(long double *) source; |
| break; |
| #endif |
| case FFI_TYPE_UINT8: |
| *(ffi_arg *) dest = *(UINT8 *) source; |
| break; |
| case FFI_TYPE_SINT8: |
| *(ffi_sarg *) dest = *(SINT8 *) source; |
| break; |
| case FFI_TYPE_UINT16: |
| *(ffi_arg *) dest = *(UINT16 *) source; |
| break; |
| case FFI_TYPE_SINT16: |
| *(ffi_sarg *) dest = *(SINT16 *) source; |
| break; |
| case FFI_TYPE_UINT32: |
| *(ffi_arg *) dest = *(UINT32 *) source; |
| break; |
| case FFI_TYPE_INT: |
| case FFI_TYPE_SINT32: |
| *(ffi_sarg *) dest = *(SINT32 *) source; |
| break; |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| *(ffi_arg *) dest = *(UINT64 *) source; |
| break; |
| case FFI_TYPE_SINT64: |
| *(ffi_sarg *) dest = *(SINT64 *) source; |
| break; |
| case FFI_TYPE_VOID: |
| break; |
| |
| default: |
| FFI_ASSERT (0); |
| } |
| } |
| |
| static void |
| copy_hfa_to_reg_or_stack (void *memory, |
| ffi_type *ty, |
| struct call_context *context, |
| unsigned char *stack, |
| struct arg_state *state) |
| { |
| unsigned elems = element_count (ty); |
| if (available_v (state) < elems) |
| { |
| /* There are insufficient V registers. Further V register allocations |
| are prevented, the NSAA is adjusted (by allocate_to_stack ()) |
| and the argument is copied to memory at the adjusted NSAA. */ |
| state->nsrn = N_V_ARG_REG; |
| memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size), |
| memory, |
| ty->size); |
| } |
| else |
| { |
| int i; |
| unsigned short type = get_homogeneous_type (ty); |
| for (i = 0; i < elems; i++) |
| { |
| void *reg = allocate_to_v (context, state); |
| copy_basic_type (reg, memory, type); |
| memory += get_basic_type_size (type); |
| } |
| } |
| } |
| |
| /* Either allocate an appropriate register for the argument type, or if |
| none are available, allocate a stack slot and return a pointer |
| to the allocated space. */ |
| |
| static void * |
| allocate_to_register_or_stack (struct call_context *context, |
| unsigned char *stack, |
| struct arg_state *state, |
| unsigned short type) |
| { |
| size_t alignment = get_basic_type_alignment (type); |
| size_t size = alignment; |
| switch (type) |
| { |
| case FFI_TYPE_FLOAT: |
| /* This is the only case for which the allocated stack size |
| should not match the alignment of the type. */ |
| size = sizeof (UINT32); |
| /* Fall through. */ |
| case FFI_TYPE_DOUBLE: |
| if (state->nsrn < N_V_ARG_REG) |
| return allocate_to_d (context, state); |
| state->nsrn = N_V_ARG_REG; |
| break; |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| if (state->nsrn < N_V_ARG_REG) |
| return allocate_to_v (context, state); |
| state->nsrn = N_V_ARG_REG; |
| break; |
| #endif |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_SINT8: |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_SINT16: |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_SINT32: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_SINT64: |
| if (state->ngrn < N_X_ARG_REG) |
| return allocate_to_x (context, state); |
| state->ngrn = N_X_ARG_REG; |
| break; |
| default: |
| FFI_ASSERT (0); |
| } |
| |
| return allocate_to_stack (state, stack, alignment, size); |
| } |
| |
| /* Copy a value to an appropriate register, or if none are |
| available, to the stack. */ |
| |
| static void |
| copy_to_register_or_stack (struct call_context *context, |
| unsigned char *stack, |
| struct arg_state *state, |
| void *value, |
| unsigned short type) |
| { |
| copy_basic_type ( |
| allocate_to_register_or_stack (context, stack, state, type), |
| value, |
| type); |
| } |
| |
| /* Marshall the arguments from FFI representation to procedure call |
| context and stack. */ |
| |
| static unsigned |
| aarch64_prep_args (struct call_context *context, unsigned char *stack, |
| extended_cif *ecif) |
| { |
| int i; |
| struct arg_state state; |
| |
| arg_init (&state, ALIGN(ecif->cif->bytes, 16)); |
| |
| for (i = 0; i < ecif->cif->nargs; i++) |
| { |
| ffi_type *ty = ecif->cif->arg_types[i]; |
| switch (ty->type) |
| { |
| case FFI_TYPE_VOID: |
| FFI_ASSERT (0); |
| break; |
| |
| /* If the argument is a basic type the argument is allocated to an |
| appropriate register, or if none are available, to the stack. */ |
| case FFI_TYPE_FLOAT: |
| case FFI_TYPE_DOUBLE: |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| #endif |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_SINT8: |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_SINT16: |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_SINT32: |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_SINT64: |
| copy_to_register_or_stack (context, stack, &state, |
| ecif->avalue[i], ty->type); |
| break; |
| |
| case FFI_TYPE_STRUCT: |
| if (is_hfa (ty)) |
| { |
| copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context, |
| stack, &state); |
| } |
| else if (ty->size > 16) |
| { |
| /* If the argument is a composite type that is larger than 16 |
| bytes, then the argument has been copied to memory, and |
| the argument is replaced by a pointer to the copy. */ |
| |
| copy_to_register_or_stack (context, stack, &state, |
| &(ecif->avalue[i]), FFI_TYPE_POINTER); |
| } |
| else if (available_x (&state) >= (ty->size + 7) / 8) |
| { |
| /* If the argument is a composite type and the size in |
| double-words is not more than the number of available |
| X registers, then the argument is copied into consecutive |
| X registers. */ |
| int j; |
| for (j = 0; j < (ty->size + 7) / 8; j++) |
| { |
| memcpy (allocate_to_x (context, &state), |
| &(((UINT64 *) ecif->avalue[i])[j]), |
| sizeof (UINT64)); |
| } |
| } |
| else |
| { |
| /* Otherwise, there are insufficient X registers. Further X |
| register allocations are prevented, the NSAA is adjusted |
| (by allocate_to_stack ()) and the argument is copied to |
| memory at the adjusted NSAA. */ |
| state.ngrn = N_X_ARG_REG; |
| |
| memcpy (allocate_to_stack (&state, stack, ty->alignment, |
| ty->size), ecif->avalue + i, ty->size); |
| } |
| break; |
| |
| default: |
| FFI_ASSERT (0); |
| break; |
| } |
| |
| #if defined (__APPLE__) |
| if (i + 1 == ecif->cif->aarch64_nfixedargs) |
| { |
| state.ngrn = N_X_ARG_REG; |
| state.nsrn = N_V_ARG_REG; |
| |
| state.allocating_variadic = 1; |
| } |
| #endif |
| } |
| |
| return ecif->cif->aarch64_flags; |
| } |
| |
| ffi_status |
| ffi_prep_cif_machdep (ffi_cif *cif) |
| { |
| /* Round the stack up to a multiple of the stack alignment requirement. */ |
| cif->bytes = |
| (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1); |
| |
| /* Initialize our flags. We are interested if this CIF will touch a |
| vector register, if so we will enable context save and load to |
| those registers, otherwise not. This is intended to be friendly |
| to lazy float context switching in the kernel. */ |
| cif->aarch64_flags = 0; |
| |
| if (is_v_register_candidate (cif->rtype)) |
| { |
| cif->aarch64_flags |= AARCH64_FFI_WITH_V; |
| } |
| else |
| { |
| int i; |
| for (i = 0; i < cif->nargs; i++) |
| if (is_v_register_candidate (cif->arg_types[i])) |
| { |
| cif->aarch64_flags |= AARCH64_FFI_WITH_V; |
| break; |
| } |
| } |
| |
| #if defined (__APPLE__) |
| cif->aarch64_nfixedargs = 0; |
| #endif |
| |
| return FFI_OK; |
| } |
| |
| #if defined (__APPLE__) |
| |
| /* Perform Apple-specific cif processing for variadic calls */ |
| ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif, |
| unsigned int nfixedargs, |
| unsigned int ntotalargs) |
| { |
| ffi_status status; |
| |
| status = ffi_prep_cif_machdep (cif); |
| |
| cif->aarch64_nfixedargs = nfixedargs; |
| |
| return status; |
| } |
| |
| #endif |
| |
| /* Call a function with the provided arguments and capture the return |
| value. */ |
| void |
| ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) |
| { |
| extended_cif ecif; |
| |
| ecif.cif = cif; |
| ecif.avalue = avalue; |
| ecif.rvalue = rvalue; |
| |
| switch (cif->abi) |
| { |
| case FFI_SYSV: |
| { |
| struct call_context context; |
| size_t stack_bytes; |
| |
| /* Figure out the total amount of stack space we need, the |
| above call frame space needs to be 16 bytes aligned to |
| ensure correct alignment of the first object inserted in |
| that space hence the ALIGN applied to cif->bytes.*/ |
| stack_bytes = ALIGN(cif->bytes, 16); |
| |
| memset (&context, 0, sizeof (context)); |
| if (is_register_candidate (cif->rtype)) |
| { |
| ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn); |
| switch (cif->rtype->type) |
| { |
| case FFI_TYPE_VOID: |
| case FFI_TYPE_FLOAT: |
| case FFI_TYPE_DOUBLE: |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| #endif |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_SINT8: |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_SINT16: |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_SINT32: |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_SINT64: |
| { |
| void *addr = get_basic_type_addr (cif->rtype->type, |
| &context, 0); |
| copy_basic_type (rvalue, addr, cif->rtype->type); |
| break; |
| } |
| |
| case FFI_TYPE_STRUCT: |
| if (is_hfa (cif->rtype)) |
| { |
| int j; |
| unsigned short type = get_homogeneous_type (cif->rtype); |
| unsigned elems = element_count (cif->rtype); |
| for (j = 0; j < elems; j++) |
| { |
| void *reg = get_basic_type_addr (type, &context, j); |
| copy_basic_type (rvalue, reg, type); |
| rvalue += get_basic_type_size (type); |
| } |
| } |
| else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) |
| { |
| size_t size = ALIGN (cif->rtype->size, sizeof (UINT64)); |
| memcpy (rvalue, get_x_addr (&context, 0), size); |
| } |
| else |
| { |
| FFI_ASSERT (0); |
| } |
| break; |
| |
| default: |
| FFI_ASSERT (0); |
| break; |
| } |
| } |
| else |
| { |
| memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64)); |
| ffi_call_SYSV (aarch64_prep_args, &context, &ecif, |
| stack_bytes, fn); |
| } |
| break; |
| } |
| |
| default: |
| FFI_ASSERT (0); |
| break; |
| } |
| } |
| |
| static unsigned char trampoline [] = |
| { 0x70, 0x00, 0x00, 0x58, /* ldr x16, 1f */ |
| 0x91, 0x00, 0x00, 0x10, /* adr x17, 2f */ |
| 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ |
| }; |
| |
| /* Build a trampoline. */ |
| |
| #define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS) \ |
| ({unsigned char *__tramp = (unsigned char*)(TRAMP); \ |
| UINT64 __fun = (UINT64)(FUN); \ |
| UINT64 __ctx = (UINT64)(CTX); \ |
| UINT64 __flags = (UINT64)(FLAGS); \ |
| memcpy (__tramp, trampoline, sizeof (trampoline)); \ |
| memcpy (__tramp + 12, &__fun, sizeof (__fun)); \ |
| memcpy (__tramp + 20, &__ctx, sizeof (__ctx)); \ |
| memcpy (__tramp + 28, &__flags, sizeof (__flags)); \ |
| ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE); \ |
| }) |
| |
| ffi_status |
| ffi_prep_closure_loc (ffi_closure* closure, |
| ffi_cif* cif, |
| void (*fun)(ffi_cif*,void*,void**,void*), |
| void *user_data, |
| void *codeloc) |
| { |
| if (cif->abi != FFI_SYSV) |
| return FFI_BAD_ABI; |
| |
| FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc, |
| cif->aarch64_flags); |
| |
| closure->cif = cif; |
| closure->user_data = user_data; |
| closure->fun = fun; |
| |
| return FFI_OK; |
| } |
| |
| /* Primary handler to setup and invoke a function within a closure. |
| |
| A closure when invoked enters via the assembler wrapper |
| ffi_closure_SYSV(). The wrapper allocates a call context on the |
| stack, saves the interesting registers (from the perspective of |
| the calling convention) into the context then passes control to |
| ffi_closure_SYSV_inner() passing the saved context and a pointer to |
| the stack at the point ffi_closure_SYSV() was invoked. |
| |
| On the return path the assembler wrapper will reload call context |
| registers. |
| |
| ffi_closure_SYSV_inner() marshalls the call context into ffi value |
| descriptors, invokes the wrapped function, then marshalls the return |
| value back into the call context. */ |
| |
| void FFI_HIDDEN |
| ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, |
| void *stack) |
| { |
| ffi_cif *cif = closure->cif; |
| void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); |
| void *rvalue = NULL; |
| int i; |
| struct arg_state state; |
| |
| arg_init (&state, ALIGN(cif->bytes, 16)); |
| |
| for (i = 0; i < cif->nargs; i++) |
| { |
| ffi_type *ty = cif->arg_types[i]; |
| |
| switch (ty->type) |
| { |
| case FFI_TYPE_VOID: |
| FFI_ASSERT (0); |
| break; |
| |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_SINT8: |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_SINT16: |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_SINT32: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_SINT64: |
| case FFI_TYPE_FLOAT: |
| case FFI_TYPE_DOUBLE: |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| avalue[i] = allocate_to_register_or_stack (context, stack, |
| &state, ty->type); |
| break; |
| #endif |
| |
| case FFI_TYPE_STRUCT: |
| if (is_hfa (ty)) |
| { |
| unsigned n = element_count (ty); |
| if (available_v (&state) < n) |
| { |
| state.nsrn = N_V_ARG_REG; |
| avalue[i] = allocate_to_stack (&state, stack, ty->alignment, |
| ty->size); |
| } |
| else |
| { |
| switch (get_homogeneous_type (ty)) |
| { |
| case FFI_TYPE_FLOAT: |
| { |
| /* Eeek! We need a pointer to the structure, |
| however the homogeneous float elements are |
| being passed in individual S registers, |
| therefore the structure is not represented as |
| a contiguous sequence of bytes in our saved |
| register context. We need to fake up a copy |
| of the structure laid out in memory |
| correctly. The fake can be tossed once the |
| closure function has returned hence alloca() |
| is sufficient. */ |
| int j; |
| UINT32 *p = avalue[i] = alloca (ty->size); |
| for (j = 0; j < element_count (ty); j++) |
| memcpy (&p[j], |
| allocate_to_s (context, &state), |
| sizeof (*p)); |
| break; |
| } |
| |
| case FFI_TYPE_DOUBLE: |
| { |
| /* Eeek! We need a pointer to the structure, |
| however the homogeneous float elements are |
| being passed in individual S registers, |
| therefore the structure is not represented as |
| a contiguous sequence of bytes in our saved |
| register context. We need to fake up a copy |
| of the structure laid out in memory |
| correctly. The fake can be tossed once the |
| closure function has returned hence alloca() |
| is sufficient. */ |
| int j; |
| UINT64 *p = avalue[i] = alloca (ty->size); |
| for (j = 0; j < element_count (ty); j++) |
| memcpy (&p[j], |
| allocate_to_d (context, &state), |
| sizeof (*p)); |
| break; |
| } |
| |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| memcpy (&avalue[i], |
| allocate_to_v (context, &state), |
| sizeof (*avalue)); |
| break; |
| #endif |
| |
| default: |
| FFI_ASSERT (0); |
| break; |
| } |
| } |
| } |
| else if (ty->size > 16) |
| { |
| /* Replace Composite type of size greater than 16 with a |
| pointer. */ |
| memcpy (&avalue[i], |
| allocate_to_register_or_stack (context, stack, |
| &state, FFI_TYPE_POINTER), |
| sizeof (avalue[i])); |
| } |
| else if (available_x (&state) >= (ty->size + 7) / 8) |
| { |
| avalue[i] = get_x_addr (context, state.ngrn); |
| state.ngrn += (ty->size + 7) / 8; |
| } |
| else |
| { |
| state.ngrn = N_X_ARG_REG; |
| |
| avalue[i] = allocate_to_stack (&state, stack, ty->alignment, |
| ty->size); |
| } |
| break; |
| |
| default: |
| FFI_ASSERT (0); |
| break; |
| } |
| } |
| |
| /* Figure out where the return value will be passed, either in |
| registers or in a memory block allocated by the caller and passed |
| in x8. */ |
| |
| if (is_register_candidate (cif->rtype)) |
| { |
| /* Register candidates are *always* returned in registers. */ |
| |
| /* Allocate a scratchpad for the return value, we will let the |
| callee scrible the result into the scratch pad then move the |
| contents into the appropriate return value location for the |
| call convention. */ |
| rvalue = alloca (cif->rtype->size); |
| (closure->fun) (cif, rvalue, avalue, closure->user_data); |
| |
| /* Copy the return value into the call context so that it is returned |
| as expected to our caller. */ |
| switch (cif->rtype->type) |
| { |
| case FFI_TYPE_VOID: |
| break; |
| |
| case FFI_TYPE_UINT8: |
| case FFI_TYPE_UINT16: |
| case FFI_TYPE_UINT32: |
| case FFI_TYPE_POINTER: |
| case FFI_TYPE_UINT64: |
| case FFI_TYPE_SINT8: |
| case FFI_TYPE_SINT16: |
| case FFI_TYPE_INT: |
| case FFI_TYPE_SINT32: |
| case FFI_TYPE_SINT64: |
| case FFI_TYPE_FLOAT: |
| case FFI_TYPE_DOUBLE: |
| #if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE |
| case FFI_TYPE_LONGDOUBLE: |
| #endif |
| { |
| void *addr = get_basic_type_addr (cif->rtype->type, context, 0); |
| copy_basic_type (addr, rvalue, cif->rtype->type); |
| break; |
| } |
| case FFI_TYPE_STRUCT: |
| if (is_hfa (cif->rtype)) |
| { |
| int j; |
| unsigned short type = get_homogeneous_type (cif->rtype); |
| unsigned elems = element_count (cif->rtype); |
| for (j = 0; j < elems; j++) |
| { |
| void *reg = get_basic_type_addr (type, context, j); |
| copy_basic_type (reg, rvalue, type); |
| rvalue += get_basic_type_size (type); |
| } |
| } |
| else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) |
| { |
| size_t size = ALIGN (cif->rtype->size, sizeof (UINT64)) ; |
| memcpy (get_x_addr (context, 0), rvalue, size); |
| } |
| else |
| { |
| FFI_ASSERT (0); |
| } |
| break; |
| default: |
| FFI_ASSERT (0); |
| break; |
| } |
| } |
| else |
| { |
| memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64)); |
| (closure->fun) (cif, rvalue, avalue, closure->user_data); |
| } |
| } |
| |