| /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
| /* |
| * |
| * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. |
| * |
| * This program is free software and is provided to you under the terms of the |
| * GNU General Public License version 2 as published by the Free Software |
| * Foundation, and any use by you of this program is subject to the terms |
| * of such GNU license. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, you can access it online at |
| * http://www.gnu.org/licenses/gpl-2.0.html. |
| * |
| */ |
| |
| /* |
| * Base structures shared with the kernel. |
| */ |
| |
| #ifndef _UAPI_BASE_KERNEL_H_ |
| #define _UAPI_BASE_KERNEL_H_ |
| |
| #include <linux/types.h> |
| |
| struct base_mem_handle { |
| struct { |
| __u64 handle; |
| } basep; |
| }; |
| |
| #include "mali_base_mem_priv.h" |
| #include "gpu/mali_kbase_gpu_id.h" |
| #include "gpu/mali_kbase_gpu_coherency.h" |
| |
| #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 |
| |
| #define BASE_MAX_COHERENT_GROUPS 16 |
| |
| #if defined(CDBG_ASSERT) |
| #define LOCAL_ASSERT CDBG_ASSERT |
| #elif defined(KBASE_DEBUG_ASSERT) |
| #define LOCAL_ASSERT KBASE_DEBUG_ASSERT |
| #else |
| #if defined(__KERNEL__) |
| #error assert macro not defined! |
| #else |
| #define LOCAL_ASSERT(...) ((void)#__VA_ARGS__) |
| #endif |
| #endif |
| |
| #if defined(PAGE_MASK) && defined(PAGE_SHIFT) |
| #define LOCAL_PAGE_SHIFT PAGE_SHIFT |
| #define LOCAL_PAGE_LSB ~PAGE_MASK |
| #else |
| #ifndef OSU_CONFIG_CPU_PAGE_SIZE_LOG2 |
| #define OSU_CONFIG_CPU_PAGE_SIZE_LOG2 12 |
| #endif |
| |
| #if defined(OSU_CONFIG_CPU_PAGE_SIZE_LOG2) |
| #define LOCAL_PAGE_SHIFT OSU_CONFIG_CPU_PAGE_SIZE_LOG2 |
| #define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1) |
| #else |
| #error Failed to find page size |
| #endif |
| #endif |
| |
| /* Physical memory group ID for normal usage. |
| */ |
| #define BASE_MEM_GROUP_DEFAULT (0) |
| |
| /* Number of physical memory groups. |
| */ |
| #define BASE_MEM_GROUP_COUNT (16) |
| |
| /** |
| * typedef base_mem_alloc_flags - Memory allocation, access/hint flags. |
| * |
| * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator |
| * in order to determine the best cache policy. Some combinations are |
| * of course invalid (e.g. MEM_PROT_CPU_WR | MEM_HINT_CPU_RD), |
| * which defines a write-only region on the CPU side, which is |
| * heavily read by the CPU... |
| * Other flags are only meaningful to a particular allocator. |
| * More flags can be added to this list, as long as they don't clash |
| * (see BASE_MEM_FLAGS_NR_BITS for the number of the first free bit). |
| */ |
| typedef __u32 base_mem_alloc_flags; |
| |
| /* A mask for all the flags which are modifiable via the base_mem_set_flags |
| * interface. |
| */ |
| #define BASE_MEM_FLAGS_MODIFIABLE \ |
| (BASE_MEM_DONT_NEED | BASE_MEM_COHERENT_SYSTEM | \ |
| BASE_MEM_COHERENT_LOCAL) |
| |
| /* A mask of all the flags that can be returned via the base_mem_get_flags() |
| * interface. |
| */ |
| #define BASE_MEM_FLAGS_QUERYABLE \ |
| (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \ |
| BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \ |
| BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \ |
| BASEP_MEM_FLAGS_KERNEL_ONLY)) |
| |
| /** |
| * enum base_mem_import_type - Memory types supported by @a base_mem_import |
| * |
| * @BASE_MEM_IMPORT_TYPE_INVALID: Invalid type |
| * @BASE_MEM_IMPORT_TYPE_UMM: UMM import. Handle type is a file descriptor (int) |
| * @BASE_MEM_IMPORT_TYPE_USER_BUFFER: User buffer import. Handle is a |
| * base_mem_import_user_buffer |
| * |
| * Each type defines what the supported handle type is. |
| * |
| * If any new type is added here ARM must be contacted |
| * to allocate a numeric value for it. |
| * Do not just add a new type without synchronizing with ARM |
| * as future releases from ARM might include other new types |
| * which could clash with your custom types. |
| */ |
| enum base_mem_import_type { |
| BASE_MEM_IMPORT_TYPE_INVALID = 0, |
| /* |
| * Import type with value 1 is deprecated. |
| */ |
| BASE_MEM_IMPORT_TYPE_UMM = 2, |
| BASE_MEM_IMPORT_TYPE_USER_BUFFER = 3 |
| }; |
| |
| /** |
| * struct base_mem_import_user_buffer - Handle of an imported user buffer |
| * |
| * @ptr: address of imported user buffer |
| * @length: length of imported user buffer in bytes |
| * |
| * This structure is used to represent a handle of an imported user buffer. |
| */ |
| |
| struct base_mem_import_user_buffer { |
| __u64 ptr; |
| __u64 length; |
| }; |
| |
| /* Mask to detect 4GB boundary alignment */ |
| #define BASE_MEM_MASK_4GB 0xfffff000UL |
| /* Mask to detect 4GB boundary (in page units) alignment */ |
| #define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT) |
| |
| /* Limit on the 'extension' parameter for an allocation with the |
| * BASE_MEM_TILER_ALIGN_TOP flag set |
| * |
| * This is the same as the maximum limit for a Buffer Descriptor's chunk size |
| */ |
| #define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \ |
| (21u - (LOCAL_PAGE_SHIFT)) |
| #define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \ |
| (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2)) |
| |
| /* Bit mask of cookies used for for memory allocation setup */ |
| #define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */ |
| |
| /* Maximum size allowed in a single KBASE_IOCTL_MEM_ALLOC call */ |
| #define KBASE_MEM_ALLOC_MAX_SIZE ((8ull << 30) >> PAGE_SHIFT) /* 8 GB */ |
| |
| /* |
| * struct base_fence - Cross-device synchronisation fence. |
| * |
| * A fence is used to signal when the GPU has finished accessing a resource that |
| * may be shared with other devices, and also to delay work done asynchronously |
| * by the GPU until other devices have finished accessing a shared resource. |
| */ |
| struct base_fence { |
| struct { |
| int fd; |
| int stream_fd; |
| } basep; |
| }; |
| |
| /** |
| * struct base_mem_aliasing_info - Memory aliasing info |
| * |
| * Describes a memory handle to be aliased. |
| * A subset of the handle can be chosen for aliasing, given an offset and a |
| * length. |
| * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a |
| * region where a special page is mapped with a write-alloc cache setup, |
| * typically used when the write result of the GPU isn't needed, but the GPU |
| * must write anyway. |
| * |
| * Offset and length are specified in pages. |
| * Offset must be within the size of the handle. |
| * Offset+length must not overrun the size of the handle. |
| * |
| * @handle: Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE |
| * @offset: Offset within the handle to start aliasing from, in pages. |
| * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE. |
| * @length: Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE |
| * specifies the number of times the special page is needed. |
| */ |
| struct base_mem_aliasing_info { |
| struct base_mem_handle handle; |
| __u64 offset; |
| __u64 length; |
| }; |
| |
| /* Maximum percentage of just-in-time memory allocation trimming to perform |
| * on free. |
| */ |
| #define BASE_JIT_MAX_TRIM_LEVEL (100) |
| |
| /* Maximum number of concurrent just-in-time memory allocations. |
| */ |
| #define BASE_JIT_ALLOC_COUNT (255) |
| |
| /* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5 |
| * |
| * jit_version is 1 |
| * |
| * Due to the lack of padding specified, user clients between 32 and 64-bit |
| * may have assumed a different size of the struct |
| * |
| * An array of structures was not supported |
| */ |
| struct base_jit_alloc_info_10_2 { |
| __u64 gpu_alloc_addr; |
| __u64 va_pages; |
| __u64 commit_pages; |
| __u64 extension; |
| __u8 id; |
| }; |
| |
| /* base_jit_alloc_info introduced by kernel driver version 11.5, and in use up |
| * to 11.19 |
| * |
| * This structure had a number of modifications during and after kernel driver |
| * version 11.5, but remains size-compatible throughout its version history, and |
| * with earlier variants compatible with future variants by requiring |
| * zero-initialization to the unused space in the structure. |
| * |
| * jit_version is 2 |
| * |
| * Kernel driver version history: |
| * 11.5: Initial introduction with 'usage_id' and padding[5]. All padding bytes |
| * must be zero. Kbase minor version was not incremented, so some |
| * versions of 11.5 do not have this change. |
| * 11.5: Added 'bin_id' and 'max_allocations', replacing 2 padding bytes (Kbase |
| * minor version not incremented) |
| * 11.6: Added 'flags', replacing 1 padding byte |
| * 11.10: Arrays of this structure are supported |
| */ |
| struct base_jit_alloc_info_11_5 { |
| __u64 gpu_alloc_addr; |
| __u64 va_pages; |
| __u64 commit_pages; |
| __u64 extension; |
| __u8 id; |
| __u8 bin_id; |
| __u8 max_allocations; |
| __u8 flags; |
| __u8 padding[2]; |
| __u16 usage_id; |
| }; |
| |
| /** |
| * struct base_jit_alloc_info - Structure which describes a JIT allocation |
| * request. |
| * @gpu_alloc_addr: The GPU virtual address to write the JIT |
| * allocated GPU virtual address to. |
| * @va_pages: The minimum number of virtual pages required. |
| * @commit_pages: The minimum number of physical pages which |
| * should back the allocation. |
| * @extension: Granularity of physical pages to grow the |
| * allocation by during a fault. |
| * @id: Unique ID provided by the caller, this is used |
| * to pair allocation and free requests. |
| * Zero is not a valid value. |
| * @bin_id: The JIT allocation bin, used in conjunction with |
| * @max_allocations to limit the number of each |
| * type of JIT allocation. |
| * @max_allocations: The maximum number of allocations allowed within |
| * the bin specified by @bin_id. Should be the same |
| * for all allocations within the same bin. |
| * @flags: flags specifying the special requirements for |
| * the JIT allocation, see |
| * %BASE_JIT_ALLOC_VALID_FLAGS |
| * @padding: Expansion space - should be initialised to zero |
| * @usage_id: A hint about which allocation should be reused. |
| * The kernel should attempt to use a previous |
| * allocation with the same usage_id |
| * @heap_info_gpu_addr: Pointer to an object in GPU memory describing |
| * the actual usage of the region. |
| * |
| * jit_version is 3. |
| * |
| * When modifications are made to this structure, it is still compatible with |
| * jit_version 3 when: a) the size is unchanged, and b) new members only |
| * replace the padding bytes. |
| * |
| * Previous jit_version history: |
| * jit_version == 1, refer to &base_jit_alloc_info_10_2 |
| * jit_version == 2, refer to &base_jit_alloc_info_11_5 |
| * |
| * Kbase version history: |
| * 11.20: added @heap_info_gpu_addr |
| */ |
| struct base_jit_alloc_info { |
| __u64 gpu_alloc_addr; |
| __u64 va_pages; |
| __u64 commit_pages; |
| __u64 extension; |
| __u8 id; |
| __u8 bin_id; |
| __u8 max_allocations; |
| __u8 flags; |
| __u8 padding[2]; |
| __u16 usage_id; |
| __u64 heap_info_gpu_addr; |
| }; |
| |
| enum base_external_resource_access { |
| BASE_EXT_RES_ACCESS_SHARED, |
| BASE_EXT_RES_ACCESS_EXCLUSIVE |
| }; |
| |
| struct base_external_resource { |
| __u64 ext_resource; |
| }; |
| |
| |
| /** |
| * The maximum number of external resources which can be mapped/unmapped |
| * in a single request. |
| */ |
| #define BASE_EXT_RES_COUNT_MAX 10 |
| |
| /** |
| * struct base_external_resource_list - Structure which describes a list of |
| * external resources. |
| * @count: The number of resources. |
| * @ext_res: Array of external resources which is |
| * sized at allocation time. |
| */ |
| struct base_external_resource_list { |
| __u64 count; |
| struct base_external_resource ext_res[1]; |
| }; |
| |
| struct base_jd_debug_copy_buffer { |
| __u64 address; |
| __u64 size; |
| struct base_external_resource extres; |
| }; |
| |
| #define GPU_MAX_JOB_SLOTS 16 |
| |
| /** |
| * User-side Base GPU Property Queries |
| * |
| * The User-side Base GPU Property Query interface encapsulates two |
| * sub-modules: |
| * |
| * - "Dynamic GPU Properties" |
| * - "Base Platform Config GPU Properties" |
| * |
| * Base only deals with properties that vary between different GPU |
| * implementations - the Dynamic GPU properties and the Platform Config |
| * properties. |
| * |
| * For properties that are constant for the GPU Architecture, refer to the |
| * GPU module. However, we will discuss their relevance here just to |
| * provide background information. |
| * |
| * About the GPU Properties in Base and GPU modules |
| * |
| * The compile-time properties (Platform Config, GPU Compile-time |
| * properties) are exposed as pre-processor macros. |
| * |
| * Complementing the compile-time properties are the Dynamic GPU |
| * Properties, which act as a conduit for the GPU Configuration |
| * Discovery. |
| * |
| * In general, the dynamic properties are present to verify that the platform |
| * has been configured correctly with the right set of Platform Config |
| * Compile-time Properties. |
| * |
| * As a consistent guide across the entire DDK, the choice for dynamic or |
| * compile-time should consider the following, in order: |
| * 1. Can the code be written so that it doesn't need to know the |
| * implementation limits at all? |
| * 2. If you need the limits, get the information from the Dynamic Property |
| * lookup. This should be done once as you fetch the context, and then cached |
| * as part of the context data structure, so it's cheap to access. |
| * 3. If there's a clear and arguable inefficiency in using Dynamic Properties, |
| * then use a Compile-Time Property (Platform Config, or GPU Compile-time |
| * property). Examples of where this might be sensible follow: |
| * - Part of a critical inner-loop |
| * - Frequent re-use throughout the driver, causing significant extra load |
| * instructions or control flow that would be worthwhile optimizing out. |
| * |
| * We cannot provide an exhaustive set of examples, neither can we provide a |
| * rule for every possible situation. Use common sense, and think about: what |
| * the rest of the driver will be doing; how the compiler might represent the |
| * value if it is a compile-time constant; whether an OEM shipping multiple |
| * devices would benefit much more from a single DDK binary, instead of |
| * insignificant micro-optimizations. |
| * |
| * Dynamic GPU Properties |
| * |
| * Dynamic GPU properties are presented in two sets: |
| * 1. the commonly used properties in @ref base_gpu_props, which have been |
| * unpacked from GPU register bitfields. |
| * 2. The full set of raw, unprocessed properties in gpu_raw_gpu_props |
| * (also a member of base_gpu_props). All of these are presented in |
| * the packed form, as presented by the GPU registers themselves. |
| * |
| * The raw properties in gpu_raw_gpu_props are necessary to |
| * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device |
| * behaving differently?". In this case, all information about the |
| * configuration is potentially useful, but it does not need to be processed |
| * by the driver. Instead, the raw registers can be processed by the Mali |
| * Tools software on the host PC. |
| * |
| * The properties returned extend the GPU Configuration Discovery |
| * registers. For example, GPU clock speed is not specified in the GPU |
| * Architecture, but is necessary for OpenCL's clGetDeviceInfo() function. |
| * |
| * The GPU properties are obtained by a call to |
| * base_get_gpu_props(). This simply returns a pointer to a const |
| * base_gpu_props structure. It is constant for the life of a base |
| * context. Multiple calls to base_get_gpu_props() to a base context |
| * return the same pointer to a constant structure. This avoids cache pollution |
| * of the common data. |
| * |
| * This pointer must not be freed, because it does not point to the start of a |
| * region allocated by the memory allocator; instead, just close the @ref |
| * base_context. |
| * |
| * |
| * Kernel Operation |
| * |
| * During Base Context Create time, user-side makes a single kernel call: |
| * - A call to fill user memory with GPU information structures |
| * |
| * The kernel-side will fill the provided the entire processed base_gpu_props |
| * structure, because this information is required in both |
| * user and kernel side; it does not make sense to decode it twice. |
| * |
| * Coherency groups must be derived from the bitmasks, but this can be done |
| * kernel side, and just once at kernel startup: Coherency groups must already |
| * be known kernel-side, to support chains that specify a 'Only Coherent Group' |
| * SW requirement, or 'Only Coherent Group with Tiler' SW requirement. |
| * |
| * Coherency Group calculation |
| * |
| * Creation of the coherent group data is done at device-driver startup, and so |
| * is one-time. This will most likely involve a loop with CLZ, shifting, and |
| * bit clearing on the L2_PRESENT mask, depending on whether the |
| * system is L2 Coherent. The number of shader cores is done by a |
| * population count, since faulty cores may be disabled during production, |
| * producing a non-contiguous mask. |
| * |
| * The memory requirements for this algorithm can be determined either by a __u64 |
| * population count on the L2_PRESENT mask (a LUT helper already is |
| * required for the above), or simple assumption that there can be no more than |
| * 16 coherent groups, since core groups are typically 4 cores. |
| */ |
| |
| #define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 4 |
| |
| #define BASE_MAX_COHERENT_GROUPS 16 |
| /** |
| * struct mali_base_gpu_core_props - GPU core props info |
| * @product_id: Pro specific value. |
| * @version_status: Status of the GPU release. No defined values, but starts at |
| * 0 and increases by one for each release status (alpha, beta, EAC, etc.). |
| * 4 bit values (0-15). |
| * @minor_revision: Minor release number of the GPU. "P" part of an "RnPn" |
| * release number. |
| * 8 bit values (0-255). |
| * @major_revision: Major release number of the GPU. "R" part of an "RnPn" |
| * release number. |
| * 4 bit values (0-15). |
| * @padding: padding to allign to 8-byte |
| * @gpu_freq_khz_max: The maximum GPU frequency. Reported to applications by |
| * clGetDeviceInfo() |
| * @log2_program_counter_size: Size of the shader program counter, in bits. |
| * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU. This |
| * is a bitpattern where a set bit indicates that the format is supported. |
| * Before using a texture format, it is recommended that the corresponding |
| * bit be checked. |
| * @gpu_available_memory_size: Theoretical maximum memory available to the GPU. |
| * It is unlikely that a client will be able to allocate all of this memory |
| * for their own purposes, but this at least provides an upper bound on the |
| * memory available to the GPU. |
| * This is required for OpenCL's clGetDeviceInfo() call when |
| * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The |
| * client will not be expecting to allocate anywhere near this value. |
| * @num_exec_engines: The number of execution engines. |
| */ |
| struct mali_base_gpu_core_props { |
| __u32 product_id; |
| __u16 version_status; |
| __u16 minor_revision; |
| __u16 major_revision; |
| __u16 padding; |
| __u32 gpu_freq_khz_max; |
| __u32 log2_program_counter_size; |
| __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; |
| __u64 gpu_available_memory_size; |
| __u8 num_exec_engines; |
| }; |
| |
| /* |
| * More information is possible - but associativity and bus width are not |
| * required by upper-level apis. |
| */ |
| struct mali_base_gpu_l2_cache_props { |
| __u8 log2_line_size; |
| __u8 log2_cache_size; |
| __u8 num_l2_slices; /* Number of L2C slices. 1 or higher */ |
| __u8 padding[5]; |
| }; |
| |
| struct mali_base_gpu_tiler_props { |
| __u32 bin_size_bytes; /* Max is 4*2^15 */ |
| __u32 max_active_levels; /* Max is 2^15 */ |
| }; |
| |
| /** |
| * struct mali_base_gpu_thread_props - GPU threading system details. |
| * @max_threads: Max. number of threads per core |
| * @max_workgroup_size: Max. number of threads per workgroup |
| * @max_barrier_size: Max. number of threads that can synchronize on a |
| * simple barrier |
| * @max_registers: Total size [1..65535] of the register file available |
| * per core. |
| * @max_task_queue: Max. tasks [1..255] which may be sent to a core |
| * before it becomes blocked. |
| * @max_thread_group_split: Max. allowed value [1..15] of the Thread Group Split |
| * field. |
| * @impl_tech: 0 = Not specified, 1 = Silicon, 2 = FPGA, |
| * 3 = SW Model/Emulation |
| * @padding: padding to allign to 8-byte |
| * @tls_alloc: Number of threads per core that TLS must be |
| * allocated for |
| */ |
| struct mali_base_gpu_thread_props { |
| __u32 max_threads; |
| __u32 max_workgroup_size; |
| __u32 max_barrier_size; |
| __u16 max_registers; |
| __u8 max_task_queue; |
| __u8 max_thread_group_split; |
| __u8 impl_tech; |
| __u8 padding[3]; |
| __u32 tls_alloc; |
| }; |
| |
| /** |
| * struct mali_base_gpu_coherent_group - descriptor for a coherent group |
| * @core_mask: Core restriction mask required for the group |
| * @num_cores: Number of cores in the group |
| * @padding: padding to allign to 8-byte |
| * |
| * \c core_mask exposes all cores in that coherent group, and \c num_cores |
| * provides a cached population-count for that mask. |
| * |
| * @note Whilst all cores are exposed in the mask, not all may be available to |
| * the application, depending on the Kernel Power policy. |
| * |
| * @note if u64s must be 8-byte aligned, then this structure has 32-bits of |
| * wastage. |
| */ |
| struct mali_base_gpu_coherent_group { |
| __u64 core_mask; |
| __u16 num_cores; |
| __u16 padding[3]; |
| }; |
| |
| /** |
| * struct mali_base_gpu_coherent_group_info - Coherency group information |
| * @num_groups: Number of coherent groups in the GPU. |
| * @num_core_groups: Number of core groups (coherent or not) in the GPU. |
| * Equivalent to the number of L2 Caches. |
| * The GPU Counter dumping writes 2048 bytes per core group, regardless |
| * of whether the core groups are coherent or not. Hence this member is |
| * needed to calculate how much memory is required for dumping. |
| * @note Do not use it to work out how many valid elements are in the |
| * group[] member. Use num_groups instead. |
| * @coherency: Coherency features of the memory, accessed by gpu_mem_features |
| * methods |
| * @padding: padding to allign to 8-byte |
| * @group: Descriptors of coherent groups |
| * |
| * Note that the sizes of the members could be reduced. However, the \c group |
| * member might be 8-byte aligned to ensure the __u64 core_mask is 8-byte |
| * aligned, thus leading to wastage if the other members sizes were reduced. |
| * |
| * The groups are sorted by core mask. The core masks are non-repeating and do |
| * not intersect. |
| */ |
| struct mali_base_gpu_coherent_group_info { |
| __u32 num_groups; |
| __u32 num_core_groups; |
| __u32 coherency; |
| __u32 padding; |
| struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS]; |
| }; |
| |
| /** |
| * struct gpu_raw_gpu_props - A complete description of the GPU's Hardware |
| * Configuration Discovery registers. |
| * @shader_present: Shader core present bitmap |
| * @tiler_present: Tiler core present bitmap |
| * @l2_present: Level 2 cache present bitmap |
| * @stack_present: Core stack present bitmap |
| * @l2_features: L2 features |
| * @core_features: Core features |
| * @mem_features: Mem features |
| * @mmu_features: Mmu features |
| * @as_present: Bitmap of address spaces present |
| * @js_present: Job slots present |
| * @js_features: Array of job slot features. |
| * @tiler_features: Tiler features |
| * @texture_features: TEXTURE_FEATURES_x registers, as exposed by the GPU |
| * @gpu_id: GPU and revision identifier |
| * @thread_max_threads: Maximum number of threads per core |
| * @thread_max_workgroup_size: Maximum number of threads per workgroup |
| * @thread_max_barrier_size: Maximum number of threads per barrier |
| * @thread_features: Thread features |
| * @coherency_mode: Note: This is the _selected_ coherency mode rather than the |
| * available modes as exposed in the coherency_features register |
| * @thread_tls_alloc: Number of threads per core that TLS must be allocated for |
| * @gpu_features: GPU features |
| * |
| * The information is presented inefficiently for access. For frequent access, |
| * the values should be better expressed in an unpacked form in the |
| * base_gpu_props structure. |
| * |
| * The raw properties in gpu_raw_gpu_props are necessary to |
| * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device |
| * behaving differently?". In this case, all information about the |
| * configuration is potentially useful, but it does not need to be processed |
| * by the driver. Instead, the raw registers can be processed by the Mali |
| * Tools software on the host PC. |
| * |
| */ |
| struct gpu_raw_gpu_props { |
| __u64 shader_present; |
| __u64 tiler_present; |
| __u64 l2_present; |
| __u64 stack_present; |
| __u32 l2_features; |
| __u32 core_features; |
| __u32 mem_features; |
| __u32 mmu_features; |
| |
| __u32 as_present; |
| |
| __u32 js_present; |
| __u32 js_features[GPU_MAX_JOB_SLOTS]; |
| __u32 tiler_features; |
| __u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; |
| |
| __u32 gpu_id; |
| |
| __u32 thread_max_threads; |
| __u32 thread_max_workgroup_size; |
| __u32 thread_max_barrier_size; |
| __u32 thread_features; |
| |
| /* |
| * Note: This is the _selected_ coherency mode rather than the |
| * available modes as exposed in the coherency_features register. |
| */ |
| __u32 coherency_mode; |
| |
| __u32 thread_tls_alloc; |
| __u64 gpu_features; |
| }; |
| |
| /** |
| * struct base_gpu_props - Return structure for base_get_gpu_props(). |
| * @core_props: Core props. |
| * @l2_props: L2 props. |
| * @unused_1: Keep for backwards compatibility. |
| * @tiler_props: Tiler props. |
| * @thread_props: Thread props. |
| * @raw_props: This member is large, likely to be 128 bytes. |
| * @coherency_info: This must be last member of the structure. |
| * |
| * NOTE: the raw_props member in this data structure contains the register |
| * values from which the value of the other members are derived. The derived |
| * members exist to allow for efficient access and/or shielding the details |
| * of the layout of the registers. |
| * */ |
| struct base_gpu_props { |
| struct mali_base_gpu_core_props core_props; |
| struct mali_base_gpu_l2_cache_props l2_props; |
| __u64 unused_1; |
| struct mali_base_gpu_tiler_props tiler_props; |
| struct mali_base_gpu_thread_props thread_props; |
| struct gpu_raw_gpu_props raw_props; |
| struct mali_base_gpu_coherent_group_info coherency_info; |
| }; |
| |
| #if MALI_USE_CSF |
| #include "csf/mali_base_csf_kernel.h" |
| #else |
| #include "jm/mali_base_jm_kernel.h" |
| #endif |
| |
| /** |
| * base_mem_group_id_get() - Get group ID from flags |
| * @flags: Flags to pass to base_mem_alloc |
| * |
| * This inline function extracts the encoded group ID from flags |
| * and converts it into numeric value (0~15). |
| * |
| * Return: group ID(0~15) extracted from the parameter |
| */ |
| static __inline__ int base_mem_group_id_get(base_mem_alloc_flags flags) |
| { |
| LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0); |
| return (int)((flags & BASE_MEM_GROUP_ID_MASK) >> |
| BASEP_MEM_GROUP_ID_SHIFT); |
| } |
| |
| /** |
| * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags |
| * @id: group ID(0~15) you want to encode |
| * |
| * This inline function encodes specific group ID into base_mem_alloc_flags. |
| * Parameter 'id' should lie in-between 0 to 15. |
| * |
| * Return: base_mem_alloc_flags with the group ID (id) encoded |
| * |
| * The return value can be combined with other flags against base_mem_alloc |
| * to identify a specific memory group. |
| */ |
| static __inline__ base_mem_alloc_flags base_mem_group_id_set(int id) |
| { |
| if ((id < 0) || (id >= BASE_MEM_GROUP_COUNT)) { |
| /* Set to default value when id is out of range. */ |
| id = BASE_MEM_GROUP_DEFAULT; |
| } |
| |
| return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) & |
| BASE_MEM_GROUP_ID_MASK; |
| } |
| |
| /** |
| * base_context_mmu_group_id_set - Encode a memory group ID in |
| * base_context_create_flags |
| * |
| * Memory allocated for GPU page tables will come from the specified group. |
| * |
| * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1). |
| * |
| * Return: Bitmask of flags to pass to base_context_init. |
| */ |
| static __inline__ base_context_create_flags base_context_mmu_group_id_set( |
| int const group_id) |
| { |
| LOCAL_ASSERT(group_id >= 0); |
| LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT); |
| return BASEP_CONTEXT_MMU_GROUP_ID_MASK & |
| ((base_context_create_flags)group_id << |
| BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); |
| } |
| |
| /** |
| * base_context_mmu_group_id_get - Decode a memory group ID from |
| * base_context_create_flags |
| * |
| * Memory allocated for GPU page tables will come from the returned group. |
| * |
| * @flags: Bitmask of flags to pass to base_context_init. |
| * |
| * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1). |
| */ |
| static __inline__ int base_context_mmu_group_id_get( |
| base_context_create_flags const flags) |
| { |
| LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS)); |
| return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >> |
| BASEP_CONTEXT_MMU_GROUP_ID_SHIFT); |
| } |
| |
| /* |
| * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These |
| * flags are also used, where applicable, for specifying which fields |
| * are valid following the request operation. |
| */ |
| |
| /* For monotonic (counter) timefield */ |
| #define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0) |
| /* For system wide timestamp */ |
| #define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1) |
| /* For GPU cycle counter */ |
| #define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2) |
| /* Specify kernel GPU register timestamp */ |
| #define BASE_TIMEINFO_KERNEL_SOURCE_FLAG (1UL << 30) |
| /* Specify userspace cntvct_el0 timestamp source */ |
| #define BASE_TIMEINFO_USER_SOURCE_FLAG (1UL << 31) |
| |
| #define BASE_TIMEREQUEST_ALLOWED_FLAGS (\ |
| BASE_TIMEINFO_MONOTONIC_FLAG | \ |
| BASE_TIMEINFO_TIMESTAMP_FLAG | \ |
| BASE_TIMEINFO_CYCLE_COUNTER_FLAG | \ |
| BASE_TIMEINFO_KERNEL_SOURCE_FLAG | \ |
| BASE_TIMEINFO_USER_SOURCE_FLAG) |
| |
| /* Maximum number of source allocations allowed to create an alias allocation. |
| * This needs to be 4096 * 6 to allow cube map arrays with up to 4096 array |
| * layers, since each cube map in the array will have 6 faces. |
| */ |
| #define BASE_MEM_ALIAS_MAX_ENTS ((size_t)24576) |
| |
| #endif /* _UAPI_BASE_KERNEL_H_ */ |