x86_64/usr/lib/rustlib/src/rust/library/stdarch/crates/core_arch/src/nvptx/mod.rs - manifest_repos/toolchain - Git at Google

 //! NVPTX intrinsics (experimental)
 //!
 //! These intrinsics form the foundation of the CUDA
 //! programming model.
 //!
 //! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also
 //! the [LLVM NVPTX Backend documentation][llvm_docs].
 //!
 //! [cuda_c]:
 //! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
 //! [llvm_docs]:
 //! https://llvm.org/docs/NVPTXUsage.html

 use crate::ffi::c_void;

 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.nvvm.barrier0"]
     fn syncthreads() -> ();
     #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
     fn block_dim_x() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
     fn block_dim_y() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
     fn block_dim_z() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
     fn block_idx_x() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
     fn block_idx_y() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
     fn block_idx_z() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
     fn grid_dim_x() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
     fn grid_dim_y() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
     fn grid_dim_z() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
     fn thread_idx_x() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
     fn thread_idx_y() -> i32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
     fn thread_idx_z() -> i32;
 }

 /// Synchronizes all threads in the block.
 #[inline]
 pub unsafe fn _syncthreads() -> () {
     syncthreads()
 }

 /// x-th thread-block dimension.
 #[inline]
 pub unsafe fn _block_dim_x() -> i32 {
     block_dim_x()
 }

 /// y-th thread-block dimension.
 #[inline]
 pub unsafe fn _block_dim_y() -> i32 {
     block_dim_y()
 }

 /// z-th thread-block dimension.
 #[inline]
 pub unsafe fn _block_dim_z() -> i32 {
     block_dim_z()
 }

 /// x-th thread-block index.
 #[inline]
 pub unsafe fn _block_idx_x() -> i32 {
     block_idx_x()
 }

 /// y-th thread-block index.
 #[inline]
 pub unsafe fn _block_idx_y() -> i32 {
     block_idx_y()
 }

 /// z-th thread-block index.
 #[inline]
 pub unsafe fn _block_idx_z() -> i32 {
     block_idx_z()
 }

 /// x-th block-grid dimension.
 #[inline]
 pub unsafe fn _grid_dim_x() -> i32 {
     grid_dim_x()
 }

 /// y-th block-grid dimension.
 #[inline]
 pub unsafe fn _grid_dim_y() -> i32 {
     grid_dim_y()
 }

 /// z-th block-grid dimension.
 #[inline]
 pub unsafe fn _grid_dim_z() -> i32 {
     grid_dim_z()
 }

 /// x-th thread index.
 #[inline]
 pub unsafe fn _thread_idx_x() -> i32 {
     thread_idx_x()
 }

 /// y-th thread index.
 #[inline]
 pub unsafe fn _thread_idx_y() -> i32 {
     thread_idx_y()
 }

 /// z-th thread index.
 #[inline]
 pub unsafe fn _thread_idx_z() -> i32 {
     thread_idx_z()
 }

 /// Generates the trap instruction `TRAP`
 #[inline]
 pub unsafe fn trap() -> ! {
     crate::intrinsics::abort()
 }

 // Basic CUDA syscall declarations.
 extern "C" {
     /// Print formatted output from a kernel to a host-side output stream.
     ///
     /// Syscall arguments:
     /// * `status`: The status value that is returned by `vprintf`.
     /// * `format`: A pointer to the format specifier input (uses common `printf` format).
     /// * `valist`: A pointer to the valist input.
     ///
     /// ```
     /// #[repr(C)]
     /// struct PrintArgs(f32, f32, f32, i32);
     ///
     /// vprintf(
     ///     "int(%f + %f) = int(%f) = %d\n".as_ptr(),
     ///     transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
     /// );
     /// ```
     ///
     /// Sources:
     /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
     /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
     pub fn vprintf(format: *const u8, valist: *const c_void) -> i32;

     /// Allocate memory dynamically from a fixed-size heap in global memory.
     ///
     /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
     /// from the device heap and returns a pointer to the allocated memory
     /// or `NULL` if insufficient memory exists to fulfill the request.
     ///
     /// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
     ///
     /// The memory allocated by a given CUDA thread via `malloc()` remains allocated
     /// for the lifetime of the CUDA context, or until it is explicitly released
     /// by a call to `free()`. It can be used by any other CUDA threads
     /// even from subsequent kernel launches.
     ///
     /// Sources:
     /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
     /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
     // FIXME(denzp): assign `malloc` and `nothrow` attributes.
     pub fn malloc(size: usize) -> *mut c_void;

     /// Free previously dynamically allocated memory.
     ///
     /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
     /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
     /// the call to `free()` is ignored.
     ///
     /// Any CUDA thread may free memory allocated by another thread, but care should be taken
     /// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
     /// with the same `ptr` has undefined behavior.
     ///
     /// Sources:
     /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
     /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
     // FIXME(denzp): assign `nothrow` attribute.
     pub fn free(ptr: *mut c_void);

     // Internal declaration of the syscall. Exported variant has
     // the `char_size` parameter set to `1` (single char size in bytes).
     fn __assertfail(
         message: *const u8,
         file: *const u8,
         line: u32,
         function: *const u8,
         char_size: usize,
     );
 }

 /// Syscall to be used whenever the *assert expression produces a `false` value*.
 ///
 /// Syscall arguments:
 /// * `message`: The pointer to the string that should be output.
 /// * `file`: The pointer to the file name string associated with the assert.
 /// * `line`: The line number associated with the assert.
 /// * `function`: The pointer to the function name string associated with the assert.
 ///
 /// Source:
 /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
 #[inline]
 pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) {
     __assertfail(message, file, line, function, 1)
 }
	//! NVPTX intrinsics (experimental)
	//!
	//! These intrinsics form the foundation of the CUDA
	//! programming model.
	//!
	//! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also
	//! the [LLVM NVPTX Backend documentation][llvm_docs].
	//!
	//! [cuda_c]:
	//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
	//! [llvm_docs]:
	//! https://llvm.org/docs/NVPTXUsage.html

	use crate::ffi::c_void;

	#[allow(improper_ctypes)]
	extern "C" {
	#[link_name = "llvm.nvvm.barrier0"]
	fn syncthreads() -> ();
	#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
	fn block_dim_x() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
	fn block_dim_y() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
	fn block_dim_z() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
	fn block_idx_x() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
	fn block_idx_y() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
	fn block_idx_z() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
	fn grid_dim_x() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
	fn grid_dim_y() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
	fn grid_dim_z() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
	fn thread_idx_x() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
	fn thread_idx_y() -> i32;
	#[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
	fn thread_idx_z() -> i32;
	}

	/// Synchronizes all threads in the block.
	#[inline]
	pub unsafe fn _syncthreads() -> () {
	syncthreads()
	}

	/// x-th thread-block dimension.
	#[inline]
	pub unsafe fn _block_dim_x() -> i32 {
	block_dim_x()
	}

	/// y-th thread-block dimension.
	#[inline]
	pub unsafe fn _block_dim_y() -> i32 {
	block_dim_y()
	}

	/// z-th thread-block dimension.
	#[inline]
	pub unsafe fn _block_dim_z() -> i32 {
	block_dim_z()
	}

	/// x-th thread-block index.
	#[inline]
	pub unsafe fn _block_idx_x() -> i32 {
	block_idx_x()
	}

	/// y-th thread-block index.
	#[inline]
	pub unsafe fn _block_idx_y() -> i32 {
	block_idx_y()
	}

	/// z-th thread-block index.
	#[inline]
	pub unsafe fn _block_idx_z() -> i32 {
	block_idx_z()
	}

	/// x-th block-grid dimension.
	#[inline]
	pub unsafe fn _grid_dim_x() -> i32 {
	grid_dim_x()
	}

	/// y-th block-grid dimension.
	#[inline]
	pub unsafe fn _grid_dim_y() -> i32 {
	grid_dim_y()
	}

	/// z-th block-grid dimension.
	#[inline]
	pub unsafe fn _grid_dim_z() -> i32 {
	grid_dim_z()
	}

	/// x-th thread index.
	#[inline]
	pub unsafe fn _thread_idx_x() -> i32 {
	thread_idx_x()
	}

	/// y-th thread index.
	#[inline]
	pub unsafe fn _thread_idx_y() -> i32 {
	thread_idx_y()
	}

	/// z-th thread index.
	#[inline]
	pub unsafe fn _thread_idx_z() -> i32 {
	thread_idx_z()
	}

	/// Generates the trap instruction `TRAP`
	#[inline]
	pub unsafe fn trap() -> ! {
	crate::intrinsics::abort()
	}

	// Basic CUDA syscall declarations.
	extern "C" {
	/// Print formatted output from a kernel to a host-side output stream.
	///
	/// Syscall arguments:
	/// * `status`: The status value that is returned by `vprintf`.
	/// * `format`: A pointer to the format specifier input (uses common `printf` format).
	/// * `valist`: A pointer to the valist input.
	///
	/// ```
	/// #[repr(C)]
	/// struct PrintArgs(f32, f32, f32, i32);
	///
	/// vprintf(
	/// "int(%f + %f) = int(%f) = %d\n".as_ptr(),
	/// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)),
	/// );
	/// ```
	///
	/// Sources:
	/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output),
	/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
	pub fn vprintf(format: const u8, valist: const c_void) -> i32;

	/// Allocate memory dynamically from a fixed-size heap in global memory.
	///
	/// The CUDA in-kernel `malloc()` function allocates at least `size` bytes
	/// from the device heap and returns a pointer to the allocated memory
	/// or `NULL` if insufficient memory exists to fulfill the request.
	///
	/// The returned pointer is guaranteed to be aligned to a 16-byte boundary.
	///
	/// The memory allocated by a given CUDA thread via `malloc()` remains allocated
	/// for the lifetime of the CUDA context, or until it is explicitly released
	/// by a call to `free()`. It can be used by any other CUDA threads
	/// even from subsequent kernel launches.
	///
	/// Sources:
	/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
	/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
	// FIXME(denzp): assign `malloc` and `nothrow` attributes.
	pub fn malloc(size: usize) -> *mut c_void;

	/// Free previously dynamically allocated memory.
	///
	/// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`,
	/// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL,
	/// the call to `free()` is ignored.
	///
	/// Any CUDA thread may free memory allocated by another thread, but care should be taken
	/// to ensure that the same pointer is not freed more than once. Repeated calls to `free()`
	/// with the same `ptr` has undefined behavior.
	///
	/// Sources:
	/// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations),
	/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
	// FIXME(denzp): assign `nothrow` attribute.
	pub fn free(ptr: *mut c_void);

	// Internal declaration of the syscall. Exported variant has
	// the `char_size` parameter set to `1` (single char size in bytes).
	fn __assertfail(
	message: *const u8,
	file: *const u8,
	line: u32,
	function: *const u8,
	char_size: usize,
	);
	}

	/// Syscall to be used whenever the assert expression produces a `false` value.
	///
	/// Syscall arguments:
	/// * `message`: The pointer to the string that should be output.
	/// * `file`: The pointer to the file name string associated with the assert.
	/// * `line`: The line number associated with the assert.
	/// * `function`: The pointer to the function name string associated with the assert.
	///
	/// Source:
	/// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls).
	#[inline]
	pub unsafe fn __assert_fail(message: const u8, file: const u8, line: u32, function: *const u8) {
	__assertfail(message, file, line, function, 1)
	}