aarch64/usr/lib/rustlib/src/rust/library/std/src/sys/windows/thread_local_key.rs - manifest_repos/toolchain - Git at Google

 use crate::cell::UnsafeCell;
 use crate::ptr;
 use crate::sync::atomic::{
     AtomicPtr, AtomicU32,
     Ordering::{AcqRel, Acquire, Relaxed, Release},
 };
 use crate::sys::c;

 #[cfg(test)]
 mod tests;

 type Key = c::DWORD;
 type Dtor = unsafe extern "C" fn(*mut u8);

 // Turns out, like pretty much everything, Windows is pretty close the
 // functionality that Unix provides, but slightly different! In the case of
 // TLS, Windows does not provide an API to provide a destructor for a TLS
 // variable. This ends up being pretty crucial to this implementation, so we
 // need a way around this.
 //
 // The solution here ended up being a little obscure, but fear not, the
 // internet has informed me [1][2] that this solution is not unique (no way
 // I could have thought of it as well!). The key idea is to insert some hook
 // somewhere to run arbitrary code on thread termination. With this in place
 // we'll be able to run anything we like, including all TLS destructors!
 //
 // To accomplish this feat, we perform a number of threads, all contained
 // within this module:
 //
 // * All TLS destructors are tracked by *us*, not the Windows runtime. This
 //   means that we have a global list of destructors for each TLS key that
 //   we know about.
 // * When a thread exits, we run over the entire list and run dtors for all
 //   non-null keys. This attempts to match Unix semantics in this regard.
 //
 // For more details and nitty-gritty, see the code sections below!
 //
 // [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
 // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42

 pub struct StaticKey {
     /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
     /// is not a valid key value, this allows us to use zero as sentinel value
     /// without risking overflow.
     key: AtomicU32,
     dtor: Option<Dtor>,
     next: AtomicPtr<StaticKey>,
     /// Currently, destructors cannot be unregistered, so we cannot use racy
     /// initialization for keys. Instead, we need synchronize initialization.
     /// Use the Windows-provided `Once` since it does not require TLS.
     once: UnsafeCell<c::INIT_ONCE>,
 }

 impl StaticKey {
     #[inline]
     pub const fn new(dtor: Option<Dtor>) -> StaticKey {
         StaticKey {
             key: AtomicU32::new(0),
             dtor,
             next: AtomicPtr::new(ptr::null_mut()),
             once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
         }
     }

     #[inline]
     pub unsafe fn set(&'static self, val: *mut u8) {
         let r = c::TlsSetValue(self.key(), val.cast());
         debug_assert_eq!(r, c::TRUE);
     }

     #[inline]
     pub unsafe fn get(&'static self) -> *mut u8 {
         c::TlsGetValue(self.key()).cast()
     }

     #[inline]
     unsafe fn key(&'static self) -> Key {
         match self.key.load(Acquire) {
             0 => self.init(),
             key => key - 1,
         }
     }

     #[cold]
     unsafe fn init(&'static self) -> Key {
         if self.dtor.is_some() {
             let mut pending = c::FALSE;
             let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut());
             assert_eq!(r, c::TRUE);

             if pending == c::FALSE {
                 // Some other thread initialized the key, load it.
                 self.key.load(Relaxed) - 1
             } else {
                 let key = c::TlsAlloc();
                 if key == c::TLS_OUT_OF_INDEXES {
                     // Wakeup the waiting threads before panicking to avoid deadlock.
                     c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut());
                     panic!("out of TLS indexes");
                 }

                 self.key.store(key + 1, Release);
                 register_dtor(self);

                 let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut());
                 debug_assert_eq!(r, c::TRUE);

                 key
             }
         } else {
             // If there is no destructor to clean up, we can use racy initialization.

             let key = c::TlsAlloc();
             assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");

             match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
                 Ok(_) => key,
                 Err(new) => {
                     // Some other thread completed initialization first, so destroy
                     // our key and use theirs.
                     let r = c::TlsFree(key);
                     debug_assert_eq!(r, c::TRUE);
                     new - 1
                 }
             }
         }
     }
 }

 unsafe impl Send for StaticKey {}
 unsafe impl Sync for StaticKey {}

 // -------------------------------------------------------------------------
 // Dtor registration
 //
 // Windows has no native support for running destructors so we manage our own
 // list of destructors to keep track of how to destroy keys. We then install a
 // callback later to get invoked whenever a thread exits, running all
 // appropriate destructors.
 //
 // Currently unregistration from this list is not supported. A destructor can be
 // registered but cannot be unregistered. There's various simplifying reasons
 // for doing this, the big ones being:
 //
 // 1. Currently we don't even support deallocating TLS keys, so normal operation
 //    doesn't need to deallocate a destructor.
 // 2. There is no point in time where we know we can unregister a destructor
 //    because it could always be getting run by some remote thread.
 //
 // Typically processes have a statically known set of TLS keys which is pretty
 // small, and we'd want to keep this memory alive for the whole process anyway
 // really.

 static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());

 /// Should only be called once per key, otherwise loops or breaks may occur in
 /// the linked list.
 unsafe fn register_dtor(key: &'static StaticKey) {
     let this = <*const StaticKey>::cast_mut(key);
     // Use acquire ordering to pass along the changes done by the previously
     // registered keys when we store the new head with release ordering.
     let mut head = DTORS.load(Acquire);
     loop {
         key.next.store(head, Relaxed);
         match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
             Ok(_) => break,
             Err(new) => head = new,
         }
     }
 }

 // -------------------------------------------------------------------------
 // Where the Magic (TM) Happens
 //
 // If you're looking at this code, and wondering "what is this doing?",
 // you're not alone! I'll try to break this down step by step:
 //
 // # What's up with CRT$XLB?
 //
 // For anything about TLS destructors to work on Windows, we have to be able
 // to run *something* when a thread exits. To do so, we place a very special
 // static in a very special location. If this is encoded in just the right
 // way, the kernel's loader is apparently nice enough to run some function
 // of ours whenever a thread exits! How nice of the kernel!
 //
 // Lots of detailed information can be found in source [1] above, but the
 // gist of it is that this is leveraging a feature of Microsoft's PE format
 // (executable format) which is not actually used by any compilers today.
 // This apparently translates to any callbacks in the ".CRT$XLB" section
 // being run on certain events.
 //
 // So after all that, we use the compiler's #[link_section] feature to place
 // a callback pointer into the magic section so it ends up being called.
 //
 // # What's up with this callback?
 //
 // The callback specified receives a number of parameters from... someone!
 // (the kernel? the runtime? I'm not quite sure!) There are a few events that
 // this gets invoked for, but we're currently only interested on when a
 // thread or a process "detaches" (exits). The process part happens for the
 // last thread and the thread part happens for any normal thread.
 //
 // # Ok, what's up with running all these destructors?
 //
 // This will likely need to be improved over time, but this function
 // attempts a "poor man's" destructor callback system. Once we've got a list
 // of what to run, we iterate over all keys, check their values, and then run
 // destructors if the values turn out to be non null (setting them to null just
 // beforehand). We do this a few times in a loop to basically match Unix
 // semantics. If we don't reach a fixed point after a short while then we just
 // inevitably leak something most likely.
 //
 // # The article mentions weird stuff about "/INCLUDE"?
 //
 // It sure does! Specifically we're talking about this quote:
 //
 //      The Microsoft run-time library facilitates this process by defining a
 //      memory image of the TLS Directory and giving it the special name
 //      “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
 //      linker looks for this memory image and uses the data there to create the
 //      TLS Directory. Other compilers that support TLS and work with the
 //      Microsoft linker must use this same technique.
 //
 // Basically what this means is that if we want support for our TLS
 // destructors/our hook being called then we need to make sure the linker does
 // not omit this symbol. Otherwise it will omit it and our callback won't be
 // wired up.
 //
 // We don't actually use the `/INCLUDE` linker flag here like the article
 // mentions because the Rust compiler doesn't propagate linker flags, but
 // instead we use a shim function which performs a volatile 1-byte load from
 // the address of the symbol to ensure it sticks around.

 #[link_section = ".CRT$XLB"]
 #[allow(dead_code, unused_variables)]
 #[used] // we don't want LLVM eliminating this symbol for any reason, and
 // when the symbol makes it to the linker the linker will take over
 pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
     on_tls_callback;

 #[allow(dead_code, unused_variables)]
 unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
     if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
         run_dtors();
         #[cfg(target_thread_local)]
         super::thread_local_dtor::run_keyless_dtors();
     }

     // See comments above for what this is doing. Note that we don't need this
     // trickery on GNU windows, just on MSVC.
     reference_tls_used();
     #[cfg(target_env = "msvc")]
     unsafe fn reference_tls_used() {
         extern "C" {
             static _tls_used: u8;
         }
         crate::intrinsics::volatile_load(&_tls_used);
     }
     #[cfg(not(target_env = "msvc"))]
     unsafe fn reference_tls_used() {}
 }

 #[allow(dead_code)] // actually called below
 unsafe fn run_dtors() {
     for _ in 0..5 {
         let mut any_run = false;

         // Use acquire ordering to observe key initialization.
         let mut cur = DTORS.load(Acquire);
         while !cur.is_null() {
             let key = (*cur).key.load(Relaxed) - 1;
             let dtor = (*cur).dtor.unwrap();

             let ptr = c::TlsGetValue(key);
             if !ptr.is_null() {
                 c::TlsSetValue(key, ptr::null_mut());
                 dtor(ptr as *mut _);
                 any_run = true;
             }

             cur = (*cur).next.load(Relaxed);
         }

         if !any_run {
             break;
         }
     }
 }
	use crate::cell::UnsafeCell;
	use crate::ptr;
	use crate::sync::atomic::{
	AtomicPtr, AtomicU32,
	Ordering::{AcqRel, Acquire, Relaxed, Release},
	};
	use crate::sys::c;

	#[cfg(test)]
	mod tests;

	type Key = c::DWORD;
	type Dtor = unsafe extern "C" fn(*mut u8);

	// Turns out, like pretty much everything, Windows is pretty close the
	// functionality that Unix provides, but slightly different! In the case of
	// TLS, Windows does not provide an API to provide a destructor for a TLS
	// variable. This ends up being pretty crucial to this implementation, so we
	// need a way around this.
	//
	// The solution here ended up being a little obscure, but fear not, the
	// internet has informed me [1][2] that this solution is not unique (no way
	// I could have thought of it as well!). The key idea is to insert some hook
	// somewhere to run arbitrary code on thread termination. With this in place
	// we'll be able to run anything we like, including all TLS destructors!
	//
	// To accomplish this feat, we perform a number of threads, all contained
	// within this module:
	//
	// * All TLS destructors are tracked by us, not the Windows runtime. This
	// means that we have a global list of destructors for each TLS key that
	// we know about.
	// * When a thread exits, we run over the entire list and run dtors for all
	// non-null keys. This attempts to match Unix semantics in this regard.
	//
	// For more details and nitty-gritty, see the code sections below!
	//
	// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
	// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42

	pub struct StaticKey {
	/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
	/// is not a valid key value, this allows us to use zero as sentinel value
	/// without risking overflow.
	key: AtomicU32,
	dtor: Option<Dtor>,
	next: AtomicPtr<StaticKey>,
	/// Currently, destructors cannot be unregistered, so we cannot use racy
	/// initialization for keys. Instead, we need synchronize initialization.
	/// Use the Windows-provided `Once` since it does not require TLS.
	once: UnsafeCell<c::INIT_ONCE>,
	}

	impl StaticKey {
	#[inline]
	pub const fn new(dtor: Option<Dtor>) -> StaticKey {
	StaticKey {
	key: AtomicU32::new(0),
	dtor,
	next: AtomicPtr::new(ptr::null_mut()),
	once: UnsafeCell::new(c::INIT_ONCE_STATIC_INIT),
	}
	}

	#[inline]
	pub unsafe fn set(&'static self, val: *mut u8) {
	let r = c::TlsSetValue(self.key(), val.cast());
	debug_assert_eq!(r, c::TRUE);
	}

	#[inline]
	pub unsafe fn get(&'static self) -> *mut u8 {
	c::TlsGetValue(self.key()).cast()
	}

	#[inline]
	unsafe fn key(&'static self) -> Key {
	match self.key.load(Acquire) {
	0 => self.init(),
	key => key - 1,
	}
	}

	#[cold]
	unsafe fn init(&'static self) -> Key {
	if self.dtor.is_some() {
	let mut pending = c::FALSE;
	let r = c::InitOnceBeginInitialize(self.once.get(), 0, &mut pending, ptr::null_mut());
	assert_eq!(r, c::TRUE);

	if pending == c::FALSE {
	// Some other thread initialized the key, load it.
	self.key.load(Relaxed) - 1
	} else {
	let key = c::TlsAlloc();
	if key == c::TLS_OUT_OF_INDEXES {
	// Wakeup the waiting threads before panicking to avoid deadlock.
	c::InitOnceComplete(self.once.get(), c::INIT_ONCE_INIT_FAILED, ptr::null_mut());
	panic!("out of TLS indexes");
	}

	self.key.store(key + 1, Release);
	register_dtor(self);

	let r = c::InitOnceComplete(self.once.get(), 0, ptr::null_mut());
	debug_assert_eq!(r, c::TRUE);

	key
	}
	} else {
	// If there is no destructor to clean up, we can use racy initialization.

	let key = c::TlsAlloc();
	assert_ne!(key, c::TLS_OUT_OF_INDEXES, "out of TLS indexes");

	match self.key.compare_exchange(0, key + 1, AcqRel, Acquire) {
	Ok(_) => key,
	Err(new) => {
	// Some other thread completed initialization first, so destroy
	// our key and use theirs.
	let r = c::TlsFree(key);
	debug_assert_eq!(r, c::TRUE);
	new - 1
	}
	}
	}
	}
	}

	unsafe impl Send for StaticKey {}
	unsafe impl Sync for StaticKey {}

	// -------------------------------------------------------------------------
	// Dtor registration
	//
	// Windows has no native support for running destructors so we manage our own
	// list of destructors to keep track of how to destroy keys. We then install a
	// callback later to get invoked whenever a thread exits, running all
	// appropriate destructors.
	//
	// Currently unregistration from this list is not supported. A destructor can be
	// registered but cannot be unregistered. There's various simplifying reasons
	// for doing this, the big ones being:
	//
	// 1. Currently we don't even support deallocating TLS keys, so normal operation
	// doesn't need to deallocate a destructor.
	// 2. There is no point in time where we know we can unregister a destructor
	// because it could always be getting run by some remote thread.
	//
	// Typically processes have a statically known set of TLS keys which is pretty
	// small, and we'd want to keep this memory alive for the whole process anyway
	// really.

	static DTORS: AtomicPtr<StaticKey> = AtomicPtr::new(ptr::null_mut());

	/// Should only be called once per key, otherwise loops or breaks may occur in
	/// the linked list.
	unsafe fn register_dtor(key: &'static StaticKey) {
	let this = <*const StaticKey>::cast_mut(key);
	// Use acquire ordering to pass along the changes done by the previously
	// registered keys when we store the new head with release ordering.
	let mut head = DTORS.load(Acquire);
	loop {
	key.next.store(head, Relaxed);
	match DTORS.compare_exchange_weak(head, this, Release, Acquire) {
	Ok(_) => break,
	Err(new) => head = new,
	}
	}
	}

	// -------------------------------------------------------------------------
	// Where the Magic (TM) Happens
	//
	// If you're looking at this code, and wondering "what is this doing?",
	// you're not alone! I'll try to break this down step by step:
	//
	// # What's up with CRT$XLB?
	//
	// For anything about TLS destructors to work on Windows, we have to be able
	// to run something when a thread exits. To do so, we place a very special
	// static in a very special location. If this is encoded in just the right
	// way, the kernel's loader is apparently nice enough to run some function
	// of ours whenever a thread exits! How nice of the kernel!
	//
	// Lots of detailed information can be found in source [1] above, but the
	// gist of it is that this is leveraging a feature of Microsoft's PE format
	// (executable format) which is not actually used by any compilers today.
	// This apparently translates to any callbacks in the ".CRT$XLB" section
	// being run on certain events.
	//
	// So after all that, we use the compiler's #[link_section] feature to place
	// a callback pointer into the magic section so it ends up being called.
	//
	// # What's up with this callback?
	//
	// The callback specified receives a number of parameters from... someone!
	// (the kernel? the runtime? I'm not quite sure!) There are a few events that
	// this gets invoked for, but we're currently only interested on when a
	// thread or a process "detaches" (exits). The process part happens for the
	// last thread and the thread part happens for any normal thread.
	//
	// # Ok, what's up with running all these destructors?
	//
	// This will likely need to be improved over time, but this function
	// attempts a "poor man's" destructor callback system. Once we've got a list
	// of what to run, we iterate over all keys, check their values, and then run
	// destructors if the values turn out to be non null (setting them to null just
	// beforehand). We do this a few times in a loop to basically match Unix
	// semantics. If we don't reach a fixed point after a short while then we just
	// inevitably leak something most likely.
	//
	// # The article mentions weird stuff about "/INCLUDE"?
	//
	// It sure does! Specifically we're talking about this quote:
	//
	// The Microsoft run-time library facilitates this process by defining a
	// memory image of the TLS Directory and giving it the special name
	// “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
	// linker looks for this memory image and uses the data there to create the
	// TLS Directory. Other compilers that support TLS and work with the
	// Microsoft linker must use this same technique.
	//
	// Basically what this means is that if we want support for our TLS
	// destructors/our hook being called then we need to make sure the linker does
	// not omit this symbol. Otherwise it will omit it and our callback won't be
	// wired up.
	//
	// We don't actually use the `/INCLUDE` linker flag here like the article
	// mentions because the Rust compiler doesn't propagate linker flags, but
	// instead we use a shim function which performs a volatile 1-byte load from
	// the address of the symbol to ensure it sticks around.

	#[link_section = ".CRT$XLB"]
	#[allow(dead_code, unused_variables)]
	#[used] // we don't want LLVM eliminating this symbol for any reason, and
	// when the symbol makes it to the linker the linker will take over
	pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
	on_tls_callback;

	#[allow(dead_code, unused_variables)]
	unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
	if dwReason == c::DLL_THREAD_DETACH \|\| dwReason == c::DLL_PROCESS_DETACH {
	run_dtors();
	#[cfg(target_thread_local)]
	super::thread_local_dtor::run_keyless_dtors();
	}

	// See comments above for what this is doing. Note that we don't need this
	// trickery on GNU windows, just on MSVC.
	reference_tls_used();
	#[cfg(target_env = "msvc")]
	unsafe fn reference_tls_used() {
	extern "C" {
	static _tls_used: u8;
	}
	crate::intrinsics::volatile_load(&_tls_used);
	}
	#[cfg(not(target_env = "msvc"))]
	unsafe fn reference_tls_used() {}
	}

	#[allow(dead_code)] // actually called below
	unsafe fn run_dtors() {
	for _ in 0..5 {
	let mut any_run = false;

	// Use acquire ordering to observe key initialization.
	let mut cur = DTORS.load(Acquire);
	while !cur.is_null() {
	let key = (*cur).key.load(Relaxed) - 1;
	let dtor = (*cur).dtor.unwrap();

	let ptr = c::TlsGetValue(key);
	if !ptr.is_null() {
	c::TlsSetValue(key, ptr::null_mut());
	dtor(ptr as *mut _);
	any_run = true;
	}

	cur = (*cur).next.load(Relaxed);
	}

	if !any_run {
	break;
	}
	}
	}