@@ -4,7 +4,6 @@ use crate::{
44 Error ,
55} ;
66use core:: {
7- cell:: UnsafeCell ,
87 ffi:: c_void,
98 mem:: MaybeUninit ,
109 sync:: atomic:: { AtomicI32 , Ordering } ,
@@ -18,159 +17,186 @@ use core::{
1817/// - On Haiku and QNX Neutrino they are identical.
1918const FILE_PATH : & [ u8 ] = b"/dev/urandom\0 " ;
2019
21- // Do not inline this when it is the fallback implementation, but don't mark it
22- // `#[cold]` because it is hot when it is actually used.
23- #[ cfg_attr( any( target_os = "android" , target_os = "linux" ) , inline( never) ) ]
20+ // File descriptor is a "nonnegative integer", so we can safely use negative sentinel values.
21+ const FD_UNINIT : libc:: c_int = -1 ;
22+ const FD_ONGOING_INIT : libc:: c_int = -2 ;
23+
24+ // In theory `libc::c_int` could be something other than `i32`, but for the
25+ // targets we currently support that use `use_file`, it is always `i32`.
26+ // If/when we add support for a target where that isn't the case, we may
27+ // need to use a different atomic type or make other accommodations. The
28+ // compiler will let us know if/when that is the case, because the
29+ // `FD.store(fd)` would fail to compile.
30+ //
31+ // The opening of the file, by libc/libstd/etc. may write some unknown
32+ // state into in-process memory. (Such state may include some sanitizer
33+ // bookkeeping, or we might be operating in a unikernel-like environment
34+ // where all the "kernel" file descriptor bookkeeping is done in our
35+ // process.) `open_or_wait` stores into `FD` using `Ordering::Release` to
36+ // ensure any such state is synchronized. `getrandom_inner` and `open_or_wait`
37+ // load from `FD` with `Ordering::Acquire` to synchronize with it.
38+ static FD : AtomicI32 = AtomicI32 :: new ( FD_UNINIT ) ;
39+
2440pub fn getrandom_inner ( dest : & mut [ MaybeUninit < u8 > ] ) -> Result < ( ) , Error > {
25- let fd = get_rng_fd ( ) ?;
41+ let mut fd = FD . load ( Ordering :: Acquire ) ;
42+ if fd == FD_UNINIT || fd == FD_ONGOING_INIT {
43+ fd = open_or_wait ( ) ?;
44+ }
2645 sys_fill_exact ( dest, |buf| unsafe {
2746 libc:: read ( fd, buf. as_mut_ptr ( ) . cast :: < c_void > ( ) , buf. len ( ) )
2847 } )
2948}
3049
31- // Returns the file descriptor for the device file used to retrieve random
32- // bytes. The file will be opened exactly once. All subsequent calls will
33- // return the same file descriptor. This file descriptor is never closed.
34- fn get_rng_fd ( ) -> Result < libc:: c_int , Error > {
35- // std::os::fd::{BorrowedFd, OwnedFd} guarantee that -1 is not a valid file descriptor.
36- const FD_UNINIT : libc:: c_int = -1 ;
37-
38- // In theory `libc::c_int` could be something other than `i32`, but for the
39- // targets we currently support that use `use_file`, it is always `i32`.
40- // If/when we add support for a target where that isn't the case, we may
41- // need to use a different atomic type or make other accomodations. The
42- // compiler will let us know if/when that is the case, because the
43- // `FD.store(fd)` would fail to compile.
44- //
45- // The opening of the file, by libc/libstd/etc. may write some unknown
46- // state into in-process memory. (Such state may include some sanitizer
47- // bookkeeping, or we might be operating in a unikernal-like environment
48- // where all the "kernel" file descriptor bookkeeping is done in our
49- // process.) `get_fd_locked` stores into FD using `Ordering::Release` to
50- // ensure any such state is synchronized. `get_fd` loads from `FD` with
51- // `Ordering::Acquire` to synchronize with it.
52- static FD : AtomicI32 = AtomicI32 :: new ( FD_UNINIT ) ;
53-
54- fn get_fd ( ) -> Option < libc:: c_int > {
50+ #[ cold]
51+ fn open_or_wait ( ) -> Result < libc:: c_int , Error > {
52+ loop {
5553 match FD . load ( Ordering :: Acquire ) {
56- FD_UNINIT => None ,
57- val => Some ( val) ,
54+ FD_UNINIT => {
55+ let res = FD . compare_exchange_weak (
56+ FD_UNINIT ,
57+ FD_ONGOING_INIT ,
58+ Ordering :: AcqRel ,
59+ Ordering :: Relaxed ,
60+ ) ;
61+ if res. is_ok ( ) {
62+ break ;
63+ }
64+ }
65+ FD_ONGOING_INIT => sync:: wait ( ) ,
66+ fd => return Ok ( fd) ,
5867 }
5968 }
6069
61- #[ cold]
62- fn get_fd_locked ( ) -> Result < libc:: c_int , Error > {
63- // This mutex is used to prevent multiple threads from opening file
64- // descriptors concurrently, which could run into the limit on the
65- // number of open file descriptors. Our goal is to have no more than one
66- // file descriptor open, ever.
67- //
68- // SAFETY: We use the mutex only in this method, and we always unlock it
69- // before returning, making sure we don't violate the pthread_mutex_t API.
70- static MUTEX : Mutex = Mutex :: new ( ) ;
71- unsafe { MUTEX . lock ( ) } ;
72- let _guard = DropGuard ( || unsafe { MUTEX . unlock ( ) } ) ;
73-
74- if let Some ( fd) = get_fd ( ) {
75- return Ok ( fd) ;
76- }
77-
78- // On Linux, /dev/urandom might return insecure values.
79- #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
80- wait_until_rng_ready ( ) ?;
70+ let res = open_fd ( ) ;
71+ let val = match res {
72+ Ok ( fd) => fd,
73+ Err ( _) => FD_UNINIT ,
74+ } ;
75+ FD . store ( val, Ordering :: Release ) ;
8176
82- let fd = open_readonly ( FILE_PATH ) ?;
83- debug_assert ! ( fd != FD_UNINIT ) ;
84- FD . store ( fd, Ordering :: Release ) ;
77+ // On non-Linux targets `wait` is just 1 ms sleep,
78+ // so we don't need any explicit wake up in addition
79+ // to updating value of `FD`.
80+ #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
81+ sync:: wake ( ) ;
8582
86- Ok ( fd)
87- }
88-
89- // Use double-checked locking to avoid acquiring the lock if possible.
90- if let Some ( fd) = get_fd ( ) {
91- Ok ( fd)
92- } else {
93- get_fd_locked ( )
94- }
83+ res
9584}
9685
97- // Polls /dev/random to make sure it is ok to read from /dev/urandom.
98- //
99- // Polling avoids draining the estimated entropy from /dev/random;
100- // short-lived processes reading even a single byte from /dev/random could
101- // be problematic if they are being executed faster than entropy is being
102- // collected.
103- //
104- // OTOH, reading a byte instead of polling is more compatible with
105- // sandboxes that disallow `poll()` but which allow reading /dev/random,
106- // e.g. sandboxes that assume that `poll()` is for network I/O. This way,
107- // fewer applications will have to insert pre-sandbox-initialization logic.
108- // Often (blocking) file I/O is not allowed in such early phases of an
109- // application for performance and/or security reasons.
110- //
111- // It is hard to write a sandbox policy to support `libc::poll()` because
112- // it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with
113- // newer versions of glibc), and/or (rarely, and probably only on ancient
114- // systems) `select`. depending on the libc implementation (e.g. glibc vs
115- // musl), libc version, potentially the kernel version at runtime, and/or
116- // the target architecture.
117- //
118- // BoringSSL and libstd don't try to protect against insecure output from
119- // `/dev/urandom'; they don't open `/dev/random` at all.
120- //
121- // OpenSSL uses `libc::select()` unless the `dev/random` file descriptor
122- // is too large; if it is too large then it does what we do here.
123- //
124- // libsodium uses `libc::poll` similarly to this.
125- #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
126- fn wait_until_rng_ready ( ) -> Result < ( ) , Error > {
127- let fd = open_readonly ( b"/dev/random\0 " ) ?;
128- let mut pfd = libc:: pollfd {
129- fd,
130- events : libc:: POLLIN ,
131- revents : 0 ,
132- } ;
133- let _guard = DropGuard ( || unsafe {
134- libc:: close ( fd) ;
135- } ) ;
86+ fn open_fd ( ) -> Result < libc:: c_int , Error > {
87+ #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
88+ sync:: wait_until_rng_ready ( ) ?;
89+ let fd = open_readonly ( FILE_PATH ) ?;
90+ debug_assert ! ( fd >= 0 ) ;
91+ Ok ( fd)
92+ }
13693
137- loop {
138- // A negative timeout means an infinite timeout.
139- let res = unsafe { libc:: poll ( & mut pfd, 1 , -1 ) } ;
140- if res >= 0 {
141- debug_assert_eq ! ( res, 1 ) ; // We only used one fd, and cannot timeout.
142- return Ok ( ( ) ) ;
143- }
144- let err = crate :: util_libc:: last_os_error ( ) ;
145- match err. raw_os_error ( ) {
146- Some ( libc:: EINTR ) | Some ( libc:: EAGAIN ) => continue ,
147- _ => return Err ( err) ,
#[cfg(not(any(target_os = "android", target_os = "linux")))]
mod sync {
    /// Pause for 1 ms so the caller can look at `FD` again afterwards.
    ///
    /// On these targets the thread that claimed initialization only opens a
    /// file, which should not block, so in the unlikely contended case a
    /// short sleep is enough while that open finishes.
    pub(super) fn wait() {
        let request = libc::timespec {
            tv_sec: 0,
            tv_nsec: 1_000_000,
        };
        let mut remaining = libc::timespec {
            tv_sec: 0,
            tv_nsec: 0,
        };
        // An interrupted (shortened) sleep is fine here, so the return value
        // of `nanosleep` is deliberately discarded.
        // SAFETY: both pointers refer to live, initialized `timespec` locals.
        let _ = unsafe { libc::nanosleep(&request, &mut remaining) };
    }
}
151116
152- struct Mutex ( UnsafeCell < libc:: pthread_mutex_t > ) ;
153-
154- impl Mutex {
155- const fn new ( ) -> Self {
156- Self ( UnsafeCell :: new ( libc:: PTHREAD_MUTEX_INITIALIZER ) )
157- }
158- unsafe fn lock ( & self ) {
159- let r = libc:: pthread_mutex_lock ( self . 0 . get ( ) ) ;
160- debug_assert_eq ! ( r, 0 ) ;
161- }
162- unsafe fn unlock ( & self ) {
163- let r = libc:: pthread_mutex_unlock ( self . 0 . get ( ) ) ;
164- debug_assert_eq ! ( r, 0 ) ;
117+ #[ cfg( any( target_os = "android" , target_os = "linux" ) ) ]
118+ mod sync {
119+ use super :: { Error , FD , FD_ONGOING_INIT } ;
120+ use crate :: util_libc:: { last_os_error, open_readonly} ;
121+
122+ /// Wait for atomic `FD` to change value from `FD_ONGOING_INIT` to something else.
123+ ///
124+ /// Futex syscall with `FUTEX_WAIT` op puts the current thread to sleep
125+ /// until futex syscall with `FUTEX_WAKE` op gets executed for `FD`.
126+ ///
127+ /// For more information read: https://www.man7.org/linux/man-pages/man2/futex.2.html
128+ pub ( super ) fn wait ( ) {
129+ let op = libc:: FUTEX_WAIT | libc:: FUTEX_PRIVATE_FLAG ;
130+ let timeout_ptr = core:: ptr:: null :: < libc:: timespec > ( ) ;
131+ let ret = unsafe { libc:: syscall ( libc:: SYS_futex , & FD , op, FD_ONGOING_INIT , timeout_ptr) } ;
132+ // FUTEX_WAIT should return either 0 or EAGAIN error
133+ debug_assert ! ( {
134+ match ret {
135+ 0 => true ,
136+ -1 => last_os_error( ) . raw_os_error( ) == Some ( libc:: EAGAIN ) ,
137+ _ => false ,
138+ }
139+ } ) ;
165140 }
166- }
167-
168- unsafe impl Sync for Mutex { }
169141
170- struct DropGuard < F : FnMut ( ) > ( F ) ;
142+ /// Wake up all threads which wait for value of atomic `FD` to change.
143+ pub ( super ) fn wake ( ) {
144+ let op = libc:: FUTEX_WAKE | libc:: FUTEX_PRIVATE_FLAG ;
145+ let ret = unsafe { libc:: syscall ( libc:: SYS_futex , & FD , op, libc:: INT_MAX ) } ;
146+ debug_assert ! ( ret >= 0 ) ;
147+ }
171148
172- impl < F : FnMut ( ) > Drop for DropGuard < F > {
173- fn drop ( & mut self ) {
174- self . 0 ( )
149+ // Polls /dev/random to make sure it is ok to read from /dev/urandom.
150+ //
151+ // Polling avoids draining the estimated entropy from /dev/random;
152+ // short-lived processes reading even a single byte from /dev/random could
153+ // be problematic if they are being executed faster than entropy is being
154+ // collected.
155+ //
156+ // OTOH, reading a byte instead of polling is more compatible with
157+ // sandboxes that disallow `poll()` but which allow reading /dev/random,
158+ // e.g. sandboxes that assume that `poll()` is for network I/O. This way,
159+ // fewer applications will have to insert pre-sandbox-initialization logic.
160+ // Often (blocking) file I/O is not allowed in such early phases of an
161+ // application for performance and/or security reasons.
162+ //
163+ // It is hard to write a sandbox policy to support `libc::poll()` because
164+ // it may invoke the `poll`, `ppoll`, `ppoll_time64` (since Linux 5.1, with
165+ // newer versions of glibc), and/or (rarely, and probably only on ancient
166+ // systems) `select`. depending on the libc implementation (e.g. glibc vs
167+ // musl), libc version, potentially the kernel version at runtime, and/or
168+ // the target architecture.
169+ //
170+ // BoringSSL and libstd don't try to protect against insecure output from
171+ // `/dev/urandom'; they don't open `/dev/random` at all.
172+ //
173+ // OpenSSL uses `libc::select()` unless the `dev/random` file descriptor
174+ // is too large; if it is too large then it does what we do here.
175+ //
176+ // libsodium uses `libc::poll` similarly to this.
177+ pub ( super ) fn wait_until_rng_ready ( ) -> Result < ( ) , Error > {
178+ let fd = open_readonly ( b"/dev/random\0 " ) ?;
179+ let mut pfd = libc:: pollfd {
180+ fd,
181+ events : libc:: POLLIN ,
182+ revents : 0 ,
183+ } ;
184+
185+ let res = loop {
186+ // A negative timeout means an infinite timeout.
187+ let res = unsafe { libc:: poll ( & mut pfd, 1 , -1 ) } ;
188+ if res >= 0 {
189+ // We only used one fd, and cannot timeout.
190+ debug_assert_eq ! ( res, 1 ) ;
191+ break Ok ( ( ) ) ;
192+ }
193+ let err = last_os_error ( ) ;
194+ match err. raw_os_error ( ) {
195+ Some ( libc:: EINTR ) | Some ( libc:: EAGAIN ) => continue ,
196+ _ => break Err ( err) ,
197+ }
198+ } ;
199+ unsafe { libc:: close ( fd) } ;
200+ res
175201 }
176202}
0 commit comments