diff --git a/src/libos/src/entry.rs b/src/libos/src/entry.rs
index 928cd091..1f0668e7 100644
--- a/src/libos/src/entry.rs
+++ b/src/libos/src/entry.rs
@@ -21,6 +21,11 @@ use sgx_tse::*;
 pub static mut INSTANCE_DIR: String = String::new();
 static mut ENCLAVE_PATH: String = String::new();
 
+/// Note about memory ordering:
+/// HAS_INIT need to synchronize the relevant resources in interrupt::init().
+/// The read operation of HAS_INIT needs to see the change of the resources.
+/// Just `Acquire` or `Release` needs to be used to make all the change of the
+/// wakers visible to us.
 lazy_static! {
     static ref INIT_ONCE: Once = Once::new();
     static ref HAS_INIT: AtomicBool = AtomicBool::new(false);
@@ -49,7 +54,7 @@ pub extern "C" fn occlum_ecall_init(
     instance_dir: *const c_char,
     file_buffer: *const host_file_buffer,
 ) -> i32 {
-    if HAS_INIT.load(Ordering::SeqCst) == true {
+    if HAS_INIT.load(Ordering::Acquire) == true {
         return ecall_errno!(EEXIST);
     }
 
@@ -101,7 +106,7 @@ pub extern "C" fn occlum_ecall_init(
 
         interrupt::init();
 
-        HAS_INIT.store(true, Ordering::SeqCst);
+        HAS_INIT.store(true, Ordering::Release);
 
         // Init boot up time stamp here.
         time::up_time::init();
@@ -135,7 +140,7 @@ pub extern "C" fn occlum_ecall_new_process(
     env: *const *const c_char,
     host_stdio_fds: *const HostStdioFds,
 ) -> i32 {
-    if HAS_INIT.load(Ordering::SeqCst) == false {
+    if HAS_INIT.load(Ordering::Acquire) == false {
         return ecall_errno!(EAGAIN);
     }
 
@@ -164,7 +169,7 @@ pub extern "C" fn occlum_ecall_new_process(
 
 #[no_mangle]
 pub extern "C" fn occlum_ecall_exec_thread(libos_pid: i32, host_tid: i32) -> i32 {
-    if HAS_INIT.load(Ordering::SeqCst) == false {
+    if HAS_INIT.load(Ordering::Acquire) == false {
         return ecall_errno!(EAGAIN);
     }
 
@@ -184,7 +189,7 @@ pub extern "C" fn occlum_ecall_exec_thread(libos_pid: i32, host_tid: i32) -> i32
 
 #[no_mangle]
 pub extern "C" fn occlum_ecall_kill(pid: i32, sig: i32) -> i32 {
-    if HAS_INIT.load(Ordering::SeqCst) == false {
+    if HAS_INIT.load(Ordering::Acquire) == false {
         return ecall_errno!(EAGAIN);
     }
 
@@ -202,7 +207,7 @@ pub extern "C" fn occlum_ecall_kill(pid: i32, sig: i32) -> i32 {
 
 #[no_mangle]
 pub extern "C" fn occlum_ecall_broadcast_interrupts() -> i32 {
-    if HAS_INIT.load(Ordering::SeqCst) == false {
+    if HAS_INIT.load(Ordering::Acquire) == false {
         return ecall_errno!(EAGAIN);
     }
 
diff --git a/src/libos/src/events/waiter.rs b/src/libos/src/events/waiter.rs
index c1ca6564..8b9ff738 100644
--- a/src/libos/src/events/waiter.rs
+++ b/src/libos/src/events/waiter.rs
@@ -101,6 +101,35 @@ impl Waker {
     }
 }
 
+/// Instruction rearrangement about control dependency
+///
+/// Such as the following code:
+/// fn function(flag: bool, a: i32, b: i32) {
+///     if flag { // 1
+///         let i = a * b; // 2
+///     }
+/// }
+///
+/// Guidelines for compilation optimization：without changing the single-threaded semantics
+/// of the program, the execution order of statements can be rearranged. There is a control
+/// dependency between flag and i. When the instruction is reordered, step 2 will write the
+/// result value to the hardware cache, and when judged to be true, the result value will be
+/// written to the variable i. Therefore, controlling dependency does not prevent compiler
+/// optimizations
+///
+/// Note about memory ordering:
+/// Here is_woken needs to be synchronized with host_eventfd. The read operation of
+/// is_woken needs to see the change of the host_eventfd field. Just `Acquire` or
+/// `Release` needs to be used to make all the change of the host_eventfd visible to us.
+///
+/// The ordering in CAS operations can be `Relaxed`, `Acquire`, `AcqRel` or `SeqCst`,
+/// The key is to consider the specific usage scenario. Here fail does not synchronize other
+/// variables in the CAS operation, which can use `Relaxed`, and the host_enent needs
+/// to be synchronized in success, so `Acquire` needs to be used so that we can see all the
+/// changes in the host_eventfd after that.
+///
+/// Although it is correct to use AcqRel, here I think it is okay to use Acquire, because
+/// you don't need to synchronize host_event before is_woken, only later.
 struct Inner {
     is_woken: AtomicBool,
     host_eventfd: Arc<HostEventFd>,
@@ -117,11 +146,11 @@ impl Inner {
     }
 
     pub fn is_woken(&self) -> bool {
-        self.is_woken.load(Ordering::SeqCst)
+        self.is_woken.load(Ordering::Acquire)
     }
 
     pub fn reset(&self) {
-        self.is_woken.store(false, Ordering::SeqCst);
+        self.is_woken.store(false, Ordering::Release);
     }
 
     pub fn wait(&self, timeout: Option<&Duration>) -> Result<()> {
@@ -154,7 +183,7 @@ impl Inner {
     pub fn wake(&self) {
         if self
             .is_woken
-            .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
+            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
             .is_ok()
         {
             self.host_eventfd.write_u64(1);
@@ -167,7 +196,7 @@ impl Inner {
             .filter(|inner| {
                 inner
                     .is_woken
-                    .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
+                    .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
                     .is_ok()
             })
             .map(|inner| inner.host_eventfd.host_fd())
diff --git a/src/libos/src/events/waiter_queue.rs b/src/libos/src/events/waiter_queue.rs
index 7ec178e8..48adcfe3 100644
--- a/src/libos/src/events/waiter_queue.rs
+++ b/src/libos/src/events/waiter_queue.rs
@@ -11,6 +11,21 @@ use crate::prelude::*;
 ///
 /// While the queue is conceptually for `Waiter`s, it internally maintains a list
 /// of `Waker`s.
+///
+/// Note about memory ordering:
+/// Here count needs to be synchronized with wakers. The read operation of count
+/// needs to see the change of the waker field. Just `Acquire` or `Release` needs
+/// to be used to make all the change of the wakers visible to us.
+///
+/// Regarding the usage of functions like fetch_add and fetch_sub, they perform
+/// atomic addition or subtraction operations. The memory ordering parameter for
+/// these functions can be chosen from options such as `Relaxed`, `Acquire`, `Release`,
+/// `AcqRel` and `SeqCst`. It is important to select the appropriate memory ordering
+/// based on the corresponding usage scenario.
+///
+/// In this code snippet, the count variable is synchronized with the wakers field.
+/// In this case, we only need to ensure that waker.lock() occurs before count.
+/// Although it is safer to use AcqRel，here using `Release` would be enough.
 pub struct WaiterQueue {
     count: AtomicUsize,
     wakers: SgxMutex<VecDeque<Waker>>,
@@ -27,7 +42,9 @@ impl WaiterQueue {
 
     /// Returns whether the queue is empty.
     pub fn is_empty(&self) -> bool {
-        self.count.load(Ordering::SeqCst) == 0
+        // Here is_empty function is only used in line 76 below. And when calling this, it
+        // doesn't need to synchronize with the wakers. Therefore, Relaxed can be enough.
+        self.count.load(Ordering::Relaxed) == 0
     }
 
     /// Reset a waiter and enqueue it.
@@ -39,7 +56,7 @@ impl WaiterQueue {
         waiter.reset();
 
         let mut wakers = self.wakers.lock().unwrap();
-        self.count.fetch_add(1, Ordering::SeqCst);
+        self.count.fetch_add(1, Ordering::Release);
         wakers.push_back(waiter.waker());
     }
 
@@ -65,7 +82,7 @@ impl WaiterQueue {
             let mut wakers = self.wakers.lock().unwrap();
             let max_count = max_count.min(wakers.len());
             let to_wake: Vec<Waker> = wakers.drain(..max_count).collect();
-            self.count.fetch_sub(to_wake.len(), Ordering::SeqCst);
+            self.count.fetch_sub(to_wake.len(), Ordering::Release);
             to_wake
         };
 
diff --git a/src/libos/src/net/io_multiplexing/epoll/epoll_file.rs b/src/libos/src/net/io_multiplexing/epoll/epoll_file.rs
index 6eee52da..36c0b27f 100644
--- a/src/libos/src/net/io_multiplexing/epoll/epoll_file.rs
+++ b/src/libos/src/net/io_multiplexing/epoll/epoll_file.rs
@@ -282,10 +282,12 @@ impl EpollFile {
         // A critical section protected by the lock of self.interest
         {
             let mut interest_entries = self.interest.lock().unwrap();
+            // There is a data-dependency, so this cannot be re-ordered,
+            // `Relaxed` should be enough.
             let ep_entry = interest_entries
                 .remove(&fd)
                 .ok_or_else(|| errno!(ENOENT, "fd is not added"))?;
-            ep_entry.is_deleted.store(true, Ordering::Release);
+            ep_entry.is_deleted.store(true, Ordering::Relaxed);
 
             let notifier = ep_entry.file.notifier().unwrap();
             let weak_observer = self.weak_self.clone() as Weak<dyn Observer<_>>;
@@ -501,6 +503,11 @@ impl AsEpollFile for FileRef {
     }
 }
 
+/// Note about memory ordering:
+/// Here is_deleted needs to be synchronized with entry. The read
+/// operation of is_deleted needs to see the entry has been deleted
+/// from the interest list. Just `Acquire` or `Release` needs to be
+/// used to make all the change of the wakers visible to us.
 #[derive(Debug)]
 struct EpollEntry {
     fd: FileDesc,
diff --git a/src/libos/src/process/do_futex.rs b/src/libos/src/process/do_futex.rs
index e825b834..23b71dbf 100644
--- a/src/libos/src/process/do_futex.rs
+++ b/src/libos/src/process/do_futex.rs
@@ -420,6 +420,10 @@ impl FutexBucketVec {
     }
 }
 
+/// Note about memory ordering:
+/// Here is_woken needs to be synchronized with thread. The read operation of is_woken
+/// needs to see the change of the thread field. Just `Acquire` or `Release` needs
+/// to be used to make all the change of the thread visible to us.
 #[derive(Debug)]
 struct Waiter {
     thread: *const c_void,
@@ -441,9 +445,9 @@ impl Waiter {
         if current != self.thread {
             return Ok(());
         }
-        while self.is_woken.load(Ordering::SeqCst) == false {
+        while self.is_woken.load(Ordering::Acquire) == false {
             if let Err(e) = wait_event_timeout(self.thread, timeout) {
-                self.is_woken.store(true, Ordering::SeqCst);
+                self.is_woken.store(true, Ordering::Release);
                 return_errno!(e.errno(), "wait_timeout error");
             }
         }
@@ -451,7 +455,7 @@ impl Waiter {
     }
 
     pub fn wake(&self) {
-        if self.is_woken().fetch_or(true, Ordering::SeqCst) == false {
+        if self.is_woken().fetch_or(true, Ordering::Acquire) == false {
             set_events(&[self.thread])
         }
     }
@@ -470,7 +474,7 @@ impl Waiter {
             .filter_map(|waiter| {
                 // Only wake up items that are not woken.
                 // Set the item to be woken if it is not woken.
-                if waiter.is_woken().fetch_or(true, Ordering::SeqCst) == false {
+                if waiter.is_woken().fetch_or(true, Ordering::Acquire) == false {
                     Some(waiter.thread())
                 } else {
                     None
diff --git a/src/libos/src/process/do_robust_list.rs b/src/libos/src/process/do_robust_list.rs
index babb087d..904f0a6e 100644
--- a/src/libos/src/process/do_robust_list.rs
+++ b/src/libos/src/process/do_robust_list.rs
@@ -165,12 +165,18 @@ const FUTEX_OWNER_DIED: u32 = 0x4000_0000;
 const FUTEX_TID_MASK: u32 = 0x3FFF_FFFF;
 
 /// Wakeup one robust futex owned by the thread
+///
+/// Note about memory ordering:
+/// Here futex_val is just a shared variables between threads and doesn't synchronize
+/// with other variables. Compare_exchange guarantees that the operation is on the
+/// "latest" value. Therefore, `Relaxed` can be used in both single-threaded and
+/// multi-threaded environments.
 pub fn wake_robust_futex(futex_addr: *const i32, tid: pid_t) -> Result<()> {
     let futex_val = {
         check_ptr(futex_addr)?;
         unsafe { AtomicU32::from_mut(&mut *(futex_addr as *mut u32)) }
     };
-    let mut old_val = futex_val.load(Ordering::SeqCst);
+    let mut old_val = futex_val.load(Ordering::Relaxed);
     loop {
         // This futex may held by another thread, do nothing
         if old_val & FUTEX_TID_MASK != tid {
@@ -178,14 +184,14 @@ pub fn wake_robust_futex(futex_addr: *const i32, tid: pid_t) -> Result<()> {
         }
         let new_val = (old_val & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
         if let Err(cur_val) =
-            futex_val.compare_exchange(old_val, new_val, Ordering::SeqCst, Ordering::SeqCst)
+            futex_val.compare_exchange(old_val, new_val, Ordering::Relaxed, Ordering::Relaxed)
         {
             // The futex value has changed, let's retry with current value
             old_val = cur_val;
             continue;
         }
         // Wakeup one waiter
-        if futex_val.load(Ordering::SeqCst) & FUTEX_WAITERS != 0 {
+        if futex_val.load(Ordering::Relaxed) & FUTEX_WAITERS != 0 {
             debug!("wake robust futex addr: {:?}", futex_addr);
             super::do_futex::futex_wake(futex_addr, 1)?;
         }
diff --git a/src/libos/src/process/task/mod.rs b/src/libos/src/process/task/mod.rs
index 6700cd1a..752c7d8b 100644
--- a/src/libos/src/process/task/mod.rs
+++ b/src/libos/src/process/task/mod.rs
@@ -8,6 +8,11 @@ pub use self::exec::{enqueue, enqueue_and_exec, exec};
 mod exec;
 
 /// Note: this definition must be in sync with task.h
+///
+/// Note about memory ordering:
+/// Here user_fs is just a signal and doesn't synchronize with other
+/// variables. Therefore, `Relaxed` can be used in both single-threaded
+/// and multi-threaded environments.
 #[derive(Debug, Default)]
 #[repr(C)]
 pub struct Task {
@@ -51,10 +56,10 @@ impl Task {
     }
 
     pub(super) fn set_user_fs(&self, user_fs: usize) {
-        self.user_fs.store(user_fs, Ordering::SeqCst);
+        self.user_fs.store(user_fs, Ordering::Relaxed);
     }
 
     pub fn user_fs(&self) -> usize {
-        self.user_fs.load(Ordering::SeqCst)
+        self.user_fs.load(Ordering::Relaxed)
     }
 }
diff --git a/src/libos/src/process/term_status.rs b/src/libos/src/process/term_status.rs
index 175a5bf8..19a879c6 100644
--- a/src/libos/src/process/term_status.rs
+++ b/src/libos/src/process/term_status.rs
@@ -4,6 +4,10 @@ use crate::signal::SigNum;
 use sgx_tstd::sync::SgxMutex;
 use std::sync::atomic::{AtomicBool, Ordering};
 
+/// Note about memory ordering:
+/// Here exited needs to be synchronized with status. The read operation of exited
+/// needs to see the change of the status field. Just `Acquire` or `Release` needs
+/// to be used to make all the change of the status visible to us.
 pub struct ForcedExitStatus {
     exited: AtomicBool,
     status: SgxMutex<Option<TermStatus>>,
@@ -18,13 +22,13 @@ impl ForcedExitStatus {
     }
 
     pub fn is_forced_to_exit(&self) -> bool {
-        self.exited.load(Ordering::SeqCst)
+        self.exited.load(Ordering::Acquire)
     }
 
     pub fn force_exit(&self, status: TermStatus) {
         let mut old_status = self.status.lock().unwrap();
         // set the bool after getting the status lock
-        self.exited.store(true, Ordering::SeqCst);
+        self.exited.store(true, Ordering::Release);
         old_status.get_or_insert(status);
     }
 
diff --git a/src/libos/src/untrusted/slice_alloc.rs b/src/libos/src/untrusted/slice_alloc.rs
index c1130a4f..84d1ae3a 100644
--- a/src/libos/src/untrusted/slice_alloc.rs
+++ b/src/libos/src/untrusted/slice_alloc.rs
@@ -5,6 +5,12 @@ use std::ptr::NonNull;
 use std::sync::atomic::{AtomicUsize, Ordering};
 
 /// An memory allocator for slices, backed by a fixed-size, untrusted buffer
+///
+/// Note about memory ordering:
+/// Here buf_pos is used here for counting, not to synchronize access to other
+/// shared variables. Fetch_update guarantees that the operation is on the
+/// "latest" value. Therefore, `Relaxed` can be used in both single-threaded and
+/// multi-threaded environments.
 pub struct UntrustedSliceAlloc {
     /// The pointer to the untrusted buffer
     buf_ptr: *mut u8,
@@ -48,7 +54,7 @@ impl UntrustedSliceAlloc {
             // Move self.buf_pos forward if enough space _atomically_.
             let old_pos = self
                 .buf_pos
-                .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |old_pos| {
+                .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |old_pos| {
                     let new_pos = old_pos + new_slice_len;
                     if new_pos <= self.buf_size {
                         Some(new_pos)