diff --git a/libos/src/bookkeep/libos_vma.c b/libos/src/bookkeep/libos_vma.c index 77b37be6ef..3e22adbea7 100644 --- a/libos/src/bookkeep/libos_vma.c +++ b/libos/src/bookkeep/libos_vma.c @@ -20,16 +20,16 @@ #include "libos_handle.h" #include "libos_internal.h" #include "libos_lock.h" +#include "libos_rwlock.h" #include "libos_tcb.h" #include "libos_utils.h" #include "libos_vma.h" #include "linux_abi/memory.h" -#include "spinlock.h" -/* The amount of total memory usage, all accesses must be protected by `vma_tree_lock`. */ +/* The amount of total memory usage, all accesses must be protected by `g_vma_tree_lock`. */ static size_t g_total_memory_size = 0; /* The peak amount of total memory usage, all accesses must use atomics, writes must also hold - * `vma_tree_lock`. */ + * `g_vma_tree_lock`. */ static size_t g_peak_total_memory_size = 0; /* Filter flags that will be saved in `struct libos_vma`. For example there is no need for saving @@ -51,7 +51,7 @@ struct libos_vma { struct libos_handle* file; uint64_t offset; // offset inside `file`, where `begin` starts union { - /* If this `vma` is used, it is included in `vma_tree` using this node. */ + /* If this `vma` is used, it is included in `g_vma_tree` using this node. */ struct avl_tree_node tree_node; /* Otherwise it might be cached in per thread vma cache, or might be on a temporary list * of to-be-freed vmas (used by _vma_bkeep_remove). Such lists use the field below. */ @@ -102,27 +102,84 @@ static bool cmp_addr_to_vma(void* addr, struct avl_tree_node* node) { } /* - * "vma_tree" holds all vmas with the assumption that no 2 overlap (though they could be adjacent). - * Currently we do not merge similar adjacent vmas - if we ever start doing it, this code needs - * to be revisited as there might be some optimizations that would break due to it. + * "g_vma_tree" holds all vmas with the assumption that no 2 overlap (though they could be + * adjacent). Currently we do not merge similar adjacent vmas - if we ever start doing it, this code + * needs to be revisited as there might be some optimizations that would break due to it. */ -static struct avl_tree vma_tree = {.cmp = vma_tree_cmp}; -static spinlock_t vma_tree_lock = INIT_SPINLOCK_UNLOCKED; +static struct avl_tree g_vma_tree = {.cmp = vma_tree_cmp}; +static struct libos_rwlock g_vma_tree_lock; +static bool g_vma_tree_lock_created = false; + +/* + * It is important to use the below wrappers instead of raw `rwlock_*_lock()` functions. This is + * because at LibOS startup, the lock `g_vma_tree_lock` is not yet created. Fortunately, at LibOS + * startup there is only one thread, so the lock would be redundant anyway. + * + * We cannot create `g_vma_tree_lock` at the very beginning of LibOS startup, because creating this + * lock itself requires the memory subsystem (VMA) to be fully initialized. So we start with VMA + * locking disabled first, then init the VMA subsystem, and only then create the lock. At this point + * the VMA subsystem can be used in thread-safe manner. + */ +static void vma_rwlock_read_lock(void) { + if (!g_vma_tree_lock_created) + return; + rwlock_read_lock(&g_vma_tree_lock); +} + +static void vma_rwlock_read_unlock(void) { + if (!g_vma_tree_lock_created) + return; + rwlock_read_unlock(&g_vma_tree_lock); +} + +static void vma_rwlock_write_lock(void) { + if (!g_vma_tree_lock_created) + return; + rwlock_write_lock(&g_vma_tree_lock); +} + +static void vma_rwlock_write_unlock(void) { + if (!g_vma_tree_lock_created) + return; + rwlock_write_unlock(&g_vma_tree_lock); +} + +#ifdef DEBUG +static bool vma_rwlock_is_read_or_write_locked(void) { + if (!g_vma_tree_lock_created) + return true; + return rwlock_is_read_locked(&g_vma_tree_lock) || rwlock_is_write_locked(&g_vma_tree_lock); +} + +static bool vma_rwlock_is_write_locked(void) { + if (!g_vma_tree_lock_created) + return true; + return rwlock_is_write_locked(&g_vma_tree_lock); +} +#endif + +/* VMA code is supposed to use the vma_* wrappers of RW lock; hide the actual RW lock funcs */ +#define rwlock_is_write_locked(x) false +#define rwlock_is_read_locked(x) false +#define rwlock_read_lock(x) static_assert(false, "hidden func") +#define rwlock_read_unlock(x) static_assert(false, "hidden func") +#define rwlock_write_lock(x) static_assert(false, "hidden func") +#define rwlock_write_unlock(x) static_assert(false, "hidden func") static void total_memory_size_add(size_t length) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_write_locked()); g_total_memory_size += length; /* We can read `g_peak_total_memory_size` non atomically, because writes are protected by - * `vma_tree_lock`, which we hold. Store needs to be atomic to synchronize with readers. */ + * `g_vma_tree_lock`, which we hold. Store needs to be atomic to synchronize with readers. */ if (g_peak_total_memory_size < g_total_memory_size) { __atomic_store_n(&g_peak_total_memory_size, g_total_memory_size, __ATOMIC_RELAXED); } } static void total_memory_size_sub(size_t length) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_write_locked()); assert(g_total_memory_size >= length); g_total_memory_size -= length; @@ -136,31 +193,31 @@ static struct libos_vma* node2vma(struct avl_tree_node* node) { } static struct libos_vma* _get_next_vma(struct libos_vma* vma) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); return node2vma(avl_tree_next(&vma->tree_node)); } static struct libos_vma* _get_prev_vma(struct libos_vma* vma) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); return node2vma(avl_tree_prev(&vma->tree_node)); } static struct libos_vma* _get_last_vma(void) { - assert(spinlock_is_locked(&vma_tree_lock)); - return node2vma(avl_tree_last(&vma_tree)); + assert(vma_rwlock_is_read_or_write_locked()); + return node2vma(avl_tree_last(&g_vma_tree)); } static struct libos_vma* _get_first_vma(void) { - assert(spinlock_is_locked(&vma_tree_lock)); - return node2vma(avl_tree_first(&vma_tree)); + assert(vma_rwlock_is_read_or_write_locked()); + return node2vma(avl_tree_first(&g_vma_tree)); } /* Returns the vma that contains `addr`. If there is no such vma, returns the closest vma with * higher address. */ static struct libos_vma* _lookup_vma(uintptr_t addr) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); - struct avl_tree_node* node = avl_tree_lower_bound_fn(&vma_tree, (void*)addr, cmp_addr_to_vma); + struct avl_tree_node* node = avl_tree_lower_bound_fn(&g_vma_tree, (void*)addr, cmp_addr_to_vma); if (!node) { return NULL; } @@ -193,7 +250,7 @@ typedef bool (*traverse_visitor)(struct libos_vma* vma, void* visitor_arg); // TODO: Probably other VMA functions could make use of this helper. static bool _traverse_vmas_in_range(uintptr_t begin, uintptr_t end, bool use_only_valid_part, traverse_visitor visitor, void* visitor_arg) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); assert(begin <= end); if (begin == end) @@ -256,7 +313,7 @@ static void split_vma(struct libos_vma* old_vma, struct libos_vma* new_vma, uint */ static int _vma_bkeep_remove(uintptr_t begin, uintptr_t end, bool is_internal, struct libos_vma** new_vma_ptr, struct libos_vma** vmas_to_free) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_write_locked()); assert(!new_vma_ptr || *new_vma_ptr); assert(IS_ALLOC_ALIGNED_PTR(begin) && IS_ALLOC_ALIGNED_PTR(end)); @@ -297,7 +354,7 @@ static int _vma_bkeep_remove(uintptr_t begin, uintptr_t end, bool is_internal, vma->valid_end = vma->end; } - avl_tree_insert(&vma_tree, &new_vma->tree_node); + avl_tree_insert(&g_vma_tree, &new_vma->tree_node); total_memory_size_sub(end - begin); return 0; @@ -319,7 +376,7 @@ static int _vma_bkeep_remove(uintptr_t begin, uintptr_t end, bool is_internal, /* We need to search for the next node before deletion. */ struct libos_vma* next = _get_next_vma(vma); - avl_tree_delete(&vma_tree, &vma->tree_node); + avl_tree_delete(&g_vma_tree, &vma->tree_node); total_memory_size_sub(vma->end - vma->begin); vma->next_free = NULL; @@ -362,11 +419,11 @@ static void* _vma_malloc(size_t size) { if (ret < 0) { struct libos_vma* vmas_to_free = NULL; - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); /* Since we are freeing a range we just created, additional vma is not needed. */ ret = _vma_bkeep_remove((uintptr_t)addr, (uintptr_t)addr + size, /*is_internal=*/true, NULL, &vmas_to_free); - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); if (ret < 0) { log_error("Removing a vma we just created failed: %s", unix_strerror(ret)); BUG(); @@ -520,17 +577,17 @@ static struct libos_vma* alloc_vma(void) { BUG(); } - spinlock_lock(&vma_tree_lock); - /* Currently `tmp_vma` is always used (added to `vma_tree`), but this assumption could + vma_rwlock_write_lock(); + /* Currently `tmp_vma` is always used (added to `g_vma_tree`), but this assumption could * easily be changed (e.g. if we implement VMAs merging).*/ struct avl_tree_node* node = &tmp_vma.tree_node; - if (node->parent || vma_tree.root == node) { - /* `tmp_vma` is in `vma_tree`, we need to migrate it. */ + if (node->parent || g_vma_tree.root == node) { + /* `tmp_vma` is in `g_vma_tree`, we need to migrate it. */ copy_vma(&tmp_vma, vma_migrate); - avl_tree_swap_node(&vma_tree, node, &vma_migrate->tree_node); + avl_tree_swap_node(&g_vma_tree, node, &vma_migrate->tree_node); vma_migrate = NULL; } - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); if (vma_migrate) { free_mem_obj_to_mgr(vma_mgr, vma_migrate); @@ -578,13 +635,13 @@ static void free_vmas_freelist(struct libos_vma* vma) { } static int _bkeep_initial_vma(struct libos_vma* new_vma) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_write_locked()); struct libos_vma* tmp_vma = _lookup_vma(new_vma->begin); if (tmp_vma && tmp_vma->begin < new_vma->end) { return -EEXIST; } else { - avl_tree_insert(&vma_tree, &new_vma->tree_node); + avl_tree_insert(&g_vma_tree, &new_vma->tree_node); total_memory_size_add(new_vma->end - new_vma->begin); return 0; @@ -625,7 +682,8 @@ int init_vma(void) { init_vmas[1 + idx].end = g_pal_public_state->initial_mem_ranges[i].end; init_vmas[1 + idx].valid_end = g_pal_public_state->initial_mem_ranges[i].end; - init_vmas[1 + idx].prot = PAL_PROT_TO_LINUX(g_pal_public_state->initial_mem_ranges[i].prot); + init_vmas[1 + idx].prot = PAL_PROT_TO_LINUX( + g_pal_public_state->initial_mem_ranges[i].prot); init_vmas[1 + idx].flags = MAP_PRIVATE | MAP_ANONYMOUS | VMA_INTERNAL; init_vmas[1 + idx].file = NULL; init_vmas[1 + idx].offset = 0; @@ -637,7 +695,7 @@ int init_vma(void) { } assert(1 + idx == ARRAY_SIZE(init_vmas)); - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); int ret = 0; /* First of init_vmas is reserved for later usage. */ for (size_t i = 1; i < ARRAY_SIZE(init_vmas); i++) { @@ -663,9 +721,9 @@ int init_vma(void) { log_debug("Initial VMA region 0x%lx-0x%lx (%s) bookkeeped", init_vmas[i].begin, init_vmas[i].end, init_vmas[i].comment); } - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); /* From now on if we return with an error we might leave a structure local to this function in - * vma_tree. We do not bother with removing them - this is initialization of VMA subsystem, if + * g_vma_tree. We do not bother with removing them - this is initialization of VMA subsystem, if * it fails the whole application startup fails and we should never call any of functions in * this file. */ if (ret < 0) { @@ -712,6 +770,11 @@ int init_vma(void) { return -ENOMEM; } + if (!rwlock_create(&g_vma_tree_lock)) { + return -ENOMEM; + } + g_vma_tree_lock_created = true; + /* Now we need to migrate temporary initial vmas. */ struct libos_vma* vmas_to_migrate_to[ARRAY_SIZE(init_vmas)]; for (size_t i = 0; i < ARRAY_SIZE(vmas_to_migrate_to); i++) { @@ -721,17 +784,17 @@ int init_vma(void) { } } - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); for (size_t i = 0; i < ARRAY_SIZE(init_vmas); i++) { /* Skip empty areas. */ if (init_vmas[i].begin == init_vmas[i].end) { continue; } copy_vma(&init_vmas[i], vmas_to_migrate_to[i]); - avl_tree_swap_node(&vma_tree, &init_vmas[i].tree_node, &vmas_to_migrate_to[i]->tree_node); + avl_tree_swap_node(&g_vma_tree, &init_vmas[i].tree_node, &vmas_to_migrate_to[i]->tree_node); vmas_to_migrate_to[i] = NULL; } - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); for (size_t i = 0; i < ARRAY_SIZE(vmas_to_migrate_to); i++) { if (vmas_to_migrate_to[i]) { @@ -743,7 +806,7 @@ int init_vma(void) { } static void _add_unmapped_vma(uintptr_t begin, uintptr_t end, struct libos_vma* vma) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_write_locked()); vma->begin = begin; vma->end = end; @@ -755,7 +818,7 @@ static void _add_unmapped_vma(uintptr_t begin, uintptr_t end, struct libos_vma* vma->offset = 0; copy_comment(vma, ""); - avl_tree_insert(&vma_tree, &vma->tree_node); + avl_tree_insert(&g_vma_tree, &vma->tree_node); total_memory_size_add(vma->end - vma->begin); } @@ -776,7 +839,7 @@ int bkeep_munmap(void* addr, size_t length, bool is_internal, void** tmp_vma_ptr struct libos_vma* vmas_to_free = NULL; - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); int ret = _vma_bkeep_remove((uintptr_t)addr, (uintptr_t)addr + length, is_internal, vma2 ? &vma2 : NULL, &vmas_to_free); if (ret >= 0) { @@ -784,7 +847,7 @@ int bkeep_munmap(void* addr, size_t length, bool is_internal, void** tmp_vma_ptr *tmp_vma_ptr = (void*)vma1; vma1 = NULL; } - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); free_vmas_freelist(vmas_to_free); if (vma1) { @@ -808,10 +871,10 @@ void bkeep_remove_tmp_vma(void* _vma) { assert(vma->flags == (VMA_INTERNAL | VMA_UNMAPPED)); - spinlock_lock(&vma_tree_lock); - avl_tree_delete(&vma_tree, &vma->tree_node); + vma_rwlock_write_lock(); + avl_tree_delete(&g_vma_tree, &vma->tree_node); total_memory_size_sub(vma->end - vma->begin); - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); free_vma(vma); } @@ -819,10 +882,10 @@ void bkeep_remove_tmp_vma(void* _vma) { void bkeep_convert_tmp_vma_to_user(void* _vma) { struct libos_vma* vma = (struct libos_vma*)_vma; - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); assert(vma->flags == (VMA_INTERNAL | VMA_UNMAPPED)); vma->flags &= ~VMA_INTERNAL; - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); } static bool is_file_prot_matching(struct libos_handle* file_hdl, int prot) { @@ -864,7 +927,7 @@ int bkeep_mmap_fixed(void* addr, size_t length, int prot, int flags, struct libo struct libos_vma* vmas_to_free = NULL; - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); int ret = 0; if (flags & MAP_FIXED_NOREPLACE) { struct libos_vma* tmp_vma = _lookup_vma(new_vma->begin); @@ -876,10 +939,10 @@ int bkeep_mmap_fixed(void* addr, size_t length, int prot, int flags, struct libo vma1 ? &vma1 : NULL, &vmas_to_free); } if (ret >= 0) { - avl_tree_insert(&vma_tree, &new_vma->tree_node); + avl_tree_insert(&g_vma_tree, &new_vma->tree_node); total_memory_size_add(new_vma->end - new_vma->begin); } - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); free_vmas_freelist(vmas_to_free); if (vma1) { @@ -901,7 +964,7 @@ static void vma_update_prot(struct libos_vma* vma, int prot) { static int _vma_bkeep_change(uintptr_t begin, uintptr_t end, int prot, bool is_internal, struct libos_vma** new_vma_ptr1, struct libos_vma** new_vma_ptr2) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_write_locked()); assert(IS_ALLOC_ALIGNED_PTR(begin) && IS_ALLOC_ALIGNED_PTR(end)); assert(begin < end); @@ -967,7 +1030,7 @@ static int _vma_bkeep_change(uintptr_t begin, uintptr_t end, int prot, bool is_i struct libos_vma* next = _get_next_vma(vma); - avl_tree_insert(&vma_tree, &new_vma1->tree_node); + avl_tree_insert(&g_vma_tree, &new_vma1->tree_node); if (end < new_vma1->end) { struct libos_vma* new_vma2 = *new_vma_ptr2; @@ -976,7 +1039,7 @@ static int _vma_bkeep_change(uintptr_t begin, uintptr_t end, int prot, bool is_i split_vma(new_vma1, new_vma2, end); vma_update_prot(new_vma2, vma->prot); - avl_tree_insert(&vma_tree, &new_vma2->tree_node); + avl_tree_insert(&g_vma_tree, &new_vma2->tree_node); return 0; } @@ -1010,7 +1073,7 @@ static int _vma_bkeep_change(uintptr_t begin, uintptr_t end, int prot, bool is_i split_vma(vma, new_vma2, end); vma_update_prot(vma, prot); - avl_tree_insert(&vma_tree, &new_vma2->tree_node); + avl_tree_insert(&g_vma_tree, &new_vma2->tree_node); return 0; } @@ -1030,10 +1093,10 @@ int bkeep_mprotect(void* addr, size_t length, int prot, bool is_internal) { return -ENOMEM; } - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); int ret = _vma_bkeep_change((uintptr_t)addr, (uintptr_t)addr + length, prot, is_internal, &vma1, &vma2); - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); if (vma1) { free_vma(vma1); @@ -1046,7 +1109,7 @@ int bkeep_mprotect(void* addr, size_t length, int prot, bool is_internal) { } /* TODO consider: - * maybe it's worth to keep another tree, complementary to `vma_tree`, that would hold free areas. + * maybe it's worth to keep another tree, complementary to `g_vma_tree`, that would hold free areas. * It would give O(logn) unmapped lookup, which now is O(n) in the worst case, but it would also * double the memory usage of this subsystem and add some complexity. * Another idea is to merge adjacent vmas, that are not backed by any file and have the same prot @@ -1113,7 +1176,7 @@ int bkeep_mmap_any_in_range(void* _bottom_addr, void* _top_addr, size_t length, new_vma->offset = file ? offset : 0; copy_comment(new_vma, comment ?: ""); - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); struct libos_vma* vma = _lookup_vma(top_addr); uintptr_t max_addr; @@ -1149,14 +1212,14 @@ int bkeep_mmap_any_in_range(void* _bottom_addr, void* _top_addr, size_t length, * exceeds the file); it should be updated in the mmap syscall (for file-backed mappings) */ new_vma->valid_end = max_addr; - avl_tree_insert(&vma_tree, &new_vma->tree_node); + avl_tree_insert(&g_vma_tree, &new_vma->tree_node); total_memory_size_add(new_vma->end - new_vma->begin); ret_val = new_vma->begin; new_vma = NULL; out: - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); if (new_vma) { free_vma(new_vma); } @@ -1188,7 +1251,7 @@ int bkeep_mmap_any_aslr(size_t length, int prot, int flags, struct libos_handle* int bkeep_vma_update_valid_length(void* begin_addr, size_t valid_length) { int ret; - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); struct libos_vma* vma = _lookup_vma((uintptr_t)begin_addr); if (!vma || !is_addr_in_vma((uintptr_t)begin_addr, vma)) { ret = -ENOENT; @@ -1203,7 +1266,7 @@ int bkeep_vma_update_valid_length(void* begin_addr, size_t valid_length) { vma->valid_end = vma->begin + valid_length; ret = 0; out: - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); return ret; } @@ -1271,7 +1334,7 @@ int lookup_vma(void* addr, struct libos_vma_info* vma_info) { assert(vma_info); int ret = 0; - spinlock_lock(&vma_tree_lock); + vma_rwlock_read_lock(); struct libos_vma* vma = _lookup_vma((uintptr_t)addr); if (!vma || !is_addr_in_vma((uintptr_t)addr, vma)) { ret = -ENOENT; @@ -1281,7 +1344,7 @@ int lookup_vma(void* addr, struct libos_vma_info* vma_info) { dump_vma(vma_info, vma); out: - spinlock_unlock(&vma_tree_lock); + vma_rwlock_read_unlock(); return ret; } @@ -1308,10 +1371,10 @@ bool is_in_adjacent_user_vmas(const void* addr, size_t length, int prot) { .is_ok = true, }; - spinlock_lock(&vma_tree_lock); + vma_rwlock_read_lock(); bool is_continuous = _traverse_vmas_in_range(begin, end, /*use_only_valid_part=*/true, adj_visitor, &ctx); - spinlock_unlock(&vma_tree_lock); + vma_rwlock_read_unlock(); return is_continuous && ctx.is_ok; } @@ -1322,7 +1385,7 @@ static size_t dump_vmas_with_buf(struct libos_vma_info* infos, size_t max_count, size_t size = 0; struct libos_vma_info* vma_info = infos; - spinlock_lock(&vma_tree_lock); + vma_rwlock_write_lock(); struct libos_vma* vma; for (vma = _lookup_vma(begin); vma && vma->begin < end; vma = _get_next_vma(vma)) { @@ -1335,7 +1398,7 @@ static size_t dump_vmas_with_buf(struct libos_vma_info* infos, size_t max_count, size++; } - spinlock_unlock(&vma_tree_lock); + vma_rwlock_write_unlock(); return size; } @@ -1364,14 +1427,14 @@ static int dump_vmas(struct libos_vma_info** out_infos, size_t* out_count, } static bool vma_filter_all(struct libos_vma* vma, void* arg) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); __UNUSED(arg); return !(vma->flags & VMA_INTERNAL); } static bool vma_filter_exclude_unmapped(struct libos_vma* vma, void* arg) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); __UNUSED(arg); return !(vma->flags & (VMA_INTERNAL | VMA_UNMAPPED)); @@ -1409,7 +1472,7 @@ struct madvise_dontneed_ctx { }; static bool madvise_dontneed_visitor(struct libos_vma* vma, void* visitor_arg) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); struct madvise_dontneed_ctx* ctx = (struct madvise_dontneed_ctx*)visitor_arg; @@ -1463,7 +1526,7 @@ static bool madvise_dontneed_visitor(struct libos_vma* vma, void* visitor_arg) { if (vma->flags & MAP_NORESERVE) { /* Lazy allocation of pages, zeroize only the committed pages. Note that the uncommitted * pages have to be skipped to avoid deadlocks. This is because we're holding the - * non-reentrant/recursive `vma_tree_lock` when we're in this visitor callback (which is + * non-reentrant/recursive `g_vma_tree_lock` when we're in this visitor callback (which is * invoked during VMA traversing). And if we hit page faults on accessing the uncommitted * pages, our lazy allocation logic would also try to acquire the same lock for VMA lookup * in g_mem_bkeep_get_vma_info_upcall (see `pal_mem_bkeep_get_vma_info()` for details). */ @@ -1507,17 +1570,17 @@ int madvise_dontneed_range(uintptr_t begin, uintptr_t end) { .error = 0, }; - spinlock_lock(&vma_tree_lock); + vma_rwlock_read_lock(); bool is_continuous = _traverse_vmas_in_range(begin, end, /*use_only_valid_part=*/false, madvise_dontneed_visitor, &ctx); - spinlock_unlock(&vma_tree_lock); + vma_rwlock_read_unlock(); if (!is_continuous) return -ENOMEM; return ctx.error; #else /* allocate the bitvector for committed pages info outside the VMA traversing to avoid - * recursively holding `vma_tree_lock` */ + * recursively holding `g_vma_tree_lock` */ size_t bitvector_size = UDIV_ROUND_UP((end - begin) / PAGE_SIZE, 8); uint8_t* bitvector = calloc(1, bitvector_size); if (!bitvector) @@ -1530,10 +1593,10 @@ int madvise_dontneed_range(uintptr_t begin, uintptr_t end) { .error = 0, }; - spinlock_lock(&vma_tree_lock); + vma_rwlock_read_lock(); bool is_continuous = _traverse_vmas_in_range(begin, end, /*use_only_valid_part=*/false, madvise_dontneed_visitor, &ctx); - spinlock_unlock(&vma_tree_lock); + vma_rwlock_read_unlock(); if (!is_continuous) ctx.error = -ENOMEM; @@ -1543,7 +1606,7 @@ int madvise_dontneed_range(uintptr_t begin, uintptr_t end) { } static bool vma_filter_needs_reload(struct libos_vma* vma, void* arg) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); struct libos_handle* hdl = arg; assert(hdl && hdl->inode); /* guaranteed to have inode because invoked from `write` callback */ @@ -1655,7 +1718,7 @@ struct vma_update_valid_end_args { /* returns whether prot_refresh_vma() must be applied on a VMA */ static bool vma_update_valid_end(struct libos_vma* vma, void* _args) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_write_locked()); struct vma_update_valid_end_args* args = _args; @@ -1739,7 +1802,7 @@ static int prot_refresh_mmaped_from_file_handle(struct libos_handle* hdl, size_t } static bool vma_filter_needs_msync(struct libos_vma* vma, void* arg) { - assert(spinlock_is_locked(&vma_tree_lock)); + assert(vma_rwlock_is_read_or_write_locked()); struct libos_handle* hdl = arg; @@ -2020,7 +2083,7 @@ static void debug_print_vma(struct libos_vma* vma) { } void debug_print_all_vmas(void) { - spinlock_lock(&vma_tree_lock); + vma_rwlock_read_lock(); struct libos_vma* vma = _get_first_vma(); while (vma) { @@ -2028,7 +2091,7 @@ void debug_print_all_vmas(void) { vma = _get_next_vma(vma); } - spinlock_unlock(&vma_tree_lock); + vma_rwlock_read_unlock(); } size_t get_peak_memory_usage(void) { @@ -2036,9 +2099,9 @@ size_t get_peak_memory_usage(void) { } size_t get_total_memory_usage(void) { - spinlock_lock(&vma_tree_lock); + vma_rwlock_read_lock(); size_t total_memory_size = g_total_memory_size; - spinlock_unlock(&vma_tree_lock); + vma_rwlock_read_unlock(); /* This memory accounting is just a simple heuristic, which does not account swap, reserved * memory, unmapped VMAs etc. */ return MIN(total_memory_size, g_pal_public_state->mem_total);