From a223e0d751c91f23f5f24264d79cc18f84a3bf8c Mon Sep 17 00:00:00 2001 From: Peter M Date: Thu, 28 May 2026 12:28:48 +0200 Subject: [PATCH 1/3] Add persistent_term support Implement a VM-level persistent_term table with NIFs for get/0,1,2, put/2, put_new/2, erase/1, and info/0. Persistent terms are copied into VM-owned heaps on write and returned without copying on read. Replaced and erased entries are retained until VM shutdown so previously returned terms remain valid without an OTP-style global GC pass, and info/0 reports retained memory. Also share the ETS term hashing helper for persistent_term lookup and add coverage for put/get/erase behavior, retained values, info/0, complex keys, and function keys. Signed-off-by: Peter M --- CHANGELOG.md | 1 + libs/estdlib/src/CMakeLists.txt | 1 + libs/estdlib/src/persistent_term.erl | 65 ++++ src/libAtomVM/CMakeLists.txt | 4 + src/libAtomVM/ets_multimap.c | 219 +----------- src/libAtomVM/globalcontext.c | 2 + src/libAtomVM/globalcontext.h | 2 + src/libAtomVM/nifs.c | 162 +++++++++ src/libAtomVM/nifs.gperf | 7 + src/libAtomVM/persistent_term.c | 360 ++++++++++++++++++++ src/libAtomVM/persistent_term.h | 92 +++++ src/libAtomVM/term_hash.c | 265 ++++++++++++++ src/libAtomVM/term_hash.h | 51 +++ tests/erlang_tests/CMakeLists.txt | 2 + tests/erlang_tests/test_persistent_term.erl | 134 ++++++++ tests/test.c | 1 + 16 files changed, 1154 insertions(+), 214 deletions(-) create mode 100644 libs/estdlib/src/persistent_term.erl create mode 100644 src/libAtomVM/persistent_term.c create mode 100644 src/libAtomVM/persistent_term.h create mode 100644 src/libAtomVM/term_hash.c create mode 100644 src/libAtomVM/term_hash.h create mode 100644 tests/erlang_tests/test_persistent_term.erl diff --git a/CHANGELOG.md b/CHANGELOG.md index e24a91384f..21f9836522 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added xtensa JIT backend for esp32 platform - Added support for configuring pins and width for sdmmc on ESP32 - Added support for map comprehensions +- Added `persistent_term` module with `get/0,1,2`, `put/2`, `put_new/2`, `erase/1`, and `info/0` ### Changed - Updated network type db() to dbm() to reflect the actual representation of the type diff --git a/libs/estdlib/src/CMakeLists.txt b/libs/estdlib/src/CMakeLists.txt index 4fbd77993d..3d84665e2e 100644 --- a/libs/estdlib/src/CMakeLists.txt +++ b/libs/estdlib/src/CMakeLists.txt @@ -62,6 +62,7 @@ set(ERLANG_MODULES math net os + persistent_term proc_lib sys logger diff --git a/libs/estdlib/src/persistent_term.erl b/libs/estdlib/src/persistent_term.erl new file mode 100644 index 0000000000..ef64b0cdf3 --- /dev/null +++ b/libs/estdlib/src/persistent_term.erl @@ -0,0 +1,65 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Peter M. +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +%%----------------------------------------------------------------------------- +%% @doc A limited implementation of the Erlang/OTP `persistent_term' module. +%% +%% Values are stored globally and reads return stored values without copying. +%% Replaced or erased values remain allocated until VM shutdown so references +%% already returned to processes stay valid without a global GC pass. The +%% `memory' value returned by `info/0' includes those retained old values. +%% @end +%%----------------------------------------------------------------------------- +-module(persistent_term). + +-export([erase/1, get/0, get/1, get/2, info/0, put/2, put_new/2]). + +-export_type([key/0, value/0]). + +-type key() :: term(). +-type value() :: term(). + +-spec erase(Key :: key()) -> boolean(). +erase(_Key) -> + erlang:nif_error(undefined). + +-spec get() -> [{key(), value()}]. +get() -> + erlang:nif_error(undefined). + +-spec get(Key :: key()) -> value(). +get(_Key) -> + erlang:nif_error(undefined). + +-spec get(Key :: key(), Default :: value()) -> value(). +get(_Key, _Default) -> + erlang:nif_error(undefined). + +-spec info() -> #{count := non_neg_integer(), memory := non_neg_integer()}. +info() -> + erlang:nif_error(undefined). + +-spec put(Key :: key(), Value :: value()) -> ok. +put(_Key, _Value) -> + erlang:nif_error(undefined). + +-spec put_new(Key :: key(), Value :: value()) -> ok. +put_new(_Key, _Value) -> + erlang:nif_error(undefined). diff --git a/src/libAtomVM/CMakeLists.txt b/src/libAtomVM/CMakeLists.txt index 37c2e02bc4..1abfb51212 100644 --- a/src/libAtomVM/CMakeLists.txt +++ b/src/libAtomVM/CMakeLists.txt @@ -54,6 +54,7 @@ set(HEADER_FILES opcodesswitch.h overflow_helpers.h nifs.h + persistent_term.h platform_nifs.h port.h posix_nifs.h @@ -66,6 +67,7 @@ set(HEADER_FILES sys.h term_typedef.h term.h + term_hash.h timer_list.h trace.h unicode.h @@ -99,6 +101,7 @@ set(SOURCE_FILES memory.c module.c nifs.c + persistent_term.c port.c posix_nifs.c refc_binary.c @@ -106,6 +109,7 @@ set(SOURCE_FILES scheduler.c stacktrace.c term.c + term_hash.c timer_list.c unicode.c unlocalized.c diff --git a/src/libAtomVM/ets_multimap.c b/src/libAtomVM/ets_multimap.c index a16cac8b32..e38a2e69b4 100644 --- a/src/libAtomVM/ets_multimap.c +++ b/src/libAtomVM/ets_multimap.c @@ -23,14 +23,13 @@ #include "globalcontext.h" #include "term.h" +#include "term_hash.h" #include "ets_multimap.h" #define DYNARRAY_INITIAL_CAPACITY 8 #define DYNARRAY_GROWTH_FACTOR 2 -static uint32_t hash_term(term t, GlobalContext *global); - static EtsMultimapEntry *entry_new(term tuple); static void entry_delete(EtsMultimapEntry *entry, GlobalContext *global); static EtsMultimapNode *node_new(EtsMultimapNode *next, EtsMultimapEntry *entries); @@ -176,7 +175,7 @@ ets_result_t ets_multimap_insert( assert(new_node->entries != NULL); - uint32_t idx = hash_term(key, global) % ETS_MULTIMAP_NUM_BUCKETS; + uint32_t idx = term_hash(key, global) % ETS_MULTIMAP_NUM_BUCKETS; new_node->next = multimap->buckets[idx]; multimap->buckets[idx] = new_node; continue; @@ -231,7 +230,7 @@ ets_result_t ets_multimap_remove( assert(node->entries != NULL); assert(term_compare(key, node_key(multimap, node), TermCompareExact, global) == TermEquals); - uint32_t idx = hash_term(key, global) % ETS_MULTIMAP_NUM_BUCKETS; + uint32_t idx = term_hash(key, global) % ETS_MULTIMAP_NUM_BUCKETS; EtsMultimapNode *iter = multimap->buckets[idx]; EtsMultimapNode *prev = NULL; @@ -338,7 +337,7 @@ ets_result_t ets_multimap_remove_tuple( } if (node->entries == NULL) { - uint32_t idx = hash_term(key, global) % ETS_MULTIMAP_NUM_BUCKETS; + uint32_t idx = term_hash(key, global) % ETS_MULTIMAP_NUM_BUCKETS; EtsMultimapNode *prev_node = NULL; for (EtsMultimapNode *iter = multimap->buckets[idx]; iter != NULL; prev_node = iter, iter = iter->next) { @@ -370,7 +369,7 @@ static ets_result_t node_find( *out_node = NULL; - uint32_t idx = hash_term(key, global) % ETS_MULTIMAP_NUM_BUCKETS; + uint32_t idx = term_hash(key, global) % ETS_MULTIMAP_NUM_BUCKETS; EtsMultimapNode *node = multimap->buckets[idx]; while (node) { @@ -542,211 +541,3 @@ static void entry_delete(EtsMultimapEntry *entry, GlobalContext *global) free(entry->heap); free(entry); } - -// -// hash function -// -// Conceptually similar to (but not identical to) the `make_hash` algorithm described in -// https://github.com/erlang/otp/blob/cbd1378ee1fde835e55614bac9290b281bafe49a/erts/emulator/beam/utils.c#L644 -// -// Also described in character folding algorithm (PJW Hash) -// https://en.wikipedia.org/wiki/Hash_function#Character_folding -// -// TODO: implement erlang:phash2 using the OTP algorithm -// - -// some large (close to 2^24) primes taken from -// http://compoasso.free.fr/primelistweb/page/prime/liste_online_en.php - -#define LARGE_PRIME_INITIAL 16777259 -#define LARGE_PRIME_ATOM 16777643 -#define LARGE_PRIME_INTEGER 16777781 -#define LARGE_PRIME_FLOAT 16777973 -#define LARGE_PRIME_PID 16778147 -#define LARGE_PRIME_REF 16778441 -#define LARGE_PRIME_BINARY 16780483 -#define LARGE_PRIME_TUPLE 16778821 -#define LARGE_PRIME_LIST 16779179 -#define LARGE_PRIME_MAP 16779449 -#define LARGE_PRIME_PORT 16778077 - -static uint32_t hash_atom(term t, uint32_t h, GlobalContext *global) -{ - size_t len; - const uint8_t *data = atom_table_get_atom_string(global->atom_table, term_to_atom_index(t), &len); - for (size_t i = 0; i < len; ++i) { - h = h * LARGE_PRIME_ATOM + data[i]; - } - return h * LARGE_PRIME_ATOM; -} - -static uint32_t hash_integer(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - uint64_t n = (uint64_t) term_maybe_unbox_int64(t); - while (n) { - h = h * LARGE_PRIME_INTEGER + (n & 0xFF); - n >>= 8; - } - return h * LARGE_PRIME_INTEGER; -} - -static uint32_t hash_float(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - avm_float_t f = term_to_float(t); - // Normalize -0.0 to +0.0 so that hash is consistent with term_compare (-0.0 == +0.0). - if (f == 0.0) { - f = 0.0; - } - uint8_t *data = (uint8_t *) &f; - size_t len = sizeof(avm_float_t); - for (size_t i = 0; i < len; ++i) { - h = h * LARGE_PRIME_FLOAT + data[i]; - } - return h * LARGE_PRIME_FLOAT; -} - -static uint32_t hash_local_pid(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - uint32_t n = (uint32_t) term_to_local_process_id(t); - while (n) { - h = h * LARGE_PRIME_PID + (n & 0xFF); - n >>= 8; - } - return h * LARGE_PRIME_PID; -} - -static uint32_t hash_local_port(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - uint32_t n = (uint32_t) term_to_local_process_id(t); - while (n) { - h = h * LARGE_PRIME_PORT + (n & 0xFF); - n >>= 8; - } - return h * LARGE_PRIME_PORT; -} - -static uint32_t hash_external_pid(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - uint32_t n = (uint32_t) term_get_external_pid_process_id(t); - while (n) { - h = h * LARGE_PRIME_PID + (n & 0xFF); - n >>= 8; - } - return h * LARGE_PRIME_PID; -} - -static uint32_t hash_external_port(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - uint32_t n = (uint32_t) term_get_external_port_number(t); - while (n) { - h = h * LARGE_PRIME_PORT + (n & 0xFF); - n >>= 8; - } - return h * LARGE_PRIME_PORT; -} - -static uint32_t hash_local_reference(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - uint64_t n = term_to_ref_ticks(t); - while (n) { - h = h * LARGE_PRIME_REF + (n & 0xFF); - n >>= 8; - } - return h * LARGE_PRIME_REF; -} - -static uint32_t hash_external_reference(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - uint32_t l = term_get_external_reference_len(t); - const uint32_t *words = term_get_external_reference_words(t); - for (uint32_t i = 0; i < l; i++) { - uint32_t n = words[i]; - while (n) { - h = h * LARGE_PRIME_REF + (n & 0xFF); - n >>= 8; - } - } - return h * LARGE_PRIME_REF; -} - -static uint32_t hash_binary(term t, uint32_t h, GlobalContext *global) -{ - UNUSED(global); - size_t len = (size_t) term_binary_size(t); - uint8_t *data = (uint8_t *) term_binary_data(t); - for (size_t i = 0; i < len; ++i) { - h = h * LARGE_PRIME_BINARY + data[i]; - } - return h * LARGE_PRIME_BINARY; -} - -static uint32_t hash_term_incr(term t, uint32_t h, GlobalContext *global) -{ - if (term_is_atom(t)) { - return hash_atom(t, h, global); - } else if (term_is_any_integer(t)) { - return hash_integer(t, h, global); - } else if (term_is_float(t)) { - return hash_float(t, h, global); - } else if (term_is_local_pid(t)) { - return hash_local_pid(t, h, global); - } else if (term_is_external_pid(t)) { - return hash_external_pid(t, h, global); - } else if (term_is_local_port(t)) { - return hash_local_port(t, h, global); - } else if (term_is_external_port(t)) { - return hash_external_port(t, h, global); - } else if (term_is_local_reference(t)) { - return hash_local_reference(t, h, global); - } else if (term_is_external_reference(t)) { - return hash_external_reference(t, h, global); - } else if (term_is_binary(t)) { - return hash_binary(t, h, global); - } else if (term_is_tuple(t)) { - size_t arity = term_get_tuple_arity(t); - for (size_t i = 0; i < arity; ++i) { - term elt = term_get_tuple_element(t, (int) i); - h = h * LARGE_PRIME_TUPLE + hash_term_incr(elt, h, global); - } - return h * LARGE_PRIME_TUPLE; - } else if (term_is_list(t)) { - while (term_is_nonempty_list(t)) { - term elt = term_get_list_head(t); - h = h * LARGE_PRIME_LIST + hash_term_incr(elt, h, global); - t = term_get_list_tail(t); - if (term_is_nil(t)) { - h = h * LARGE_PRIME_LIST; - break; - } else if (!term_is_list(t)) { - h = h * LARGE_PRIME_LIST + hash_term_incr(t, h, global); - break; - } - } - return h * LARGE_PRIME_LIST; - } else if (term_is_map(t)) { - size_t size = term_get_map_size(t); - for (size_t i = 0; i < size; ++i) { - term key = term_get_map_key(t, (avm_uint_t) i); - h = h * LARGE_PRIME_MAP + hash_term_incr(key, h, global); - term value = term_get_map_value(t, (avm_uint_t) i); - h = h * LARGE_PRIME_MAP + hash_term_incr(value, h, global); - } - return h * LARGE_PRIME_MAP; - } else { - fprintf(stderr, "hash_term: unsupported term type\n"); - return h; - } -} - -static uint32_t hash_term(term t, GlobalContext *global) -{ - return hash_term_incr(t, LARGE_PRIME_INITIAL, global); -} diff --git a/src/libAtomVM/globalcontext.c b/src/libAtomVM/globalcontext.c index d00213284c..a9e7ec8f50 100644 --- a/src/libAtomVM/globalcontext.c +++ b/src/libAtomVM/globalcontext.c @@ -86,6 +86,7 @@ GlobalContext *globalcontext_new(void) synclist_init(&glb->select_events); ets_init(&glb->ets); + persistent_term_init(&glb->persistent_term); glb->last_process_id = 0; @@ -284,6 +285,7 @@ COLD_FUNC void globalcontext_destroy(GlobalContext *glb) synclist_destroy(&glb->select_events); ets_destroy(&glb->ets, glb); + persistent_term_destroy(&glb->persistent_term, glb); // Destroy refc binaries including resources // (this list should be empty if resources were properly refcounted) diff --git a/src/libAtomVM/globalcontext.h b/src/libAtomVM/globalcontext.h index eb7779e9d1..3044a6eb3b 100644 --- a/src/libAtomVM/globalcontext.h +++ b/src/libAtomVM/globalcontext.h @@ -36,6 +36,7 @@ #include "ets.h" #include "list.h" #include "mailbox.h" +#include "persistent_term.h" #include "smp.h" #include "synclist.h" #include "term.h" @@ -122,6 +123,7 @@ struct GlobalContext struct SyncList select_events; struct Ets ets; + struct PersistentTerm persistent_term; int32_t last_process_id; diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 886c66a922..8d30729ffe 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -53,6 +53,7 @@ #include "mailbox.h" #include "memory.h" #include "module.h" +#include "persistent_term.h" #include "platform_nifs.h" #include "port.h" #include "posix_nifs.h" @@ -236,6 +237,11 @@ static term nif_ets_update_counter(Context *ctx, int argc, term argv[]); static term nif_ets_take(Context *ctx, int argc, term argv[]); static term nif_ets_delete(Context *ctx, int argc, term argv[]); static term nif_ets_delete_object(Context *ctx, int argc, term argv[]); +static term nif_persistent_term_get(Context *ctx, int argc, term argv[]); +static term nif_persistent_term_put(Context *ctx, int argc, term argv[]); +static term nif_persistent_term_put_new(Context *ctx, int argc, term argv[]); +static term nif_persistent_term_erase(Context *ctx, int argc, term argv[]); +static term nif_persistent_term_info(Context *ctx, int argc, term argv[]); static term nif_erlang_pid_to_list(Context *ctx, int argc, term argv[]); static term nif_erlang_port_to_list(Context *ctx, int argc, term argv[]); static term nif_erlang_ref_to_list(Context *ctx, int argc, term argv[]); @@ -799,6 +805,31 @@ static const struct Nif ets_delete_object_nif = { .nif_ptr = nif_ets_delete_object }; +static const struct Nif persistent_term_get_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_persistent_term_get +}; + +static const struct Nif persistent_term_put_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_persistent_term_put +}; + +static const struct Nif persistent_term_put_new_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_persistent_term_put_new +}; + +static const struct Nif persistent_term_erase_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_persistent_term_erase +}; + +static const struct Nif persistent_term_info_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_persistent_term_info +}; + static const struct Nif atomvm_add_avm_pack_binary_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_atomvm_add_avm_pack_binary @@ -4489,6 +4520,137 @@ static term nif_ets_delete_object(Context *ctx, int argc, term argv[]) } } +static term nif_persistent_term_get(Context *ctx, int argc, term argv[]) +{ + PersistentTerm *persistent_term = &ctx->global->persistent_term; + + if (argc == 0) { + term ret = term_invalid_term(); + persistent_term_result_t result = persistent_term_get_all_maybe_gc(persistent_term, &ret, ctx); + switch (result) { + case PersistentTermOk: + return ret; + case PersistentTermAllocationError: + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + default: + UNREACHABLE(); + } + } + + term value = term_invalid_term(); + persistent_term_result_t result = persistent_term_get(persistent_term, argv[0], &value, ctx->global); + + switch (result) { + case PersistentTermOk: + return value; + case PersistentTermNotFound: + if (argc == 2) { + return argv[1]; + } + RAISE_ERROR(BADARG_ATOM); + case PersistentTermAllocationError: + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + default: + UNREACHABLE(); + } +} + +static term nif_persistent_term_put(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + persistent_term_result_t result = persistent_term_put( + &ctx->global->persistent_term, + argv[0], + argv[1], + false, + ctx->global); + + switch (result) { + case PersistentTermOk: + return OK_ATOM; + case PersistentTermAllocationError: + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + default: + UNREACHABLE(); + } +} + +static term nif_persistent_term_put_new(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + persistent_term_result_t result = persistent_term_put( + &ctx->global->persistent_term, + argv[0], + argv[1], + true, + ctx->global); + + switch (result) { + case PersistentTermOk: + return OK_ATOM; + case PersistentTermExists: + RAISE_ERROR(BADARG_ATOM); + case PersistentTermAllocationError: + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + default: + UNREACHABLE(); + } +} + +static term nif_persistent_term_erase(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + bool removed; + persistent_term_result_t result = persistent_term_erase( + &ctx->global->persistent_term, + argv[0], + &removed, + ctx->global); + + switch (result) { + case PersistentTermOk: + return removed ? TRUE_ATOM : FALSE_ATOM; + case PersistentTermAllocationError: + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + default: + UNREACHABLE(); + } +} + +static term nif_persistent_term_info(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + UNUSED(argv); + + size_t count; + size_t memory; + persistent_term_info(&ctx->global->persistent_term, &count, &memory); + + avm_int64_t count_i = (avm_int64_t) count; + avm_int64_t memory_i = (avm_int64_t) memory; + size_t heap_size = TERM_MAP_SIZE(2) + term_boxed_integer_size(count_i) + term_boxed_integer_size(memory_i); + if (UNLIKELY(memory_ensure_free_opt(ctx, heap_size, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + term map = term_alloc_map(2, &ctx->heap); + term_set_map_assoc( + map, + 0, + globalcontext_make_atom(ctx->global, ATOM_STR("\x5", "count")), + term_make_maybe_boxed_int64(count_i, &ctx->heap)); + term_set_map_assoc( + map, + 1, + MEMORY_ATOM, + term_make_maybe_boxed_int64(memory_i, &ctx->heap)); + + return map; +} + static term nif_erts_debug_flat_size(Context *ctx, int argc, term argv[]) { UNUSED(ctx); diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf index c7ce64cf7e..b84e45069e 100644 --- a/src/libAtomVM/nifs.gperf +++ b/src/libAtomVM/nifs.gperf @@ -176,6 +176,13 @@ ets:update_counter/4, &ets_update_counter_nif ets:delete/1, &ets_delete_nif ets:delete/2, &ets_delete_nif ets:delete_object/2, &ets_delete_object_nif +persistent_term:erase/1, &persistent_term_erase_nif +persistent_term:get/0, &persistent_term_get_nif +persistent_term:get/1, &persistent_term_get_nif +persistent_term:get/2, &persistent_term_get_nif +persistent_term:info/0, &persistent_term_info_nif +persistent_term:put/2, &persistent_term_put_nif +persistent_term:put_new/2, &persistent_term_put_new_nif atomvm:add_avm_pack_binary/2, &atomvm_add_avm_pack_binary_nif atomvm:add_avm_pack_file/2, &atomvm_add_avm_pack_file_nif atomvm:close_avm_pack/2, &atomvm_close_avm_pack_nif diff --git a/src/libAtomVM/persistent_term.c b/src/libAtomVM/persistent_term.c new file mode 100644 index 0000000000..7f35ddf371 --- /dev/null +++ b/src/libAtomVM/persistent_term.c @@ -0,0 +1,360 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2026 Peter M. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#include "persistent_term.h" + +#include +#include + +#include "context.h" +#include "globalcontext.h" +#include "memory.h" +#include "smp.h" +#include "term.h" +#include "term_hash.h" +#include "utils.h" + +#ifndef AVM_NO_SMP +#define SMP_RDLOCK(persistent_term) smp_spinlock_lock(&(persistent_term)->lock) +#define SMP_WRLOCK(persistent_term) smp_spinlock_lock(&(persistent_term)->lock) +#define SMP_UNLOCK(persistent_term) smp_spinlock_unlock(&(persistent_term)->lock) +#else +#define SMP_RDLOCK(persistent_term) UNUSED(persistent_term) +#define SMP_WRLOCK(persistent_term) UNUSED(persistent_term) +#define SMP_UNLOCK(persistent_term) UNUSED(persistent_term) +#endif + +struct PersistentTermEntry +{ + struct PersistentTermEntry *next; + term key; + term value; + Heap *heap; + size_t memory; +}; + +static persistent_term_result_t find_entry( + PersistentTerm *persistent_term, + term key, + struct PersistentTermEntry ***out_link, + struct PersistentTermEntry **out_entry, + GlobalContext *global); +static struct PersistentTermEntry *entry_new(term key, term value); +static void entry_destroy(struct PersistentTermEntry *entry, GlobalContext *global); +static void retire_entry(PersistentTerm *persistent_term, struct PersistentTermEntry *entry); +static bool term_is_equal(term a, term b, GlobalContext *global, persistent_term_result_t *result); + +void persistent_term_init(PersistentTerm *persistent_term) +{ + persistent_term->count = 0; + persistent_term->memory = 0; + persistent_term->retired_entries = NULL; + for (size_t i = 0; i < PERSISTENT_TERM_NUM_BUCKETS; i++) { + persistent_term->buckets[i] = NULL; + } + +#ifndef AVM_NO_SMP + smp_spinlock_init(&persistent_term->lock); +#endif +} + +void persistent_term_destroy(PersistentTerm *persistent_term, GlobalContext *global) +{ + SMP_WRLOCK(persistent_term); + for (size_t i = 0; i < PERSISTENT_TERM_NUM_BUCKETS; i++) { + struct PersistentTermEntry *entry = persistent_term->buckets[i]; + while (entry != NULL) { + struct PersistentTermEntry *next = entry->next; + entry_destroy(entry, global); + entry = next; + } + persistent_term->buckets[i] = NULL; + } + + struct PersistentTermEntry *entry = persistent_term->retired_entries; + while (entry != NULL) { + struct PersistentTermEntry *next = entry->next; + entry_destroy(entry, global); + entry = next; + } + persistent_term->retired_entries = NULL; + persistent_term->count = 0; + persistent_term->memory = 0; + SMP_UNLOCK(persistent_term); +} + +persistent_term_result_t persistent_term_put( + PersistentTerm *persistent_term, + term key, + term value, + bool put_new, + GlobalContext *global) +{ + SMP_WRLOCK(persistent_term); + + struct PersistentTermEntry **link; + struct PersistentTermEntry *entry; + persistent_term_result_t result = find_entry(persistent_term, key, &link, &entry, global); + if (UNLIKELY(result != PersistentTermOk)) { + SMP_UNLOCK(persistent_term); + return result; + } + + if (entry != NULL) { + bool equal = term_is_equal(entry->value, value, global, &result); + if (UNLIKELY(result != PersistentTermOk)) { + SMP_UNLOCK(persistent_term); + return result; + } + + if (equal) { + SMP_UNLOCK(persistent_term); + return PersistentTermOk; + } + + if (put_new) { + SMP_UNLOCK(persistent_term); + return PersistentTermExists; + } + } + + struct PersistentTermEntry *new_entry = entry_new(key, value); + if (IS_NULL_PTR(new_entry)) { + SMP_UNLOCK(persistent_term); + return PersistentTermAllocationError; + } + + if (entry == NULL) { + uint32_t idx = term_hash(key, global) % PERSISTENT_TERM_NUM_BUCKETS; + new_entry->next = persistent_term->buckets[idx]; + persistent_term->buckets[idx] = new_entry; + persistent_term->count++; + persistent_term->memory += new_entry->memory; + } else { + new_entry->next = entry->next; + *link = new_entry; + persistent_term->memory += new_entry->memory; + retire_entry(persistent_term, entry); + } + + SMP_UNLOCK(persistent_term); + return PersistentTermOk; +} + +persistent_term_result_t persistent_term_get( + PersistentTerm *persistent_term, + term key, + term *value, + GlobalContext *global) +{ + assert(value != NULL); + + SMP_RDLOCK(persistent_term); + + struct PersistentTermEntry *entry; + persistent_term_result_t result = find_entry(persistent_term, key, NULL, &entry, global); + if (UNLIKELY(result != PersistentTermOk)) { + SMP_UNLOCK(persistent_term); + return result; + } + + if (entry == NULL) { + SMP_UNLOCK(persistent_term); + return PersistentTermNotFound; + } + + *value = entry->value; + SMP_UNLOCK(persistent_term); + return PersistentTermOk; +} + +persistent_term_result_t persistent_term_erase( + PersistentTerm *persistent_term, + term key, + bool *removed, + GlobalContext *global) +{ + assert(removed != NULL); + + *removed = false; + + SMP_WRLOCK(persistent_term); + + struct PersistentTermEntry **link; + struct PersistentTermEntry *entry; + persistent_term_result_t result = find_entry(persistent_term, key, &link, &entry, global); + if (UNLIKELY(result != PersistentTermOk)) { + SMP_UNLOCK(persistent_term); + return result; + } + + if (entry == NULL) { + SMP_UNLOCK(persistent_term); + return PersistentTermOk; + } + + *link = entry->next; + persistent_term->count--; + retire_entry(persistent_term, entry); + + *removed = true; + SMP_UNLOCK(persistent_term); + return PersistentTermOk; +} + +persistent_term_result_t persistent_term_get_all_maybe_gc( + PersistentTerm *persistent_term, + term *ret, + Context *ctx) +{ + assert(ret != NULL); + + SMP_RDLOCK(persistent_term); + + size_t needed = 0; + for (size_t i = 0; i < PERSISTENT_TERM_NUM_BUCKETS; i++) { + for (struct PersistentTermEntry *entry = persistent_term->buckets[i]; entry != NULL; entry = entry->next) { + needed += CONS_SIZE + TUPLE_SIZE(2) + memory_estimate_usage(entry->key); + } + } + + if (UNLIKELY(memory_ensure_free_opt(ctx, needed, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { + SMP_UNLOCK(persistent_term); + return PersistentTermAllocationError; + } + + term list = term_nil(); + for (size_t i = 0; i < PERSISTENT_TERM_NUM_BUCKETS; i++) { + for (struct PersistentTermEntry *entry = persistent_term->buckets[i]; entry != NULL; entry = entry->next) { + term tuple = term_alloc_tuple(2, &ctx->heap); + term key = memory_copy_term_tree(&ctx->heap, entry->key); + term_put_tuple_element(tuple, 0, key); + term_put_tuple_element(tuple, 1, entry->value); + list = term_list_prepend(tuple, list, &ctx->heap); + } + } + + *ret = list; + SMP_UNLOCK(persistent_term); + return PersistentTermOk; +} + +void persistent_term_info(PersistentTerm *persistent_term, size_t *count, size_t *memory) +{ + assert(count != NULL); + assert(memory != NULL); + + SMP_RDLOCK(persistent_term); + *count = persistent_term->count; + *memory = persistent_term->memory; + SMP_UNLOCK(persistent_term); +} + +static persistent_term_result_t find_entry( + PersistentTerm *persistent_term, + term key, + struct PersistentTermEntry ***out_link, + struct PersistentTermEntry **out_entry, + GlobalContext *global) +{ + assert(out_entry != NULL); + + *out_entry = NULL; + + uint32_t idx = term_hash(key, global) % PERSISTENT_TERM_NUM_BUCKETS; + struct PersistentTermEntry **link = &persistent_term->buckets[idx]; + while (*link != NULL) { + persistent_term_result_t result = PersistentTermOk; + bool equal = term_is_equal((*link)->key, key, global, &result); + if (UNLIKELY(result != PersistentTermOk)) { + return result; + } + + if (equal) { + if (out_link != NULL) { + *out_link = link; + } + *out_entry = *link; + return PersistentTermOk; + } + + link = &(*link)->next; + } + + if (out_link != NULL) { + *out_link = link; + } + return PersistentTermOk; +} + +static struct PersistentTermEntry *entry_new(term key, term value) +{ + struct PersistentTermEntry *entry = malloc(sizeof(struct PersistentTermEntry)); + if (IS_NULL_PTR(entry)) { + return NULL; + } + + Heap *heap = malloc(sizeof(Heap)); + if (IS_NULL_PTR(heap)) { + free(entry); + return NULL; + } + + size_t size = memory_estimate_usage(key) + memory_estimate_usage(value); + if (UNLIKELY(memory_init_heap(heap, size) != MEMORY_GC_OK)) { + free(heap); + free(entry); + return NULL; + } + + entry->key = memory_copy_term_tree(heap, key); + entry->value = memory_copy_term_tree(heap, value); + entry->heap = heap; + entry->memory = sizeof(struct PersistentTermEntry) + sizeof(Heap) + sizeof(HeapFragment) + + ((size_t) (heap->heap_ptr - heap->heap_start) * sizeof(term)); + entry->next = NULL; + + return entry; +} + +static void entry_destroy(struct PersistentTermEntry *entry, GlobalContext *global) +{ + memory_destroy_heap(entry->heap, global); + free(entry->heap); + free(entry); +} + +static void retire_entry(PersistentTerm *persistent_term, struct PersistentTermEntry *entry) +{ + entry->next = persistent_term->retired_entries; + persistent_term->retired_entries = entry; +} + +static bool term_is_equal(term a, term b, GlobalContext *global, persistent_term_result_t *result) +{ + TermCompareResult compare_result = term_compare(a, b, TermCompareExact, global); + if (UNLIKELY(compare_result == TermCompareMemoryAllocFail)) { + *result = PersistentTermAllocationError; + return false; + } + + *result = PersistentTermOk; + return compare_result == TermEquals; +} diff --git a/src/libAtomVM/persistent_term.h b/src/libAtomVM/persistent_term.h new file mode 100644 index 0000000000..e069db0a5e --- /dev/null +++ b/src/libAtomVM/persistent_term.h @@ -0,0 +1,92 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2026 Peter M. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef _PERSISTENT_TERM_H_ +#define _PERSISTENT_TERM_H_ + +#include +#include + +#include "smp.h" +#include "term_typedef.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PERSISTENT_TERM_NUM_BUCKETS 32 + +struct Context; +struct GlobalContext; +struct PersistentTermEntry; + +typedef enum +{ + PersistentTermOk, + PersistentTermNotFound, + PersistentTermExists, + PersistentTermAllocationError +} persistent_term_result_t; + +typedef struct PersistentTerm +{ + size_t count; + size_t memory; + struct PersistentTermEntry *buckets[PERSISTENT_TERM_NUM_BUCKETS]; + struct PersistentTermEntry *retired_entries; +#ifndef AVM_NO_SMP + SpinLock lock; +#endif +} PersistentTerm; + +void persistent_term_init(PersistentTerm *persistent_term); +void persistent_term_destroy(PersistentTerm *persistent_term, struct GlobalContext *global); + +persistent_term_result_t persistent_term_put( + PersistentTerm *persistent_term, + term key, + term value, + bool put_new, + struct GlobalContext *global); + +persistent_term_result_t persistent_term_get( + PersistentTerm *persistent_term, + term key, + term *value, + struct GlobalContext *global); + +persistent_term_result_t persistent_term_erase( + PersistentTerm *persistent_term, + term key, + bool *removed, + struct GlobalContext *global); + +persistent_term_result_t persistent_term_get_all_maybe_gc( + PersistentTerm *persistent_term, + term *ret, + struct Context *ctx); + +void persistent_term_info(PersistentTerm *persistent_term, size_t *count, size_t *memory); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/libAtomVM/term_hash.c b/src/libAtomVM/term_hash.c new file mode 100644 index 0000000000..a207103996 --- /dev/null +++ b/src/libAtomVM/term_hash.c @@ -0,0 +1,265 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2026 Peter M. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#include "term_hash.h" + +#include + +#include "atom_table.h" +#include "globalcontext.h" +#include "module.h" + +// +// hash function +// +// Conceptually similar to (but not identical to) the `make_hash` algorithm described in +// https://github.com/erlang/otp/blob/cbd1378ee1fde835e55614bac9290b281bafe49a/erts/emulator/beam/utils.c#L644 +// +// Also described in character folding algorithm (PJW Hash) +// https://en.wikipedia.org/wiki/Hash_function#Character_folding +// +// TODO: implement erlang:phash2 using the OTP algorithm +// + +// some large (close to 2^24) primes taken from +// http://compoasso.free.fr/primelistweb/page/prime/liste_online_en.php + +#define LARGE_PRIME_INITIAL 16777259 +#define LARGE_PRIME_ATOM 16777643 +#define LARGE_PRIME_INTEGER 16777781 +#define LARGE_PRIME_FLOAT 16777973 +#define LARGE_PRIME_PID 16778147 +#define LARGE_PRIME_REF 16778441 +#define LARGE_PRIME_BINARY 16780483 +#define LARGE_PRIME_TUPLE 16778821 +#define LARGE_PRIME_LIST 16779179 +#define LARGE_PRIME_MAP 16779449 +#define LARGE_PRIME_PORT 16778077 +#define LARGE_PRIME_FUNCTION 16780621 + +static uint32_t hash_term_incr(term t, uint32_t h, GlobalContext *global); + +static uint32_t hash_uint32(uint32_t n, uint32_t h, uint32_t prime) +{ + while (n) { + h = h * prime + (n & 0xFF); + n >>= 8; + } + return h * prime; +} + +static uint32_t hash_uint64(uint64_t n, uint32_t h, uint32_t prime) +{ + while (n) { + h = h * prime + (n & 0xFF); + n >>= 8; + } + return h * prime; +} + +static uint32_t hash_atom(term t, uint32_t h, GlobalContext *global) +{ + size_t len; + const uint8_t *data = atom_table_get_atom_string(global->atom_table, term_to_atom_index(t), &len); + for (size_t i = 0; i < len; ++i) { + h = h * LARGE_PRIME_ATOM + data[i]; + } + return h * LARGE_PRIME_ATOM; +} + +static uint32_t hash_integer(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + return hash_uint64((uint64_t) term_maybe_unbox_int64(t), h, LARGE_PRIME_INTEGER); +} + +static uint32_t hash_float(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + avm_float_t f = term_to_float(t); + // Normalize -0.0 to +0.0 so that hash is consistent with term_compare (-0.0 == +0.0). + if (f == 0.0) { + f = 0.0; + } + uint8_t *data = (uint8_t *) &f; + size_t len = sizeof(avm_float_t); + for (size_t i = 0; i < len; ++i) { + h = h * LARGE_PRIME_FLOAT + data[i]; + } + return h * LARGE_PRIME_FLOAT; +} + +static uint32_t hash_local_pid(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + return hash_uint32((uint32_t) term_to_local_process_id(t), h, LARGE_PRIME_PID); +} + +static uint32_t hash_local_port(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + return hash_uint32((uint32_t) term_to_local_process_id(t), h, LARGE_PRIME_PORT); +} + +static uint32_t hash_external_pid(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + return hash_uint32((uint32_t) term_get_external_pid_process_id(t), h, LARGE_PRIME_PID); +} + +static uint32_t hash_external_port(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + return hash_uint32((uint32_t) term_get_external_port_number(t), h, LARGE_PRIME_PORT); +} + +static uint32_t hash_local_reference(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + return hash_uint64(term_to_ref_ticks(t), h, LARGE_PRIME_REF); +} + +static uint32_t hash_external_reference(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + uint32_t l = term_get_external_reference_len(t); + const uint32_t *words = term_get_external_reference_words(t); + for (uint32_t i = 0; i < l; i++) { + uint32_t n = words[i]; + while (n) { + h = h * LARGE_PRIME_REF + (n & 0xFF); + n >>= 8; + } + } + return h * LARGE_PRIME_REF; +} + +static uint32_t hash_binary(term t, uint32_t h, GlobalContext *global) +{ + UNUSED(global); + size_t len = (size_t) term_binary_size(t); + uint8_t *data = (uint8_t *) term_binary_data(t); + for (size_t i = 0; i < len; ++i) { + h = h * LARGE_PRIME_BINARY + data[i]; + } + return h * LARGE_PRIME_BINARY; +} + +static uint32_t hash_function(term t, uint32_t h, GlobalContext *global) +{ + const term *boxed_value = term_to_const_term_ptr(t); + + if (term_is_external_fun(t)) { + h = h * LARGE_PRIME_FUNCTION + 1; + h = h * LARGE_PRIME_FUNCTION + hash_term_incr(boxed_value[1], h, global); + h = h * LARGE_PRIME_FUNCTION + hash_term_incr(boxed_value[2], h, global); + h = h * LARGE_PRIME_FUNCTION + hash_term_incr(boxed_value[3], h, global); + return h * LARGE_PRIME_FUNCTION; + } + + h = h * LARGE_PRIME_FUNCTION; + + Module *fun_module = (Module *) boxed_value[1]; + term module_name_atom = module_get_name(fun_module); + uint32_t fun_index = term_to_int32(boxed_value[2]); + uint32_t arity; + uint32_t old_index; + uint32_t old_uniq; + module_get_fun_arity_old_index_uniq(fun_module, fun_index, &arity, &old_index, &old_uniq); + UNUSED(arity); + UNUSED(old_index); + + h = h * LARGE_PRIME_FUNCTION + hash_term_incr(module_name_atom, h, global); + h = hash_uint32(fun_index, h, LARGE_PRIME_FUNCTION); + h = hash_uint32(old_uniq, h, LARGE_PRIME_FUNCTION); + + uint32_t num_freeze = module_get_fun_freeze(fun_module, fun_index); + h = hash_uint32(num_freeze, h, LARGE_PRIME_FUNCTION); + for (uint32_t i = 0; i < num_freeze; i++) { + h = h * LARGE_PRIME_FUNCTION + hash_term_incr(boxed_value[i + 3], h, global); + } + + return h * LARGE_PRIME_FUNCTION; +} + +static uint32_t hash_term_incr(term t, uint32_t h, GlobalContext *global) +{ + if (term_is_atom(t)) { + return hash_atom(t, h, global); + } else if (term_is_any_integer(t)) { + return hash_integer(t, h, global); + } else if (term_is_float(t)) { + return hash_float(t, h, global); + } else if (term_is_local_pid(t)) { + return hash_local_pid(t, h, global); + } else if (term_is_external_pid(t)) { + return hash_external_pid(t, h, global); + } else if (term_is_local_port(t)) { + return hash_local_port(t, h, global); + } else if (term_is_external_port(t)) { + return hash_external_port(t, h, global); + } else if (term_is_local_reference(t)) { + return hash_local_reference(t, h, global); + } else if (term_is_external_reference(t)) { + return hash_external_reference(t, h, global); + } else if (term_is_function(t)) { + return hash_function(t, h, global); + } else if (term_is_binary(t)) { + return hash_binary(t, h, global); + } else if (term_is_tuple(t)) { + size_t arity = term_get_tuple_arity(t); + for (size_t i = 0; i < arity; ++i) { + term elt = term_get_tuple_element(t, (int) i); + h = h * LARGE_PRIME_TUPLE + hash_term_incr(elt, h, global); + } + return h * LARGE_PRIME_TUPLE; + } else if (term_is_list(t)) { + while (term_is_nonempty_list(t)) { + term elt = term_get_list_head(t); + h = h * LARGE_PRIME_LIST + hash_term_incr(elt, h, global); + t = term_get_list_tail(t); + if (term_is_nil(t)) { + h = h * LARGE_PRIME_LIST; + break; + } else if (!term_is_list(t)) { + h = h * LARGE_PRIME_LIST + hash_term_incr(t, h, global); + break; + } + } + return h * LARGE_PRIME_LIST; + } else if (term_is_map(t)) { + size_t size = term_get_map_size(t); + for (size_t i = 0; i < size; ++i) { + term key = term_get_map_key(t, (avm_uint_t) i); + h = h * LARGE_PRIME_MAP + hash_term_incr(key, h, global); + term value = term_get_map_value(t, (avm_uint_t) i); + h = h * LARGE_PRIME_MAP + hash_term_incr(value, h, global); + } + return h * LARGE_PRIME_MAP; + } else { + fprintf(stderr, "term_hash: unsupported term type\n"); + return h; + } +} + +uint32_t term_hash(term t, GlobalContext *global) +{ + return hash_term_incr(t, LARGE_PRIME_INITIAL, global); +} diff --git a/src/libAtomVM/term_hash.h b/src/libAtomVM/term_hash.h new file mode 100644 index 0000000000..c693e00fbe --- /dev/null +++ b/src/libAtomVM/term_hash.h @@ -0,0 +1,51 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2026 Peter M. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef _TERM_HASH_H_ +#define _TERM_HASH_H_ + +#include + +#include "term.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct GlobalContext; + +/** + * @brief Calculate a stable hash for an Erlang term. + * + * @details This hash is intended for AtomVM internal hash tables. It is + * conceptually similar to the term hashing used by OTP, but it is not part of + * the Erlang API and must not be exposed as erlang:phash2. + * + * @param t term to hash + * @param global global context used to resolve atom strings + * @return 32-bit hash value + */ +uint32_t term_hash(term t, struct GlobalContext *global); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index a9aa020c64..4777427aa2 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -643,6 +643,7 @@ compile_erlang(maps_nifs) compile_erlang(test_raw_raise) compile_erlang(test_ets) +compile_erlang(test_persistent_term) compile_erlang(test_node) compile_erlang(test_code_server_nifs) @@ -1207,6 +1208,7 @@ set(erlang_test_beams test_raw_raise.beam test_ets.beam + test_persistent_term.beam test_node.beam test_list_to_bitstring.beam diff --git a/tests/erlang_tests/test_persistent_term.erl b/tests/erlang_tests/test_persistent_term.erl new file mode 100644 index 0000000000..48ee7aa5c1 --- /dev/null +++ b/tests/erlang_tests/test_persistent_term.erl @@ -0,0 +1,134 @@ +% +% This file is part of AtomVM. +% +% Copyright 2026 Peter M. +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(test_persistent_term). + +-export([start/0]). + +start() -> + cleanup(), + ok = test_get_put_erase(), + % put_new/2 is OTP 28.4+ only + case erlang:function_exported(persistent_term, put_new, 2) of + true -> ok = test_put_new(); + false -> ok + end, + ok = test_complex_keys(), + ok = test_fun_keys(), + ok = test_info_and_get_all(), + cleanup(), + 0. + +test_get_put_erase() -> + Key = {?MODULE, basic}, + assert_badarg(fun() -> persistent_term:get(Key) end), + default = persistent_term:get(Key, default), + false = persistent_term:erase(Key), + + Big = <<1:80/unit:8>>, + OldValue = {old, [Big, <<"small">>]}, + ok = persistent_term:put(Key, OldValue), + OldValue = persistent_term:get(Key), + + Retained = persistent_term:get(Key), + ok = persistent_term:put(Key, {new, value}), + {new, value} = persistent_term:get(Key), + OldValue = Retained, + + true = persistent_term:erase(Key), + false = persistent_term:erase(Key), + missing = persistent_term:get(Key, missing), + OldValue = Retained, + ok. + +test_put_new() -> + Key = {?MODULE, put_new}, + ok = persistent_term:put_new(Key, first), + ok = persistent_term:put_new(Key, first), + first = persistent_term:get(Key), + assert_badarg(fun() -> persistent_term:put_new(Key, second) end), + first = persistent_term:get(Key), + true = persistent_term:erase(Key), + ok. + +test_complex_keys() -> + Key = {{?MODULE, complex}, [self(), <<"bin">>], #{a => 1, <<"b">> => {c, d}}}, + ok = persistent_term:put(Key, complex_value), + complex_value = persistent_term:get(Key), + true = persistent_term:erase(Key), + complex_value = persistent_term:get(Key, complex_value), + ok. + +test_fun_keys() -> + LocalFun = fun identity/1, + ExternalFun = fun erlang:length/1, + Key = {?MODULE, fun_key, LocalFun, ExternalFun}, + EquivalentKey = {?MODULE, fun_key, fun identity/1, fun erlang:length/1}, + ok = persistent_term:put(Key, fun_value), + fun_value = persistent_term:get(EquivalentKey), + true = persistent_term:erase(EquivalentKey), + missing = persistent_term:get(Key, missing), + ok. + +test_info_and_get_all() -> + Key1 = {?MODULE, info_1}, + Key2 = {?MODULE, info_2}, + #{count := Count0} = persistent_term:info(), + + ok = persistent_term:put(Key1, value1), + ok = persistent_term:put(Key2, {value2, [1, 2, 3]}), + + #{count := Count1, memory := Memory1} = persistent_term:info(), + true = Count1 >= Count0 + 2, + true = is_integer(Memory1), + true = Memory1 > 0, + + ok = persistent_term:put(Key1, {value1, replaced}), + #{count := Count2, memory := Memory2} = persistent_term:info(), + Count1 = Count2, + true = Memory2 > Memory1, + + All = persistent_term:get(), + true = lists:member({Key1, {value1, replaced}}, All), + true = lists:member({Key2, {value2, [1, 2, 3]}}, All), + + true = persistent_term:erase(Key1), + #{count := Count3, memory := Memory3} = persistent_term:info(), + Count3 = Count2 - 1, + %% AtomVM retains retired entries on a dead list until VM shutdown so + %% previously returned terms stay valid without a global GC pass; + %% OTP may reclaim that memory immediately. + true = (Memory3 =< Memory2), + true = persistent_term:erase(Key2), + ok. + +cleanup() -> + _ = persistent_term:erase({?MODULE, basic}), + _ = persistent_term:erase({?MODULE, put_new}), + _ = persistent_term:erase({?MODULE, info_1}), + _ = persistent_term:erase({?MODULE, info_2}), + ok. + +assert_badarg(Fun) -> + {'EXIT', {badarg, _}} = (catch Fun()), + ok. + +identity(Value) -> + Value. diff --git a/tests/test.c b/tests/test.c index 2aae87a981..121192cd65 100644 --- a/tests/test.c +++ b/tests/test.c @@ -626,6 +626,7 @@ struct Test tests[] = { TEST_CASE(test_raw_raise), TEST_CASE(test_ets), + TEST_CASE(test_persistent_term), TEST_CASE(test_node), TEST_CASE(bigint), From 5ab948f8692b12870abc435abb25607c34a8c149 Mon Sep 17 00:00:00 2001 From: Peter M Date: Sun, 31 May 2026 13:54:33 +0200 Subject: [PATCH 2/3] Fix CodeQL warning in 64-bit term hash folding Use an explicit 64-bit intermediate when folding bytes from uint64_t values into the 32-bit term hash accumulator. This avoids CodeQL's "multiplication result converted to larger type" warning while preserving the intended uint32_t wraparound behavior of the hash. Signed-off-by: Peter M --- src/libAtomVM/term_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libAtomVM/term_hash.c b/src/libAtomVM/term_hash.c index a207103996..a2149426d8 100644 --- a/src/libAtomVM/term_hash.c +++ b/src/libAtomVM/term_hash.c @@ -68,7 +68,7 @@ static uint32_t hash_uint32(uint32_t n, uint32_t h, uint32_t prime) static uint32_t hash_uint64(uint64_t n, uint32_t h, uint32_t prime) { while (n) { - h = h * prime + (n & 0xFF); + h = (uint32_t) ((uint64_t) h * prime + (n & 0xFF)); n >>= 8; } return h * prime; From fda1420e90bfdee80862a18d47ad8c93672b53e7 Mon Sep 17 00:00:00 2001 From: Peter M Date: Tue, 2 Jun 2026 07:50:45 +0200 Subject: [PATCH 3/3] Refine persistent_term locking and get_all ownership Use the project RWLock API directly for persistent_term instead of wrapping spinlock calls in local read/write lock macros. Also reduce the persistent_term critical sections by precomputing the bucket hash and allocating replacement entries before taking the write lock. Finally, stop copying keys in persistent_term:get/0. The returned list now allocates only the outer list and tuple cells on the caller heap, while both keys and values reference the persistent-term entry heap directly. This matches get/1 semantics, since retired entries are retained so previously returned persistent terms remain valid. Add a regression assertion that a persistent_term:get/0 result remains valid after the corresponding entries are erased. Signed-off-by: Peter M --- src/libAtomVM/persistent_term.c | 100 ++++++++++---------- src/libAtomVM/persistent_term.h | 2 +- tests/erlang_tests/test_persistent_term.erl | 2 + 3 files changed, 54 insertions(+), 50 deletions(-) diff --git a/src/libAtomVM/persistent_term.c b/src/libAtomVM/persistent_term.c index 7f35ddf371..355becc9a0 100644 --- a/src/libAtomVM/persistent_term.c +++ b/src/libAtomVM/persistent_term.c @@ -31,16 +31,6 @@ #include "term_hash.h" #include "utils.h" -#ifndef AVM_NO_SMP -#define SMP_RDLOCK(persistent_term) smp_spinlock_lock(&(persistent_term)->lock) -#define SMP_WRLOCK(persistent_term) smp_spinlock_lock(&(persistent_term)->lock) -#define SMP_UNLOCK(persistent_term) smp_spinlock_unlock(&(persistent_term)->lock) -#else -#define SMP_RDLOCK(persistent_term) UNUSED(persistent_term) -#define SMP_WRLOCK(persistent_term) UNUSED(persistent_term) -#define SMP_UNLOCK(persistent_term) UNUSED(persistent_term) -#endif - struct PersistentTermEntry { struct PersistentTermEntry *next; @@ -52,6 +42,7 @@ struct PersistentTermEntry static persistent_term_result_t find_entry( PersistentTerm *persistent_term, + uint32_t bucket_index, term key, struct PersistentTermEntry ***out_link, struct PersistentTermEntry **out_entry, @@ -71,13 +62,13 @@ void persistent_term_init(PersistentTerm *persistent_term) } #ifndef AVM_NO_SMP - smp_spinlock_init(&persistent_term->lock); + persistent_term->lock = smp_rwlock_create(); #endif } void persistent_term_destroy(PersistentTerm *persistent_term, GlobalContext *global) { - SMP_WRLOCK(persistent_term); + SMP_RWLOCK_WRLOCK(persistent_term->lock); for (size_t i = 0; i < PERSISTENT_TERM_NUM_BUCKETS; i++) { struct PersistentTermEntry *entry = persistent_term->buckets[i]; while (entry != NULL) { @@ -97,7 +88,11 @@ void persistent_term_destroy(PersistentTerm *persistent_term, GlobalContext *glo persistent_term->retired_entries = NULL; persistent_term->count = 0; persistent_term->memory = 0; - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); +#ifndef AVM_NO_SMP + smp_rwlock_destroy(persistent_term->lock); + persistent_term->lock = NULL; +#endif } persistent_term_result_t persistent_term_put( @@ -107,44 +102,48 @@ persistent_term_result_t persistent_term_put( bool put_new, GlobalContext *global) { - SMP_WRLOCK(persistent_term); + uint32_t bucket_index = term_hash(key, global) % PERSISTENT_TERM_NUM_BUCKETS; + + struct PersistentTermEntry *new_entry = entry_new(key, value); + if (IS_NULL_PTR(new_entry)) { + return PersistentTermAllocationError; + } + + SMP_RWLOCK_WRLOCK(persistent_term->lock); struct PersistentTermEntry **link; struct PersistentTermEntry *entry; - persistent_term_result_t result = find_entry(persistent_term, key, &link, &entry, global); + persistent_term_result_t result = find_entry(persistent_term, bucket_index, key, &link, &entry, global); if (UNLIKELY(result != PersistentTermOk)) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); + entry_destroy(new_entry, global); return result; } if (entry != NULL) { bool equal = term_is_equal(entry->value, value, global, &result); if (UNLIKELY(result != PersistentTermOk)) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); + entry_destroy(new_entry, global); return result; } if (equal) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); + entry_destroy(new_entry, global); return PersistentTermOk; } if (put_new) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); + entry_destroy(new_entry, global); return PersistentTermExists; } } - struct PersistentTermEntry *new_entry = entry_new(key, value); - if (IS_NULL_PTR(new_entry)) { - SMP_UNLOCK(persistent_term); - return PersistentTermAllocationError; - } - if (entry == NULL) { - uint32_t idx = term_hash(key, global) % PERSISTENT_TERM_NUM_BUCKETS; - new_entry->next = persistent_term->buckets[idx]; - persistent_term->buckets[idx] = new_entry; + new_entry->next = persistent_term->buckets[bucket_index]; + persistent_term->buckets[bucket_index] = new_entry; persistent_term->count++; persistent_term->memory += new_entry->memory; } else { @@ -154,7 +153,7 @@ persistent_term_result_t persistent_term_put( retire_entry(persistent_term, entry); } - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return PersistentTermOk; } @@ -166,22 +165,24 @@ persistent_term_result_t persistent_term_get( { assert(value != NULL); - SMP_RDLOCK(persistent_term); + uint32_t bucket_index = term_hash(key, global) % PERSISTENT_TERM_NUM_BUCKETS; + + SMP_RWLOCK_RDLOCK(persistent_term->lock); struct PersistentTermEntry *entry; - persistent_term_result_t result = find_entry(persistent_term, key, NULL, &entry, global); + persistent_term_result_t result = find_entry(persistent_term, bucket_index, key, NULL, &entry, global); if (UNLIKELY(result != PersistentTermOk)) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return result; } if (entry == NULL) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return PersistentTermNotFound; } *value = entry->value; - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return PersistentTermOk; } @@ -195,18 +196,20 @@ persistent_term_result_t persistent_term_erase( *removed = false; - SMP_WRLOCK(persistent_term); + uint32_t bucket_index = term_hash(key, global) % PERSISTENT_TERM_NUM_BUCKETS; + + SMP_RWLOCK_WRLOCK(persistent_term->lock); struct PersistentTermEntry **link; struct PersistentTermEntry *entry; - persistent_term_result_t result = find_entry(persistent_term, key, &link, &entry, global); + persistent_term_result_t result = find_entry(persistent_term, bucket_index, key, &link, &entry, global); if (UNLIKELY(result != PersistentTermOk)) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return result; } if (entry == NULL) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return PersistentTermOk; } @@ -215,7 +218,7 @@ persistent_term_result_t persistent_term_erase( retire_entry(persistent_term, entry); *removed = true; - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return PersistentTermOk; } @@ -226,17 +229,17 @@ persistent_term_result_t persistent_term_get_all_maybe_gc( { assert(ret != NULL); - SMP_RDLOCK(persistent_term); + SMP_RWLOCK_RDLOCK(persistent_term->lock); size_t needed = 0; for (size_t i = 0; i < PERSISTENT_TERM_NUM_BUCKETS; i++) { for (struct PersistentTermEntry *entry = persistent_term->buckets[i]; entry != NULL; entry = entry->next) { - needed += CONS_SIZE + TUPLE_SIZE(2) + memory_estimate_usage(entry->key); + needed += CONS_SIZE + TUPLE_SIZE(2); } } if (UNLIKELY(memory_ensure_free_opt(ctx, needed, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return PersistentTermAllocationError; } @@ -244,15 +247,14 @@ persistent_term_result_t persistent_term_get_all_maybe_gc( for (size_t i = 0; i < PERSISTENT_TERM_NUM_BUCKETS; i++) { for (struct PersistentTermEntry *entry = persistent_term->buckets[i]; entry != NULL; entry = entry->next) { term tuple = term_alloc_tuple(2, &ctx->heap); - term key = memory_copy_term_tree(&ctx->heap, entry->key); - term_put_tuple_element(tuple, 0, key); + term_put_tuple_element(tuple, 0, entry->key); term_put_tuple_element(tuple, 1, entry->value); list = term_list_prepend(tuple, list, &ctx->heap); } } *ret = list; - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); return PersistentTermOk; } @@ -261,14 +263,15 @@ void persistent_term_info(PersistentTerm *persistent_term, size_t *count, size_t assert(count != NULL); assert(memory != NULL); - SMP_RDLOCK(persistent_term); + SMP_RWLOCK_RDLOCK(persistent_term->lock); *count = persistent_term->count; *memory = persistent_term->memory; - SMP_UNLOCK(persistent_term); + SMP_RWLOCK_UNLOCK(persistent_term->lock); } static persistent_term_result_t find_entry( PersistentTerm *persistent_term, + uint32_t bucket_index, term key, struct PersistentTermEntry ***out_link, struct PersistentTermEntry **out_entry, @@ -278,8 +281,7 @@ static persistent_term_result_t find_entry( *out_entry = NULL; - uint32_t idx = term_hash(key, global) % PERSISTENT_TERM_NUM_BUCKETS; - struct PersistentTermEntry **link = &persistent_term->buckets[idx]; + struct PersistentTermEntry **link = &persistent_term->buckets[bucket_index]; while (*link != NULL) { persistent_term_result_t result = PersistentTermOk; bool equal = term_is_equal((*link)->key, key, global, &result); diff --git a/src/libAtomVM/persistent_term.h b/src/libAtomVM/persistent_term.h index e069db0a5e..14e2b9fbd2 100644 --- a/src/libAtomVM/persistent_term.h +++ b/src/libAtomVM/persistent_term.h @@ -52,7 +52,7 @@ typedef struct PersistentTerm struct PersistentTermEntry *buckets[PERSISTENT_TERM_NUM_BUCKETS]; struct PersistentTermEntry *retired_entries; #ifndef AVM_NO_SMP - SpinLock lock; + RWLock *lock; #endif } PersistentTerm; diff --git a/tests/erlang_tests/test_persistent_term.erl b/tests/erlang_tests/test_persistent_term.erl index 48ee7aa5c1..f2f202c5cf 100644 --- a/tests/erlang_tests/test_persistent_term.erl +++ b/tests/erlang_tests/test_persistent_term.erl @@ -117,6 +117,8 @@ test_info_and_get_all() -> %% OTP may reclaim that memory immediately. true = (Memory3 =< Memory2), true = persistent_term:erase(Key2), + true = lists:member({Key1, {value1, replaced}}, All), + true = lists:member({Key2, {value2, [1, 2, 3]}}, All), ok. cleanup() ->