diff --git a/src/base/strings/BUILD.bazel b/src/base/strings/BUILD.bazel index 059e6ec9eb..383bf0e70b 100644 --- a/src/base/strings/BUILD.bazel +++ b/src/base/strings/BUILD.bazel @@ -75,6 +75,58 @@ mozc_cc_test( ], ) +mozc_cc_library( + name = "const_init_immutable_string", + hdrs = ["const_init_immutable_string.h"], + defines = mozc_select( + client = [], + default = ["MOZC_NO_ATOMIC_FLAG_WAIT"], + ), + deps = ["//base/strings/internal:const_init_string_helpers"] + mozc_select( + client = [], + default = [ + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/synchronization", + ], + ), +) + +mozc_cc_test( + name = "const_init_immutable_string_test", + size = "small", + srcs = ["const_init_immutable_string_test.cc"], + deps = [ + ":const_init_immutable_string", + "//testing:gunit_main", + ], +) + +mozc_cc_library( + name = "const_init_mutable_string", + hdrs = ["const_init_mutable_string.h"], + defines = mozc_select( + client = [], + default = ["MOZC_NO_ATOMIC_FLAG_WAIT"], + ), + deps = ["//base/strings/internal:const_init_string_helpers"] + mozc_select( + client = [], + default = [ + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/synchronization", + ], + ), +) + +mozc_cc_test( + name = "const_init_mutable_string_test", + size = "small", + srcs = ["const_init_mutable_string_test.cc"], + deps = [ + ":const_init_mutable_string", + "//testing:gunit_main", + ], +) + mozc_cc_library( name = "japanese", srcs = [ diff --git a/src/base/strings/const_init_immutable_string.h b/src/base/strings/const_init_immutable_string.h new file mode 100644 index 0000000000..fdb22d5dfe --- /dev/null +++ b/src/base/strings/const_init_immutable_string.h @@ -0,0 +1,235 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#ifndef MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_ +#define MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(MOZC_NO_ATOMIC_FLAG_WAIT) +#include "absl/base/const_init.h" +#include "absl/synchronization/mutex.h" +#endif // defined(MOZC_NO_ATOMIC_FLAG_WAIT) + +#include "base/strings/internal/const_init_string_helpers.h" + +namespace mozc { + +// A utility class to deal with constant global strings whose value is known +// only at runtime. It has the following capabilities: +// +// 1. It allows the library users to lazily initialize the string by calling +// GetOrInit() only after it becomes ready, e.g. only after dependent +// modules are fully loaded. +// 2. It is thread-safe, meaning that multiple threads can call GetOrInit() +// concurrently without causing data race, with an assumption that the +// idempotent_initializer is thread-safe and *idempotent* (i.e., it always +// returns the same value when called multiple times). +// 3. It guarantees that the string is null-terminated. +// 4. It accepts the constinit keyword. +// +// Synchronization model: +// --------------------- +// The initializer is invoked without holding any class-held lock. This avoids +// lock order inversion when the initializer triggers foreign code that +// acquires its own locks -- e.g. a LoadLibrary() call from within DllMain on +// Windows, which Microsoft documents as a classic deadlock pattern: +// +// https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-best-practices#deadlocks-caused-by-lock-order-inversion +// +// Racing threads may each invoke the initializer; per the IdempotentInitializer +// contract that is acceptable. +// +// Once the string is published it cannot be replaced. See +// ConstInitMutableString for a variant that additionally supports thread-safe +// Set(). 
+// +// Caveats on destruction: +// ---------------------- +// * Without MOZC_NO_ATOMIC_FLAG_WAIT, this class is guaranteed to be +// trivially destructible and can be safely used in the global scope without +// worrying about destruction order at exit. +// * With MOZC_NO_ATOMIC_FLAG_WAIT (e.g. server-side configurations where +// std::atomic_flag::wait is not allowed), it falls back to absl::Mutex, +// which offers better integration with TSAN. In this case it is no longer +// trivially destructible, which means atexit-style destructions will happen +// for globally instantiated objects. As explained in +// absl/base/const_init.h, strictly speaking this can fall into undefined +// behavior and we are just relying on known toolchain-specific behaviors +// that are not guaranteed by the standard. +// +// Storage model: +// ------------- +// * Values that fit in fixed_array_size characters (including the terminating +// NUL) live in an inline array. +// * Larger values live in a heap buffer. (ConstInitMutableString only: Set() +// frees any prior heap allocation before it returns.) +// * The currently-installed heap buffer (if any) is intentionally leaked at +// process exit. +#if !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + +// Trivially-destructible variant using `std::atomic_flag::wait`. +template +class ConstInitImmutableString { + public: + using StringT = std::basic_string; + using StringViewT = std::basic_string_view; + + // A data initializer that is guaranteed to return the same value no matter + // how many times it is called. It must also be reentrant and safe to call + // from multiple threads concurrently. + using IdempotentInitializer = std::add_pointer_t; + + ConstInitImmutableString() = delete; + ~ConstInitImmutableString() = default; + + consteval explicit ConstInitImmutableString(IdempotentInitializer init) + : idempotent_initializer_(init) {} + + [[nodiscard]] StringViewT GetOrInit() { + // Fast path: latch observed. 
Synchronization is on init_done_; the acquire + // on test() already publishes all writes prior to the publisher's + // release on init_done_.test_and_set(), so result_ptr_ can be loaded + // with relaxed ordering. + if (init_done_.test(std::memory_order::acquire)) [[likely]] { + return StringViewT(result_ptr_.load(std::memory_order::relaxed), + result_size_); + } + // Invoke the initializer and stage any heap fallback *outside* of + // init_started_. Holding init_started_ across foreign code would introduce + // the Loader-Lock-style lock-order-inversion deadlock described above; + // staging outside also lets a throwing initializer on one thread leave the + // instance recoverable for other threads. + const StringT value = idempotent_initializer_(); + std::unique_ptr heap_fallback = + const_init_string_internal::StageHeapFallback(value, + std::size(value_)); + if (!init_started_.test_and_set(std::memory_order::acquire)) { + // Won the publish race. Commit the staged value. + CharT* dest = const_init_string_internal::CommitStagedValue( + value, heap_fallback, value_.data()); + result_size_ = value.size(); + result_ptr_.store(dest, std::memory_order::relaxed); + init_done_.test_and_set(std::memory_order::release); + init_done_.notify_all(); + return StringViewT(dest, value.size()); + } + // Lost the publish race. Block at the OS level until the winner publishes. + init_done_.wait(false, std::memory_order::acquire); + return StringViewT(result_ptr_.load(std::memory_order::relaxed), + result_size_); + } + + private: + // Hot fields: all three are touched on the fast path. Keep them together at + // the front so they share a cache line regardless of `fixed_array_size`. + std::atomic_flag init_done_ = {}; + std::atomic result_ptr_ = nullptr; + size_t result_size_ = 0; + // Cold fields: only the slow init path touches these. 
+ const IdempotentInitializer idempotent_initializer_; + std::atomic_flag init_started_ = {}; + std::array value_ = {}; +}; + +static_assert( + std::is_trivially_destructible_v>); +static_assert( + std::is_trivially_destructible_v>); + +#else // !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + +// absl::Mutex-based variant, which offers better integration with TSAN by +// giving up trivial destructibility and relying on toolchain-specific behaviors +// that are not guaranteed by the standard. +template +class ConstInitImmutableString { + public: + using StringT = std::basic_string; + using StringViewT = std::basic_string_view; + + // A data initializer that is guaranteed to return the same value no matter + // how many times it is called. It must also be reentrant and safe to call + // from multiple threads concurrently. + using IdempotentInitializer = std::add_pointer_t; + + ConstInitImmutableString() = delete; + ~ConstInitImmutableString() = default; + + consteval explicit ConstInitImmutableString(IdempotentInitializer init) + : idempotent_initializer_(init) {} + + [[nodiscard]] StringViewT GetOrInit() { + // Fast path: publication observed (no mutex_ involvement). + if (const CharT* p = result_ptr_.load(std::memory_order::acquire)) + [[likely]] { + return StringViewT(p, result_size_); + } + // Invoke the initializer and stage any heap fallback *outside* of mutex_. + // See the class comment for the Loader-Lock-style lock-order-inversion + // hazard this avoids. 
+ const StringT value = idempotent_initializer_(); + std::unique_ptr heap_fallback = + const_init_string_internal::StageHeapFallback(value, + std::size(value_)); + absl::MutexLock l(mutex_); + if (const CharT* p = result_ptr_.load(std::memory_order::relaxed)) { + return StringViewT(p, result_size_); + } + CharT* dest = const_init_string_internal::CommitStagedValue( + value, heap_fallback, value_.data()); + result_size_ = value.size(); + result_ptr_.store(dest, std::memory_order::release); + return StringViewT(dest, value.size()); + } + + private: + // Hot fields: both are touched on the fast path. Keep them together at + // the front so they share a cache line regardless of `fixed_array_size`. + std::atomic result_ptr_ = nullptr; + size_t result_size_ = 0; + // Cold fields: only the slow init path touches these. + const IdempotentInitializer idempotent_initializer_; + absl::Mutex mutex_{absl::kConstInit}; + std::array value_ = {}; +}; + +#endif // !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + +} // namespace mozc + +#endif // MOZC_BASE_STRINGS_CONST_INIT_IMMUTABLE_STRING_H_ diff --git a/src/base/strings/const_init_immutable_string_test.cc b/src/base/strings/const_init_immutable_string_test.cc new file mode 100644 index 0000000000..55100a435b --- /dev/null +++ b/src/base/strings/const_init_immutable_string_test.cc @@ -0,0 +1,78 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/strings/const_init_immutable_string.h" + +#include +#include + +#include "testing/gunit.h" + +namespace mozc::strings { +namespace { + +constinit ConstInitImmutableString<256> g_value_simple_init( + []() -> std::string { return "Mozc"; }); + +TEST(ConstInitImmutableStringTest, SimpleInit) { + EXPECT_EQ(g_value_simple_init.GetOrInit(), "Mozc"); + EXPECT_EQ(g_value_simple_init.GetOrInit(), "Mozc"); +} + +TEST(ConstInitImmutableStringTest, NullTermination) { + const auto value = g_value_simple_init.GetOrInit(); + EXPECT_EQ(value.data()[value.size()], '\0'); +} + +constexpr std::string_view kLongValue = + "this string is longer than the inline buffer"; + +constinit ConstInitImmutableString<8> g_value_overflow([]() -> std::string { + return std::string("this string is longer than the inline buffer"); +}); + +TEST(ConstInitImmutableStringTest, HeapFallback) { + const auto value = g_value_overflow.GetOrInit(); + EXPECT_EQ(value, kLongValue); + EXPECT_EQ(value.data()[value.size()], '\0'); + // Repeated calls return 
the same pointer (stable storage). + EXPECT_EQ(g_value_overflow.GetOrInit().data(), value.data()); +} + +constinit ConstInitImmutableString<256, wchar_t> g_value_wide( + []() -> std::wstring { return L"WideMozc"; }); + +TEST(ConstInitImmutableStringTest, WideChar) { + EXPECT_EQ(g_value_wide.GetOrInit(), std::wstring_view(L"WideMozc")); + const auto value = g_value_wide.GetOrInit(); + EXPECT_EQ(value.data()[value.size()], L'\0'); +} + +} // namespace +} // namespace mozc::strings diff --git a/src/base/strings/const_init_mutable_string.h b/src/base/strings/const_init_mutable_string.h new file mode 100644 index 0000000000..2d6499018f --- /dev/null +++ b/src/base/strings/const_init_mutable_string.h @@ -0,0 +1,162 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef MOZC_BASE_STRINGS_CONST_INIT_MUTABLE_STRING_H_ +#define MOZC_BASE_STRINGS_CONST_INIT_MUTABLE_STRING_H_ + +#include +#include +#include +#include +#include +#include + +#if !defined(MOZC_NO_ATOMIC_FLAG_WAIT) +#include +#include +#else // !defined(MOZC_NO_ATOMIC_FLAG_WAIT) +#include "absl/base/const_init.h" +#include "absl/synchronization/mutex.h" +#endif // !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + +#include "base/strings/internal/const_init_string_helpers.h" + +namespace mozc { + +// A variant of ConstInitImmutableString that additionally supports thread-safe +// Set(). The same synchronization model, caveats on destruction, and storage +// model apply -- see the comments in ConstInitImmutableString for details. +// +// Semantics: +// --------- +// * Get() returns a snapshot of the most recent Set() value, or an empty +// string if Set() has never been called. Empty thus doubles as the +// "never set" sentinel; callers that need lazy default behaviour should +// compute the default themselves on observing empty and call Set() to +// publish it. +// * Set(value) atomically replaces the stored value. 
+template +class ConstInitMutableString { + public: + using StringT = std::basic_string; + using StringViewT = std::basic_string_view; + + consteval ConstInitMutableString() noexcept = default; + ~ConstInitMutableString() = default; + + void Set(StringViewT value); + StringT Get(); + + private: +#if !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + // Inlined BasicLockable mutex used to serialize Set() and Get(). Trivially + // destructible and `constexpr`-default-constructible (via + // `std::atomic_flag`'s C++20 default ctor) so it composes with the + // `consteval` ctor above. + class CommittingMutex { + public: + void lock() noexcept { + while (f_.test_and_set(std::memory_order::acquire)) { + // Block until another thread clears the flag. The acquire on the next + // test_and_set() synchronizes with the releasing unlock(), so the + // wait-load itself can be relaxed. + f_.wait(true, std::memory_order::relaxed); + } + } + void unlock() noexcept { + f_.clear(std::memory_order::release); + f_.notify_one(); + } + + private: + // std::atomic_flag deletes copy/move, so this class inherits that. 
+ std::atomic_flag f_ = {}; + }; + CommittingMutex committing_; + using LockGuard = std::lock_guard; +#else // !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + absl::Mutex committing_{absl::kConstInit}; + using LockGuard = absl::MutexLock; +#endif // !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + + std::atomic initialized_ = {}; + CharT* current_ptr_ = nullptr; + size_t current_size_ = 0; + std::array value_ = {}; +}; + +template +void ConstInitMutableString::Set(StringViewT value) { + std::unique_ptr heap_fallback = + const_init_string_internal::StageHeapFallback(value, + std::size(value_)); + + std::unique_ptr old_heap; + { + LockGuard l(committing_); + if (current_ptr_ != nullptr && current_ptr_ != value_.data()) { + old_heap.reset(current_ptr_); + } + current_ptr_ = const_init_string_internal::CommitStagedValue( + value, heap_fallback, value_.data()); + current_size_ = value.size(); + initialized_.store(true, std::memory_order::release); + } +} + +template +auto ConstInitMutableString::Get() -> StringT { + // Lock-free fast exit when nothing has ever been set. + if (!initialized_.load(std::memory_order::acquire)) { + return StringT(); + } + // Main-path: acquire the lock to synchronize with Set() and return a snapshot + // of the current value. + LockGuard l(committing_); + return StringT(current_ptr_, current_size_); +} + +#if !defined(MOZC_NO_ATOMIC_FLAG_WAIT) +// Verify the trivial destructibility contract for both supported CharT +// instantiations so misuse (e.g. accidentally adding a non-trivial member) is +// caught at compile time even if no instance is constructed. Only enforced in +// the atomic_flag arm; the absl::Mutex arm intentionally carries the mutex +// destructor at exit (see the class comment). 
+static_assert( + std::is_trivially_destructible_v>); +static_assert( + std::is_trivially_destructible_v>); +#endif // !defined(MOZC_NO_ATOMIC_FLAG_WAIT) + +} // namespace mozc + +#endif // MOZC_BASE_STRINGS_CONST_INIT_MUTABLE_STRING_H_ diff --git a/src/base/strings/const_init_mutable_string_test.cc b/src/base/strings/const_init_mutable_string_test.cc new file mode 100644 index 0000000000..9668d4e754 --- /dev/null +++ b/src/base/strings/const_init_mutable_string_test.cc @@ -0,0 +1,82 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/strings/const_init_mutable_string.h" + +#include + +#include "testing/gunit.h" + +namespace mozc::strings { +namespace { + +constinit ConstInitMutableString<256> g_value_no_init; + +TEST(ConstInitMutableStringTest, EmptyByDefault) { + EXPECT_EQ(g_value_no_init.Get(), ""); +} + +constinit ConstInitMutableString<256> g_value_set_get; + +TEST(ConstInitMutableStringTest, SetThenGet) { + g_value_set_get.Set("hello"); + EXPECT_EQ(g_value_set_get.Get(), "hello"); + g_value_set_get.Set("world"); + EXPECT_EQ(g_value_set_get.Get(), "world"); +} + +constinit ConstInitMutableString<8> g_value_overflow; + +TEST(ConstInitMutableStringTest, HeapFallbackOnSet) { + const std::string long_value = "this string is longer than the inline buffer"; + g_value_overflow.Set(long_value); + EXPECT_EQ(g_value_overflow.Get(), long_value); + // Replacing a heap-backed value with another heap-backed value must free + // the prior allocation (validated under ASan); behaviorally we just check + // that the new value is observable. + const std::string longer_value = long_value + " and then some"; + g_value_overflow.Set(longer_value); + EXPECT_EQ(g_value_overflow.Get(), longer_value); + // Replacing a heap-backed value with one that fits inline. 
+ g_value_overflow.Set("short"); + EXPECT_EQ(g_value_overflow.Get(), "short"); +} + +constinit ConstInitMutableString<256, wchar_t> g_value_wide; + +TEST(ConstInitMutableStringTest, WideChar) { + EXPECT_EQ(g_value_wide.Get(), std::wstring()); + g_value_wide.Set(L"WideMozc"); + EXPECT_EQ(g_value_wide.Get(), std::wstring(L"WideMozc")); + g_value_wide.Set(L"WideOverride"); + EXPECT_EQ(g_value_wide.Get(), std::wstring(L"WideOverride")); +} + +} // namespace +} // namespace mozc::strings diff --git a/src/base/strings/internal/BUILD.bazel b/src/base/strings/internal/BUILD.bazel index fdbfb831db..97ab6cf400 100644 --- a/src/base/strings/internal/BUILD.bazel +++ b/src/base/strings/internal/BUILD.bazel @@ -82,6 +82,12 @@ mozc_cc_binary( ], ) +mozc_cc_library( + name = "const_init_string_helpers", + srcs = ["const_init_string_helpers.cc"], + hdrs = ["const_init_string_helpers.h"], +) + mozc_cc_library( name = "utf8_internal", srcs = ["utf8_internal.cc"], diff --git a/src/base/strings/internal/const_init_string_helpers.cc b/src/base/strings/internal/const_init_string_helpers.cc new file mode 100644 index 0000000000..4889d772bf --- /dev/null +++ b/src/base/strings/internal/const_init_string_helpers.cc @@ -0,0 +1,79 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "base/strings/internal/const_init_string_helpers.h" + +#include +#include +#include +#include + +namespace mozc::const_init_string_internal { + +template +std::unique_ptr StageHeapFallback(std::basic_string_view value, + size_t inline_capacity) { + const size_t size_with_null = value.size() + 1; + if (size_with_null <= inline_capacity) { + return nullptr; + } + // `make_unique_for_overwrite` (default-init) over `make_unique` + // (value-init) avoids zero-filling the buffer before `std::copy_n` + // overwrites it. + auto heap = std::make_unique_for_overwrite(size_with_null); + std::copy_n(value.data(), value.size(), heap.get()); + heap[value.size()] = CharT(0); + return heap; +} + +template +CharT* CommitStagedValue(std::basic_string_view value, + std::unique_ptr& heap_fallback, + CharT* inline_buffer) { + if (heap_fallback) { + return heap_fallback.release(); + } + std::copy_n(value.data(), value.size(), inline_buffer); + inline_buffer[value.size()] = CharT(0); + return inline_buffer; +} + +// Explicit instantiations: only `char` and `wchar_t` are supported. 
+template std::unique_ptr StageHeapFallback( + std::basic_string_view, size_t); +template std::unique_ptr StageHeapFallback( + std::basic_string_view, size_t); + +template char* CommitStagedValue(std::basic_string_view, + std::unique_ptr&, char*); +template wchar_t* CommitStagedValue(std::basic_string_view, + std::unique_ptr&, + wchar_t*); + +} // namespace mozc::const_init_string_internal diff --git a/src/base/strings/internal/const_init_string_helpers.h b/src/base/strings/internal/const_init_string_helpers.h new file mode 100644 index 0000000000..5c44a0faad --- /dev/null +++ b/src/base/strings/internal/const_init_string_helpers.h @@ -0,0 +1,72 @@ +// Copyright 2010-2021, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef MOZC_BASE_STRINGS_INTERNAL_CONST_INIT_STRING_HELPERS_H_ +#define MOZC_BASE_STRINGS_INTERNAL_CONST_INIT_STRING_HELPERS_H_ + +#include +#include +#include +#include + +namespace mozc::const_init_string_internal { + +// The character types supported by the const-init string family. Used to +// constrain template parameters in the public class headers and in the +// helper templates below. +template +concept SupportedChar = std::same_as || std::same_as; + +// `StageHeapFallback` and `CommitStagedValue` are declarations only; +// definitions and explicit instantiations for `char` and `wchar_t` +// live in const_init_string_helpers.cc. Instantiating with any other +// character type is a link error by design. + +// Stages a copy of `value` for publication. If `value.size() + 1` +// characters (including a NUL terminator) fit in `inline_capacity`, +// returns a null pointer and the caller is expected to commit to its +// own inline buffer; otherwise returns a fresh, NUL-terminated heap +// copy. +template +std::unique_ptr StageHeapFallback(std::basic_string_view value, + size_t inline_capacity); + +// Commits the staged value to its destination: if `heap_fallback` is +// non-null its pointer is released and returned; otherwise `value` is +// copied into `inline_buffer` (which the caller must have sized to +// accommodate `value.size() + 1` characters; see `StageHeapFallback`) +// and a pointer to it is returned. 
The result is always NUL-terminated. +template +CharT* CommitStagedValue(std::basic_string_view value, + std::unique_ptr& heap_fallback, + CharT* inline_buffer); + +} // namespace mozc::const_init_string_internal + +#endif // MOZC_BASE_STRINGS_INTERNAL_CONST_INIT_STRING_HELPERS_H_