From a5da7963b41ad89c7f2aa7789cbd1dbdb2dea0af Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Fri, 5 Dec 2025 15:42:52 +0800 Subject: [PATCH 01/17] feat: enable ggml-hexagon backend for linux-arm64 --- .github/workflows/build-release.yml | 21 ++++++- .gitignore | 5 ++ CMakeLists.txt | 24 +++++++- cmake/aarch64-linux-gnu.toolchain.cmake | 13 ++++ lib/binding.ts | 2 +- package.json | 1 + .../README.md | 16 +++++ .../package.json | 53 ++++++++++++++++ .../README.md | 16 +++++ .../package.json | 53 ++++++++++++++++ scripts/build-linux.sh | 52 ++++++++++++++++ scripts/build-windows.ps1 | 28 +++++++++ scripts/prepare-linux.sh | 42 ++++++++++++- scripts/prepare-windows.ps1 | 54 ++++++++++++++++ scripts/whisper.cpp.patch | 61 +++++++++++++++++++ tsconfig.json | 2 +- 16 files changed, 437 insertions(+), 6 deletions(-) create mode 100644 cmake/aarch64-linux-gnu.toolchain.cmake create mode 100644 packages/node-whisper-linux-arm64-snapdragon/README.md create mode 100644 packages/node-whisper-linux-arm64-snapdragon/package.json create mode 100644 packages/node-whisper-win32-arm64-snapdragon/README.md create mode 100644 packages/node-whisper-win32-arm64-snapdragon/package.json diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 2ee3f0d..5017241 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -30,6 +30,10 @@ on: NPM_TOKEN: required: true +env: + HEXAGON_SDK_VERSION: '6.4.0.2' + OPENCL_VERSION: '2025.07.22' + jobs: build: runs-on: ${{ matrix.os }} @@ -113,6 +117,19 @@ jobs: target: vulkan package: node-whisper-win32-arm64-vulkan toolchain: mingw-clang + # Linux arm64 Snapdragon (cross-compiled on x86_64) + - os: ubuntu-24.04 + arch: arm64 + target: snapdragon + package: node-whisper-linux-arm64-snapdragon + cross-compile: true + # TODO: uncomment this when Windows arm64 Snapdragon is supported + # # Windows arm64 Snapdragon + # - os: windows-latest + # arch: arm64 + # target: snapdragon + # 
package: node-whisper-win32-arm64-snapdragon + # toolchain: mingw-clang steps: - name: Run this job? id: run @@ -186,7 +203,9 @@ jobs: uses: actions/upload-artifact@v4 with: name: bin-${{ matrix.package }} - path: packages/${{ matrix.package }} + path: | + build + packages/${{ matrix.package }} retention-days: ${{ inputs.artifacts-retention-days }} - name: Publish to NPM if: steps.run.outputs.should-run == 'true' && (github.event.inputs.publish == 'YES' || inputs.publish == 'YES') diff --git a/.gitignore b/.gitignore index b19428c..623c6f6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,8 @@ tmp.sess *.node package-lock.json .cache +*.dll +*.so +*.dylib + +.python-version diff --git a/CMakeLists.txt b/CMakeLists.txt index eac81ec..cb28c5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,8 @@ else() endif() if (TO_PACKAGE) - set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/packages/node-whisper-${PLATFORM}-${ARCH}${VARIANT}) + set(PACKAGE_NAME "node-whisper-${PLATFORM}-${ARCH}${VARIANT}") + set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/packages/${PACKAGE_NAME}) else() set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/build/Release) endif() @@ -148,7 +149,7 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows") add_library(win_dynamic_load ${WIN_DYNAMIC_LOAD_SRC}) set_target_properties(win_dynamic_load PROPERTIES COMPILE_FLAGS "-Wno-implicit-function-declaration") - + unset(CMAKE_JS_SRC) unset(CMAKE_JS_LIB) unset(CMAKE_JS_NODELIB_DEF) @@ -159,6 +160,13 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows") set(CMAKE_JS_LIB win_dynamic_load) endif() +if (TO_PACKAGE AND GGML_HEXAGON) + set(NODE_RPATH "node_modules/@fugood/${PACKAGE_NAME}") + set(ELECTRON_ASAR_RPATH "resources/app.asar.unpacked/node_modules/@fugood/${PACKAGE_NAME}") + set(ELECTRON_RES_RPATH "resources/node_modules/@fugood/${PACKAGE_NAME}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath,${NODE_RPATH} -Wl,-rpath,${ELECTRON_ASAR_RPATH} 
-Wl,-rpath,${ELECTRON_RES_RPATH}") +endif() + add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC}) set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} whisper ggml ${CMAKE_THREAD_LIBS_INIT}) @@ -208,3 +216,15 @@ if (GGML_CLBLAST AND TO_PACKAGE) ) endif() endif() + +if (GGML_HEXAGON) + get_target_property(HTP_LIBS_DIR ggml-hexagon BINARY_DIR) + add_custom_command( + TARGET copy_assets + COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v73.so ${PLATFORM_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v75.so ${PLATFORM_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v79.so ${PLATFORM_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v81.so ${PLATFORM_BINARY_DIR} + COMMENT "Copying HTP libraries to bin folder" + ) +endif() diff --git a/cmake/aarch64-linux-gnu.toolchain.cmake b/cmake/aarch64-linux-gnu.toolchain.cmake new file mode 100644 index 0000000..43e7576 --- /dev/null +++ b/cmake/aarch64-linux-gnu.toolchain.cmake @@ -0,0 +1,13 @@ +SET(CMAKE_SYSTEM_NAME Linux) +SET(CMAKE_SYSTEM_PROCESSOR aarch64) + +set(ARCH_PREFIX aarch64-linux-gnu) + +SET(CMAKE_C_COMPILER ${ARCH_PREFIX}-gcc) +SET(CMAKE_CXX_COMPILER ${ARCH_PREFIX}-g++) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +SET(CROSS_COMPILE TRUE) diff --git a/lib/binding.ts b/lib/binding.ts index b916eb2..376b08a 100644 --- a/lib/binding.ts +++ b/lib/binding.ts @@ -140,7 +140,7 @@ export interface Module { WhisperVadContext: WhisperVadContext } -export type LibVariant = 'default' | 'vulkan' | 'cuda' +export type LibVariant = 'default' | 'vulkan' | 'cuda' | 'snapdragon' const getPlatformPackageName = (variant?: LibVariant): string => { const platform = process.platform diff --git a/package.json b/package.json index 0dbe60f..7f828f5 100644 --- 
a/package.json +++ b/package.json @@ -57,6 +57,7 @@ "@fugood/node-whisper-linux-arm64": "1.0.8", "@fugood/node-whisper-linux-arm64-vulkan": "1.0.8", "@fugood/node-whisper-linux-arm64-cuda": "1.0.8", + "@fugood/node-whisper-linux-arm64-snapdragon": "1.0.8", "@fugood/node-whisper-win32-x64": "1.0.8", "@fugood/node-whisper-win32-x64-vulkan": "1.0.8", "@fugood/node-whisper-win32-x64-cuda": "1.0.8", diff --git a/packages/node-whisper-linux-arm64-snapdragon/README.md b/packages/node-whisper-linux-arm64-snapdragon/README.md new file mode 100644 index 0000000..e973652 --- /dev/null +++ b/packages/node-whisper-linux-arm64-snapdragon/README.md @@ -0,0 +1,16 @@ +# @fugood/node-whisper-linux-arm64-snapdragon + +Native module for whisper.node targeting linux-arm64-snapdragon. + +This package contains the pre-compiled native module for the specified platform and architecture with Qualcomm Snapdragon OpenCL and Hexagon support. + +## Installation + +This package is typically installed automatically as a dependency of `@fugood/whisper.node`. 
+ +## Platform Support + +- **OS**: linux +- **Architecture**: arm64 +- **Variant**: snapdragon +- **Backends**: OpenCL, Hexagon NPU diff --git a/packages/node-whisper-linux-arm64-snapdragon/package.json b/packages/node-whisper-linux-arm64-snapdragon/package.json new file mode 100644 index 0000000..ea46e68 --- /dev/null +++ b/packages/node-whisper-linux-arm64-snapdragon/package.json @@ -0,0 +1,53 @@ +{ + "name": "@fugood/node-whisper-linux-arm64-snapdragon", + "version": "1.0.8", + "description": "Native module for An another Node binding of whisper.cpp (linux-arm64-snapdragon)", + "main": "index.node", + "os": [ + "linux" + ], + "cpu": [ + "arm64" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/mybigday/whisper.node.git" + }, + "keywords": [ + "whisper", + "speech-recognition", + "audio", + "transcription", + "voice-activity-detection", + "vad", + "cpp", + "node-addon", + "whisper.cpp", + "linux", + "arm64", + "native", + "qualcomm", + "snapdragon", + "opencl", + "hexagon" + ], + "authors": [ + "ggml / whisper.cpp contributors", + "Hans ", + "Jhen " + ], + "license": "MIT", + "bugs": { + "url": "https://github.com/mybigday/whisper.node/issues" + }, + "homepage": "https://github.com/mybigday/whisper.node#readme", + "publishConfig": { + "registry": "https://registry.npmjs.org", + "access": "public" + }, + "files": [ + "index.node", + "*.so" + ], + "variant": "snapdragon" +} diff --git a/packages/node-whisper-win32-arm64-snapdragon/README.md b/packages/node-whisper-win32-arm64-snapdragon/README.md new file mode 100644 index 0000000..7409e23 --- /dev/null +++ b/packages/node-whisper-win32-arm64-snapdragon/README.md @@ -0,0 +1,16 @@ +# @fugood/node-whisper-win32-arm64-snapdragon + +Native module for whisper.node targeting win32-arm64-snapdragon. + +This package contains the pre-compiled native module for the specified platform and architecture with Qualcomm Snapdragon OpenCL and Hexagon support. 
+ +## Installation + +This package is typically installed automatically as a dependency of `@fugood/whisper.node`. + +## Platform Support + +- **OS**: win32 +- **Architecture**: arm64 +- **Variant**: snapdragon +- **Backends**: OpenCL, Hexagon NPU diff --git a/packages/node-whisper-win32-arm64-snapdragon/package.json b/packages/node-whisper-win32-arm64-snapdragon/package.json new file mode 100644 index 0000000..d173d2e --- /dev/null +++ b/packages/node-whisper-win32-arm64-snapdragon/package.json @@ -0,0 +1,53 @@ +{ + "name": "@fugood/node-whisper-win32-arm64-snapdragon", + "version": "1.0.8", + "description": "Native module for An another Node binding of whisper.cpp (win32-arm64-snapdragon)", + "main": "index.node", + "os": [ + "win32" + ], + "cpu": [ + "arm64" + ], + "repository": { + "type": "git", + "url": "git+https://github.com/mybigday/whisper.node.git" + }, + "keywords": [ + "whisper", + "speech-recognition", + "audio", + "transcription", + "voice-activity-detection", + "vad", + "cpp", + "node-addon", + "whisper.cpp", + "win32", + "arm64", + "native", + "qualcomm", + "snapdragon", + "opencl", + "hexagon" + ], + "authors": [ + "ggml / whisper.cpp contributors", + "Hans ", + "Jhen " + ], + "license": "MIT", + "bugs": { + "url": "https://github.com/mybigday/whisper.node/issues" + }, + "homepage": "https://github.com/mybigday/whisper.node#readme", + "publishConfig": { + "registry": "https://registry.npmjs.org", + "access": "public" + }, + "files": [ + "index.node", + "*.dll" + ], + "variant": "snapdragon" +} diff --git a/scripts/build-linux.sh b/scripts/build-linux.sh index 01eeb8f..e68114d 100755 --- a/scripts/build-linux.sh +++ b/scripts/build-linux.sh @@ -7,6 +7,8 @@ set -e ARCH=${ARCH:-$(uname -m)} TARGET=${TARGET:-"default"} +HEXAGON_SDK_VERSION=${HEXAGON_SDK_VERSION:-"6.4.0.2"} + while [[ "$#" -gt 0 ]]; do case $1 in -a|--arch) ARCH="$2"; shift ;; @@ -64,4 +66,54 @@ else --CDVARIANT=cuda \ --CDCMAKE_CUDA_ARCHITECTURES=87 # > Jetson Orin series fi + + # 
snapdragon + if [ $TARGET == "all" ] || [ $TARGET == "snapdragon" ]; then + if [ $ARCH != "arm64" ]; then + echo "Snapdragon is only supported on arm64" >&2 + exit 1 + fi + + source externals/Hexagon_SDK/Hexagon_SDK/$HEXAGON_SDK_VERSION/setup_sdk_env.source + + ARGS=( + -a $ARCH \ + --CDTO_PACKAGE=ON \ + --CDCLANG_USE_GOMP=ON \ + --CDGGML_NATIVE=OFF \ + --CDGGML_OPENMP=0 \ + # --CDGGML_OPENCL=1 \ + # --CDGGML_OPENCL_SMALL_ALLOC=ON \ + # --CDGGML_OPENCL_USE_ADRENO_KERNELS=ON \ + # --CDGGML_OPENCL_EMBED_KERNELS=ON \ + --CDGGML_HEXAGON=1 \ + --CDHEXAGON_SDK_ROOT="$HEXAGON_SDK_ROOT" \ + --CDPREBUILT_LIB_DIR=UbuntuARM_aarch64 \ + --CDVARIANT=snapdragon \ + ) + + # Check if cross-compilation is needed + if [ $(uname -m) == "x86_64" ] && [ $ARCH == "arm64" ]; then + echo "Cross-compiling for arm64 using GCC..." + + ARGS+=( --CDCMAKE_TOOLCHAIN_FILE="$(realpath cmake/aarch64-linux-gnu.toolchain.cmake)" ) + + # # Try to find OpenCL library for arm64 + # OPENCL_LIB_PATH="" + # if [ -f "externals/opencl-arm64/lib/libOpenCL.so" ]; then + # OPENCL_LIB_PATH="$(realpath externals/opencl-arm64/lib/libOpenCL.so)" + # fi + + # # Build with toolchain + # if [ -n "$OPENCL_LIB_PATH" ]; then + # ARGS+=( + # --CDOpenCL_INCLUDE_DIR="$(realpath externals/OpenCL-Headers)" \ + # --CDOpenCL_LIBRARY="$OPENCL_LIB_PATH" + # ) + # fi + else + ARGS=( -C "${ARGS[@]}" ) + fi + npx cmake-js rebuild "${ARGS[@]}" + fi fi diff --git a/scripts/build-windows.ps1 b/scripts/build-windows.ps1 index 69b6025..ef24cbb 100644 --- a/scripts/build-windows.ps1 +++ b/scripts/build-windows.ps1 @@ -11,6 +11,11 @@ $env:CMAKE_BUILD_PARALLEL_LEVEL = [Environment]::ProcessorCount $nativeArch = [System.Runtime.InteropServices.RuntimeInformation]::ProcessArchitecture +$hexagonSdkVersion = $env:HEXAGON_SDK_VERSION +if ($hexagonSdkVersion -eq $null) { + $hexagonSdkVersion = "6.4.0.2" +} + if ($arch -eq "native") { if ($nativeArch -eq "Arm64") { $arch = "arm64" @@ -89,3 +94,26 @@ if ($target -eq "all" -or $target -eq "cuda") { 
throw "build failed" } } + +# Snapdragon + +if ($target -eq "all" -or $target -eq "snapdragon") { + . "externals/Hexagon_SDK/Hexagon_SDK/$hexagonSdkVersion/setup_sdk_env.ps1" + + # # Set OpenCL paths (headers are in OpenCL-Headers, lib is in OpenCL-SDK) + # $openclIncludePath = Resolve-Path "externals/OpenCL-Headers" + # $openclLibPath = Resolve-Path "externals/OpenCL-SDK/lib/OpenCL.lib" + + npx cmake-js rebuild -C -a $arch $cmakeArgs ` + --CDVARIANT=snapdragon ` + --CDGGML_OPENMP=0 ` + # --CDGGML_OPENCL=1 ` + --CDGGML_HEXAGON=1 ` + --CDHEXAGON_SDK_ROOT="$HEXAGON_SDK_ROOT" ` + --CDPREBUILT_LIB_DIR=windows_aarch64 + # --CDOpenCL_LIBRARY="$openclLibPath" ` + # --CDOpenCL_INCLUDE_DIR="$openclIncludePath" + if ($LASTEXITCODE -ne 0) { + throw "build failed" + } +} diff --git a/scripts/prepare-linux.sh b/scripts/prepare-linux.sh index 18eb9cd..47dccb1 100755 --- a/scripts/prepare-linux.sh +++ b/scripts/prepare-linux.sh @@ -15,6 +15,9 @@ export DEBIAN_FRONTEND=noninteractive ARCH=${ARCH:-$(uname -m)} TARGET=${TARGET:-"default"} +OPENCL_VERSION=${OPENCL_VERSION:-"2024.10.24"} +HEXAGON_SDK_VERSION=${HEXAGON_SDK_VERSION:-"6.4.0.2"} + while [[ "$#" -gt 0 ]]; do case $1 in -a|--arch) ARCH="$2"; shift ;; @@ -25,4 +28,41 @@ while [[ "$#" -gt 0 ]]; do done run_as_root apt-get update -run_as_root apt-get install -qy lsb-release wget llvm clang lld cmake ninja-build libomp-dev ccache + +if [ $TARGET == "snapdragon" ]; then + run_as_root apt-get install -qy lsb-release wget llvm clang lld cmake ninja-build libomp-dev ccache unzip gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu + + # if [ ! 
-f "externals/opencl-arm64/lib/libOpenCL.so" ]; then + # mkdir -p externals/opencl-arm64 + # git clone --depth 1 --branch v$OPENCL_VERSION https://github.com/KhronosGroup/OpenCL-Headers.git externals/OpenCL-Headers + # git clone --depth 1 --branch v$OPENCL_VERSION https://github.com/KhronosGroup/OpenCL-ICD-Loader.git externals/OpenCL-ICD-Loader + + # # Calculate absolute paths before changing directory + # TOOLCHAIN_FILE="$(realpath cmake/aarch64-linux-gnu.toolchain.cmake)" + # HEADERS_DIR="$(realpath externals/OpenCL-Headers)" + # INSTALL_DIR="$(realpath externals/opencl-arm64)" + + # cd externals/OpenCL-ICD-Loader + # cmake -S . -B build-arm64 \ + # -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN_FILE" \ + # -DOPENCL_ICD_LOADER_HEADERS_DIR="$HEADERS_DIR" \ + # -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ + # -DCMAKE_BUILD_TYPE=Release + # cmake --build build-arm64 + # cmake --install build-arm64 + # cd ../.. + # fi + + # Download and extract Hexagon SDK + if [ ! -d "externals/Hexagon_SDK" ]; then + echo "Downloading Hexagon SDK..." + mkdir -p externals + wget -O externals/Hexagon_SDK_lnx.zip https://softwarecenter.qualcomm.com/api/download/software/sdks/Hexagon_SDK/Linux/Debian/$HEXAGON_SDK_VERSION/Hexagon_SDK_lnx.zip + echo "Extracting Hexagon SDK..." 
+ unzip -q externals/Hexagon_SDK_lnx.zip -d externals/Hexagon_SDK + fi + + source externals/Hexagon_SDK/Hexagon_SDK/$HEXAGON_SDK_VERSION/setup_sdk_env.source +else + run_as_root apt-get install -qy lsb-release wget llvm clang lld cmake ninja-build libomp-dev ccache +fi diff --git a/scripts/prepare-windows.ps1 b/scripts/prepare-windows.ps1 index c15ac5e..4c2e642 100644 --- a/scripts/prepare-windows.ps1 +++ b/scripts/prepare-windows.ps1 @@ -8,6 +8,16 @@ $ErrorActionPreference='Stop' $nativeArch = [System.Runtime.InteropServices.RuntimeInformation]::ProcessArchitecture +$openclVersion = $env:OPENCL_VERSION +if ($openclVersion -eq $null) { + $openclVersion = "2024.10.24" +} + +$hexagonSdkVersion = $env:HEXAGON_SDK_VERSION +if ($hexagonSdkVersion -eq $null) { + $hexagonSdkVersion = "6.4.0.2" +} + if ($arch -eq "native") { if ($nativeArch -eq "Arm64") { $arch = "arm64" @@ -51,3 +61,47 @@ if ($toolchain -eq "mingw-clang") { choco install ninja -y } + +if ($target -eq "snapdragon") { + # Download Hexagon SDK + $sdkPath = "externals/Hexagon_SDK" + if (-Not (Test-Path $sdkPath)) { + Write-Host "Downloading Hexagon SDK..." + New-Item -ItemType Directory -Force -Path "externals" | Out-Null + Invoke-WebRequest -Uri "https://softwarecenter.qualcomm.com/api/download/software/sdks/Hexagon_SDK/Windows/$hexagonSdkVersion/Hexagon_SDK_WinNT.zip" -OutFile "externals/Hexagon_SDK_WinNT.zip" + Write-Host "Extracting Hexagon SDK..." + Expand-Archive -Path "externals/Hexagon_SDK_WinNT.zip" -DestinationPath "externals/Hexagon_SDK" -Force + } + + . "externals/Hexagon_SDK/Hexagon_SDK/$hexagonSdkVersion/setup_sdk_env.ps1" + + # # Download OpenCL SDK + # $openclPath = "externals/OpenCL-SDK" + # if (-Not (Test-Path $openclPath)) { + # Write-Host "Downloading OpenCL SDK..." 
+ # New-Item -ItemType Directory -Force -Path "externals" | Out-Null + # New-Item -ItemType Directory -Force -Path "externals/OpenCL-SDK" | Out-Null + + # # Clone OpenCL-Headers + # git clone --depth 1 --branch v$openclVersion https://github.com/KhronosGroup/OpenCL-Headers.git externals/OpenCL-Headers + + # # Clone OpenCL-ICD-Loader + # git clone --depth 1 --branch v$openclVersion https://github.com/KhronosGroup/OpenCL-ICD-Loader.git externals/OpenCL-ICD-Loader + + # # Build OpenCL-ICD-Loader for ARM64 + # Write-Host "Building OpenCL ICD Loader for ARM64..." + # cmake -S externals/OpenCL-ICD-Loader -B externals/OpenCL-ICD-Loader/build ` + # -A ARM64 ` + # -DOPENCL_ICD_LOADER_HEADERS_DIR="$(Resolve-Path 'externals/OpenCL-Headers')" ` + # -DCMAKE_INSTALL_PREFIX="$(Resolve-Path 'externals/OpenCL-SDK')" + # cmake --build externals/OpenCL-ICD-Loader/build --config Release + # cmake --install externals/OpenCL-ICD-Loader/build --config Release + # } + + # $env:OpenCL_INCLUDE_DIR = "$(Resolve-Path 'externals/OpenCL-Headers')" + # $env:OpenCL_LIBRARY = "$(Resolve-Path 'externals/OpenCL-SDK/lib/OpenCL.lib')" + # if ($env:GITHUB_ENV -ne $null) { + # Add-Content -Path $env:GITHUB_ENV -Value "OpenCL_INCLUDE_DIR=$env:OpenCL_INCLUDE_DIR" + # Add-Content -Path $env:GITHUB_ENV -Value "OpenCL_LIBRARY=$env:OpenCL_LIBRARY" + # } +} diff --git a/scripts/whisper.cpp.patch b/scripts/whisper.cpp.patch index ec34ae4..6221a16 100644 --- a/scripts/whisper.cpp.patch +++ b/scripts/whisper.cpp.patch @@ -11,6 +11,67 @@ index e52e050a..c1000c16 100644 else() check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E) if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") +diff --git a/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +index cabd301a..f1eb0f56 100644 +--- a/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp ++++ b/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +@@ -3481,11 +3481,25 @@ static 
const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev + GGML_UNUSED(dev); + } + ++// ~2GB per session for now ++#define GGML_HEXAGON_SESSION_MEMORY_DEFAULT (2ULL * 1024 * 1024 * 1024) ++// Max to 3.5GB ++#define GGML_HEXAGON_SESSION_MEMORY_MAX (3ULL * 1024 * 1024 * 1024 + 512ULL * 1024 * 1024) ++ + static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { +- // ~2GB per session for now +- *free = 2ULL * 1024 * 1024 * 1024; +- *total = *free; ++ const char * str_mem = getenv("GGML_HEXAGON_SESSION_MEMORY"); ++ if (str_mem) { ++ *free = std::stoull(str_mem); ++ if (*free < GGML_HEXAGON_SESSION_MEMORY_DEFAULT) { ++ *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT; ++ } else if (*free > GGML_HEXAGON_SESSION_MEMORY_MAX) { ++ *free = GGML_HEXAGON_SESSION_MEMORY_MAX; ++ } ++ } else { ++ *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT; ++ } + ++ *total = *free; + GGML_UNUSED(dev); + } + +@@ -3666,10 +3680,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) { + } + } + ++#if defined(__ANDROID__) + if(opt_arch < 75) { + opt_ndev = 1; +- GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n"); ++ GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75 for Android.\n"); + } ++#else ++ if(opt_arch < 73) { ++ opt_ndev = 1; ++ GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v73 for Linux and Windows.\n"); ++ } ++#endif + + GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch); + +@@ -3682,6 +3703,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) { + } catch (std::exception const &exc) { + GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i); + devices[i].context = nullptr; ++ opt_ndev = i; ++ break; + } + } + } diff --git a/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt index de01336c..29b1a043 100644 --- 
a/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt diff --git a/tsconfig.json b/tsconfig.json index 89e9970..4714e05 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -106,5 +106,5 @@ // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ "skipLibCheck": true /* Skip type checking all .d.ts files. */ }, - "exclude": ["build/", "whisper.cpp/"] + "exclude": ["build/", "whisper.cpp/", "externals/"] } From 1939fcc7bece5473dbfeb0b99443fb271b4b188a Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Fri, 5 Dec 2025 16:02:22 +0800 Subject: [PATCH 02/17] fix: patch --- scripts/whisper.cpp.patch | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/whisper.cpp.patch b/scripts/whisper.cpp.patch index 6221a16..3ae8e0f 100644 --- a/scripts/whisper.cpp.patch +++ b/scripts/whisper.cpp.patch @@ -12,10 +12,18 @@ index e52e050a..c1000c16 100644 check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E) if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") diff --git a/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp -index cabd301a..f1eb0f56 100644 +index cabd301a..22361297 100644 --- a/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/whisper.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp -@@ -3481,11 +3481,25 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + + #ifdef _WIN32 +@@ -3481,11 +3482,25 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev GGML_UNUSED(dev); } @@ -44,7 +52,7 @@ index cabd301a..f1eb0f56 100644 GGML_UNUSED(dev); } -@@ -3666,10 +3680,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) { +@@ -3666,10 +3681,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) { } } @@ -63,7 +71,7 @@ index 
cabd301a..f1eb0f56 100644 GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch); -@@ -3682,6 +3703,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) { +@@ -3682,6 +3704,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) { } catch (std::exception const &exc) { GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i); devices[i].context = nullptr; From 8b38e3590d9b5ed805a7f02f06cb76b2b29bdfed Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 09:18:36 +0800 Subject: [PATCH 03/17] feat: build opencl --- scripts/build-linux.sh | 34 +++++++++++----------- scripts/build-windows.ps1 | 12 ++++---- scripts/prepare-linux.sh | 36 +++++++++++------------ scripts/prepare-windows.ps1 | 58 ++++++++++++++++++------------------- 4 files changed, 70 insertions(+), 70 deletions(-) diff --git a/scripts/build-linux.sh b/scripts/build-linux.sh index e68114d..6e91001 100755 --- a/scripts/build-linux.sh +++ b/scripts/build-linux.sh @@ -82,10 +82,10 @@ else --CDCLANG_USE_GOMP=ON \ --CDGGML_NATIVE=OFF \ --CDGGML_OPENMP=0 \ - # --CDGGML_OPENCL=1 \ - # --CDGGML_OPENCL_SMALL_ALLOC=ON \ - # --CDGGML_OPENCL_USE_ADRENO_KERNELS=ON \ - # --CDGGML_OPENCL_EMBED_KERNELS=ON \ + --CDGGML_OPENCL=1 \ + --CDGGML_OPENCL_SMALL_ALLOC=ON \ + --CDGGML_OPENCL_USE_ADRENO_KERNELS=ON \ + --CDGGML_OPENCL_EMBED_KERNELS=ON \ --CDGGML_HEXAGON=1 \ --CDHEXAGON_SDK_ROOT="$HEXAGON_SDK_ROOT" \ --CDPREBUILT_LIB_DIR=UbuntuARM_aarch64 \ @@ -98,19 +98,19 @@ else ARGS+=( --CDCMAKE_TOOLCHAIN_FILE="$(realpath cmake/aarch64-linux-gnu.toolchain.cmake)" ) - # # Try to find OpenCL library for arm64 - # OPENCL_LIB_PATH="" - # if [ -f "externals/opencl-arm64/lib/libOpenCL.so" ]; then - # OPENCL_LIB_PATH="$(realpath externals/opencl-arm64/lib/libOpenCL.so)" - # fi - - # # Build with toolchain - # if [ -n "$OPENCL_LIB_PATH" ]; then - # ARGS+=( - # --CDOpenCL_INCLUDE_DIR="$(realpath externals/OpenCL-Headers)" \ - # --CDOpenCL_LIBRARY="$OPENCL_LIB_PATH" - # 
) - # fi + # Try to find OpenCL library for arm64 + OPENCL_LIB_PATH="" + if [ -f "externals/opencl-arm64/lib/libOpenCL.so" ]; then + OPENCL_LIB_PATH="$(realpath externals/opencl-arm64/lib/libOpenCL.so)" + fi + + # Build with toolchain + if [ -n "$OPENCL_LIB_PATH" ]; then + ARGS+=( + --CDOpenCL_INCLUDE_DIR="$(realpath externals/OpenCL-Headers)" \ + --CDOpenCL_LIBRARY="$OPENCL_LIB_PATH" + ) + fi else ARGS=( -C "${ARGS[@]}" ) fi diff --git a/scripts/build-windows.ps1 b/scripts/build-windows.ps1 index ef24cbb..7f51485 100644 --- a/scripts/build-windows.ps1 +++ b/scripts/build-windows.ps1 @@ -100,19 +100,19 @@ if ($target -eq "all" -or $target -eq "cuda") { if ($target -eq "all" -or $target -eq "snapdragon") { . "externals/Hexagon_SDK/Hexagon_SDK/$hexagonSdkVersion/setup_sdk_env.ps1" - # # Set OpenCL paths (headers are in OpenCL-Headers, lib is in OpenCL-SDK) - # $openclIncludePath = Resolve-Path "externals/OpenCL-Headers" - # $openclLibPath = Resolve-Path "externals/OpenCL-SDK/lib/OpenCL.lib" + # Set OpenCL paths (headers are in OpenCL-Headers, lib is in OpenCL-SDK) + $openclIncludePath = Resolve-Path "externals/OpenCL-Headers" + $openclLibPath = Resolve-Path "externals/OpenCL-SDK/lib/OpenCL.lib" npx cmake-js rebuild -C -a $arch $cmakeArgs ` --CDVARIANT=snapdragon ` --CDGGML_OPENMP=0 ` - # --CDGGML_OPENCL=1 ` + --CDGGML_OPENCL=1 ` --CDGGML_HEXAGON=1 ` --CDHEXAGON_SDK_ROOT="$HEXAGON_SDK_ROOT" ` --CDPREBUILT_LIB_DIR=windows_aarch64 - # --CDOpenCL_LIBRARY="$openclLibPath" ` - # --CDOpenCL_INCLUDE_DIR="$openclIncludePath" + --CDOpenCL_LIBRARY="$openclLibPath" ` + --CDOpenCL_INCLUDE_DIR="$openclIncludePath" if ($LASTEXITCODE -ne 0) { throw "build failed" } diff --git a/scripts/prepare-linux.sh b/scripts/prepare-linux.sh index 47dccb1..567a11b 100755 --- a/scripts/prepare-linux.sh +++ b/scripts/prepare-linux.sh @@ -32,26 +32,26 @@ run_as_root apt-get update if [ $TARGET == "snapdragon" ]; then run_as_root apt-get install -qy lsb-release wget llvm clang lld cmake ninja-build 
libomp-dev ccache unzip gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu - # if [ ! -f "externals/opencl-arm64/lib/libOpenCL.so" ]; then - # mkdir -p externals/opencl-arm64 - # git clone --depth 1 --branch v$OPENCL_VERSION https://github.com/KhronosGroup/OpenCL-Headers.git externals/OpenCL-Headers - # git clone --depth 1 --branch v$OPENCL_VERSION https://github.com/KhronosGroup/OpenCL-ICD-Loader.git externals/OpenCL-ICD-Loader + if [ ! -f "externals/opencl-arm64/lib/libOpenCL.so" ]; then + mkdir -p externals/opencl-arm64 + git clone --depth 1 --branch v$OPENCL_VERSION https://github.com/KhronosGroup/OpenCL-Headers.git externals/OpenCL-Headers + git clone --depth 1 --branch v$OPENCL_VERSION https://github.com/KhronosGroup/OpenCL-ICD-Loader.git externals/OpenCL-ICD-Loader - # # Calculate absolute paths before changing directory - # TOOLCHAIN_FILE="$(realpath cmake/aarch64-linux-gnu.toolchain.cmake)" - # HEADERS_DIR="$(realpath externals/OpenCL-Headers)" - # INSTALL_DIR="$(realpath externals/opencl-arm64)" + # Calculate absolute paths before changing directory + TOOLCHAIN_FILE="$(realpath cmake/aarch64-linux-gnu.toolchain.cmake)" + HEADERS_DIR="$(realpath externals/OpenCL-Headers)" + INSTALL_DIR="$(realpath externals/opencl-arm64)" - # cd externals/OpenCL-ICD-Loader - # cmake -S . -B build-arm64 \ - # -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN_FILE" \ - # -DOPENCL_ICD_LOADER_HEADERS_DIR="$HEADERS_DIR" \ - # -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ - # -DCMAKE_BUILD_TYPE=Release - # cmake --build build-arm64 - # cmake --install build-arm64 - # cd ../.. - # fi + cd externals/OpenCL-ICD-Loader + cmake -S . -B build-arm64 \ + -DCMAKE_TOOLCHAIN_FILE="$TOOLCHAIN_FILE" \ + -DOPENCL_ICD_LOADER_HEADERS_DIR="$HEADERS_DIR" \ + -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ + -DCMAKE_BUILD_TYPE=Release + cmake --build build-arm64 + cmake --install build-arm64 + cd ../.. + fi # Download and extract Hexagon SDK if [ ! 
-d "externals/Hexagon_SDK" ]; then diff --git a/scripts/prepare-windows.ps1 b/scripts/prepare-windows.ps1 index 4c2e642..a0d3e83 100644 --- a/scripts/prepare-windows.ps1 +++ b/scripts/prepare-windows.ps1 @@ -75,33 +75,33 @@ if ($target -eq "snapdragon") { . "externals/Hexagon_SDK/Hexagon_SDK/$hexagonSdkVersion/setup_sdk_env.ps1" - # # Download OpenCL SDK - # $openclPath = "externals/OpenCL-SDK" - # if (-Not (Test-Path $openclPath)) { - # Write-Host "Downloading OpenCL SDK..." - # New-Item -ItemType Directory -Force -Path "externals" | Out-Null - # New-Item -ItemType Directory -Force -Path "externals/OpenCL-SDK" | Out-Null - - # # Clone OpenCL-Headers - # git clone --depth 1 --branch v$openclVersion https://github.com/KhronosGroup/OpenCL-Headers.git externals/OpenCL-Headers - - # # Clone OpenCL-ICD-Loader - # git clone --depth 1 --branch v$openclVersion https://github.com/KhronosGroup/OpenCL-ICD-Loader.git externals/OpenCL-ICD-Loader - - # # Build OpenCL-ICD-Loader for ARM64 - # Write-Host "Building OpenCL ICD Loader for ARM64..." - # cmake -S externals/OpenCL-ICD-Loader -B externals/OpenCL-ICD-Loader/build ` - # -A ARM64 ` - # -DOPENCL_ICD_LOADER_HEADERS_DIR="$(Resolve-Path 'externals/OpenCL-Headers')" ` - # -DCMAKE_INSTALL_PREFIX="$(Resolve-Path 'externals/OpenCL-SDK')" - # cmake --build externals/OpenCL-ICD-Loader/build --config Release - # cmake --install externals/OpenCL-ICD-Loader/build --config Release - # } - - # $env:OpenCL_INCLUDE_DIR = "$(Resolve-Path 'externals/OpenCL-Headers')" - # $env:OpenCL_LIBRARY = "$(Resolve-Path 'externals/OpenCL-SDK/lib/OpenCL.lib')" - # if ($env:GITHUB_ENV -ne $null) { - # Add-Content -Path $env:GITHUB_ENV -Value "OpenCL_INCLUDE_DIR=$env:OpenCL_INCLUDE_DIR" - # Add-Content -Path $env:GITHUB_ENV -Value "OpenCL_LIBRARY=$env:OpenCL_LIBRARY" - # } + # Download OpenCL SDK + $openclPath = "externals/OpenCL-SDK" + if (-Not (Test-Path $openclPath)) { + Write-Host "Downloading OpenCL SDK..." 
+ New-Item -ItemType Directory -Force -Path "externals" | Out-Null + New-Item -ItemType Directory -Force -Path "externals/OpenCL-SDK" | Out-Null + + # Clone OpenCL-Headers + git clone --depth 1 --branch v$openclVersion https://github.com/KhronosGroup/OpenCL-Headers.git externals/OpenCL-Headers + + # Clone OpenCL-ICD-Loader + git clone --depth 1 --branch v$openclVersion https://github.com/KhronosGroup/OpenCL-ICD-Loader.git externals/OpenCL-ICD-Loader + + # Build OpenCL-ICD-Loader for ARM64 + Write-Host "Building OpenCL ICD Loader for ARM64..." + cmake -S externals/OpenCL-ICD-Loader -B externals/OpenCL-ICD-Loader/build ` + -A ARM64 ` + -DOPENCL_ICD_LOADER_HEADERS_DIR="$(Resolve-Path 'externals/OpenCL-Headers')" ` + -DCMAKE_INSTALL_PREFIX="$(Resolve-Path 'externals/OpenCL-SDK')" + cmake --build externals/OpenCL-ICD-Loader/build --config Release + cmake --install externals/OpenCL-ICD-Loader/build --config Release + } + + $env:OpenCL_INCLUDE_DIR = "$(Resolve-Path 'externals/OpenCL-Headers')" + $env:OpenCL_LIBRARY = "$(Resolve-Path 'externals/OpenCL-SDK/lib/OpenCL.lib')" + if ($env:GITHUB_ENV -ne $null) { + Add-Content -Path $env:GITHUB_ENV -Value "OpenCL_INCLUDE_DIR=$env:OpenCL_INCLUDE_DIR" + Add-Content -Path $env:GITHUB_ENV -Value "OpenCL_LIBRARY=$env:OpenCL_LIBRARY" + } } From dbfb443f42e1941a6074e062dcab05ffe2b25387 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 09:18:57 +0800 Subject: [PATCH 04/17] feat: add env handler --- lib/binding.ts | 54 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/lib/binding.ts b/lib/binding.ts index 376b08a..ba92b91 100644 --- a/lib/binding.ts +++ b/lib/binding.ts @@ -1,13 +1,13 @@ export interface NativeContextOptions { - filePath: string, - useFlashAttn?: boolean, - useGpu?: boolean, + filePath: string + useFlashAttn?: boolean + useGpu?: boolean } export interface NativeVadContextOptions { - filePath: string, - useGpu?: boolean, - nThreads?: number, 
+ filePath: string + useGpu?: boolean + nThreads?: number } export interface TranscribeOptions { @@ -56,17 +56,17 @@ export interface TranscribeResult { export interface VadOptions { /** Probability threshold to consider as speech (Default: 0.5) */ - threshold?: number, + threshold?: number /** Min duration for a valid speech segment in ms (Default: 250) */ - minSpeechDurationMs?: number, + minSpeechDurationMs?: number /** Min silence duration to consider speech as ended in ms (Default: 100) */ - minSilenceDurationMs?: number, + minSilenceDurationMs?: number /** Max duration of a speech segment before forcing a new segment in seconds (Default: 30) */ - maxSpeechDurationS?: number, + maxSpeechDurationS?: number /** Padding added before and after speech segments in ms (Default: 30) */ - speechPadMs?: number, + speechPadMs?: number /** Overlap in seconds when copying audio samples from speech segment (Default: 0.1) */ - samplesOverlap?: number, + samplesOverlap?: number } export interface VadSegment { @@ -109,7 +109,7 @@ export interface WhisperContext { bench(nThreads: number): Promise release(): Promise getModelInfo(): object - + // static methods toggleNativeLog( enable: boolean, @@ -120,14 +120,17 @@ export interface WhisperContext { export interface WhisperVadContext { new (options: NativeVadContextOptions): WhisperVadContext detectSpeech(filePath: string, options?: VadOptions): Promise - detectSpeechFile(filePath: string, options?: VadOptions): Promise + detectSpeechFile( + filePath: string, + options?: VadOptions, + ): Promise detectSpeechData( audioData: ArrayBuffer, options?: VadOptions, ): Promise release(): Promise getModelInfo(): object - + // static methods toggleNativeLog( enable: boolean, @@ -160,8 +163,25 @@ const loadPlatformPackage = async ( } export const loadModule = async (variant?: LibVariant): Promise => { - // Try to load the requested variant - let module = await loadPlatformPackage(getPlatformPackageName(variant)) + const packageName = 
getPlatformPackageName(variant) + + // Set ADSP_LIBRARY_PATH for load HTP libs + if (variant === 'snapdragon') { + const adspLibraryPath = process.env.ADSP_LIBRARY_PATH + if (!adspLibraryPath) { + try { + process.env.ADSP_LIBRARY_PATH = path.dirname( + require.resolve(packageName), + ) + } catch { + /* no-op */ + } + } + const nDev = process.env.GGML_HEXAGON_NDEV + if (!nDev) process.env.GGML_HEXAGON_NDEV = '16' + } + + let module = await loadPlatformPackage(packageName) if (module) { return module } From df97246a9cf010e3fbfa422a6a2d220244ed25da Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 14:32:45 +0800 Subject: [PATCH 05/17] fix(hexagon): binary scratchpad sizing (test) --- scripts/whisper.cpp.patch | 73 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/scripts/whisper.cpp.patch b/scripts/whisper.cpp.patch index 3ae8e0f..d72b408 100644 --- a/scripts/whisper.cpp.patch +++ b/scripts/whisper.cpp.patch @@ -80,6 +80,79 @@ index cabd301a..22361297 100644 } } } +diff --git a/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c b/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +index 8ed7f67d..bf026708 100644 +--- a/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c ++++ b/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c +@@ -81,6 +81,7 @@ static void binary_job_f32_per_thread(struct htp_ops_context * octx, + const size_t src0_row_size = nb01; + const size_t src1_row_size = nb11; + const size_t dst_row_size = nb1; ++ const size_t src1_spad_stride = octx->src1_spad.size_per_thread; + + const uint32_t src0_nrows = ne01 * ne02 * ne03; // src0 rows + const uint32_t src1_nrows = ne11 * ne12 * ne13; // src1 rows +@@ -109,7 +110,7 @@ static void binary_job_f32_per_thread(struct htp_ops_context * octx, + + hvx_elemwise_f32_func func_HVX = (1 == opt_path) ? 
func_table_HVX_opt[op] : func_table_HVX[op]; + +- uint8_t * restrict spad_data_th = spad_data + (ith * src0_row_size); ++ uint8_t * restrict spad_data_th = spad_data + (ith * src1_spad_stride); + + const uint8_t * restrict src0_ptr = (const uint8_t *) src0->data + (src0_start_row * src0_row_size); + uint8_t * restrict dst_ptr = (uint8_t *) dst->data + (src0_start_row * dst_row_size); +@@ -171,6 +172,7 @@ static void binary_add_id_job_f32_per_thread(struct htp_ops_context * octx, + const size_t src0_row_size = nb01; + const size_t src1_row_size = nb11; + const size_t dst_row_size = nb1; ++ const size_t src0_spad_stride = octx->src0_spad.size_per_thread; + + const uint32_t src0_nrows = ne01 * ne02 * ne03; // src0 rows + +@@ -193,6 +195,7 @@ static void binary_add_id_job_f32_per_thread(struct htp_ops_context * octx, + const uint8_t * restrict data_src0 = (const uint8_t *) src0->data; + const uint8_t * restrict data_src1 = (const uint8_t *) src1->data; + uint8_t * restrict data_dst = (uint8_t *) dst->data; ++ uint8_t * restrict spad_data_th = spad_data + ith * src0_spad_stride; + + const uint32_t ne02_ne01 = ne02 * ne01; + for (uint32_t ir = src0_start_row; ir < src0_end_row; ir++) { +@@ -219,9 +222,9 @@ static void binary_add_id_job_f32_per_thread(struct htp_ops_context * octx, + const uint32_t nr0 = ne00 / ne10; + if (nr0 > 1) { + for (uint32_t r = 0; r < nr0; r++) { +- memcpy(spad_data + r * nb10, (const uint8_t *) src1_ptr, nb10); ++ memcpy(spad_data_th + r * nb10, (const uint8_t *) src1_ptr, nb10); + } +- func_HVX((const uint8_t *) src0_ptr, (const uint8_t *) spad_data, (uint8_t *) dst_ptr, ne00); ++ func_HVX((const uint8_t *) src0_ptr, (const uint8_t *) spad_data_th, (uint8_t *) dst_ptr, ne00); + } else { + func_HVX((const uint8_t *) src0_ptr, (const uint8_t *) src1_ptr, (uint8_t *) dst_ptr, ne00); + } +@@ -298,10 +301,20 @@ static int execute_op_binary_f32(struct htp_ops_context * octx) { + const size_t src1_row_size = src1->nb[1]; + const size_t dst_row_size = 
dst->nb[1]; + ++ const size_t dst_spad_stride = htp_round_up(dst_row_size, 128); ++ const size_t src0_spad_stride = htp_round_up(src0_row_size, 128); ++ // src1 scratchpad must be large enough to hold a full src0 row only when broadcasting ++ const bool broadcast_row = src0->ne[0] != src1->ne[0]; ++ const size_t src1_spad_stride = htp_round_up(broadcast_row ? src0_row_size : src1_row_size, 128); ++ + // VTCM scratchpads for all tensors +- octx->dst_spad.size = htp_round_up(dst_row_size, 128) * n_threads; +- octx->src0_spad.size = htp_round_up(src0_row_size, 128) * n_threads; +- octx->src1_spad.size = htp_round_up(src1_row_size, 128) * n_threads; ++ octx->dst_spad.size_per_thread = dst_spad_stride; ++ octx->src0_spad.size_per_thread = src0_spad_stride; ++ octx->src1_spad.size_per_thread = src1_spad_stride; ++ ++ octx->dst_spad.size = dst_spad_stride * n_threads; ++ octx->src0_spad.size = src0_spad_stride * n_threads; ++ octx->src1_spad.size = src1_spad_stride * n_threads; + + size_t spad_size = octx->src0_spad.size + octx->src1_spad.size + octx->dst_spad.size; + diff --git a/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt index de01336c..29b1a043 100644 --- a/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt From dba3cad9a25a2de3dea15b891abcdf36fe55826d Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 14:35:27 +0800 Subject: [PATCH 06/17] feat: temp use my fork --- whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper.cpp b/whisper.cpp index 19ceec8..98c84d8 160000 --- a/whisper.cpp +++ b/whisper.cpp @@ -1 +1 @@ -Subproject commit 19ceec8eac980403b714d603e5ca31653cd42a3f +Subproject commit 98c84d859e60064a19716968eb4fd936cb0e5749 From 971090bec8aebbf93f951d37afed6108516b1a37 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 14:55:32 +0800 Subject: [PATCH 07/17] feat: attach ggml_threadpool --- src/WhisperContext.cpp | 8 ++++++++ src/common.cpp | 42 
++++++++++++++++++++++++++++++++++++++++++ src/common.hpp | 12 ++++++++++++ 3 files changed, 62 insertions(+) diff --git a/src/WhisperContext.cpp b/src/WhisperContext.cpp index 15f003a..a84b403 100644 --- a/src/WhisperContext.cpp +++ b/src/WhisperContext.cpp @@ -377,11 +377,19 @@ WhisperContext::WhisperContext(const Napi::CallbackInfo& info) : Napi::ObjectWra _sess = std::make_shared(modelPath, ctx); + // Initialize and attach threadpool (hardware_concurrency() may report 0, so always keep at least one thread) + int n_threads = std::max(1, std::min(4, (int)std::thread::hardware_concurrency())); + if (options.Has("nThreads") && options.Get("nThreads").IsNumber()) { + n_threads = std::max(1, options.Get("nThreads").As<Napi::Number>().Int32Value()); + } + _sess->initThreadpool(n_threads); + // Build metadata _meta = Napi::Object::New(env); _meta.Set("filePath", modelPath); _meta.Set("useGpu", useGpu); _meta.Set("useFlashAttn", useFlashAttn); + _meta.Set("nThreads", n_threads); } WhisperContext::~WhisperContext() { diff --git a/src/common.cpp b/src/common.cpp index 9259ca7..1e20263 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1,6 +1,7 @@ #include "common.hpp" #include "whisper.h" #include "common-whisper.h" +#include "ggml-cpu.h" #include #include #include @@ -36,12 +37,53 @@ WhisperSession::WhisperSession(const std::string& modelPath, whisper_context* co } WhisperSession::~WhisperSession() { + freeThreadpool(); if (ctx) { whisper_free(ctx); ctx = nullptr; } } +void WhisperSession::initThreadpool(int n_threads) { + if (!ctx || threadpool) return; // nothing to attach to, or a pool already exists (avoid leaking it) + + auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); + if (!cpu_dev) return; + + auto * reg = ggml_backend_dev_backend_reg(cpu_dev); + if (!reg) return; + + auto * ggml_threadpool_new_fn = (decltype(ggml_threadpool_new) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_new"); + if (!ggml_threadpool_new_fn) return; + + struct ggml_threadpool_params tpp = ggml_threadpool_params_default(n_threads); + threadpool = ggml_threadpool_new_fn(&tpp); + + if (threadpool) { + whisper_attach_threadpool(ctx, threadpool,
threadpool_batch); + } +} + +void WhisperSession::freeThreadpool() { + auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); + if (!cpu_dev) return; + + auto * reg = ggml_backend_dev_backend_reg(cpu_dev); + if (!reg) return; + + auto * ggml_threadpool_free_fn = (decltype(ggml_threadpool_free) *) ggml_backend_reg_get_proc_address(reg, "ggml_threadpool_free"); + if (!ggml_threadpool_free_fn) return; + + if (threadpool) { + ggml_threadpool_free_fn(threadpool); + threadpool = nullptr; + } + if (threadpool_batch) { + ggml_threadpool_free_fn(threadpool_batch); + threadpool_batch = nullptr; + } +} + bool WhisperSession::isValid() const { return ctx != nullptr; } diff --git a/src/common.hpp b/src/common.hpp index de31c3d..bdaf137 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -9,6 +9,8 @@ #include #include "whisper.h" +#include "ggml.h" +#include "ggml-backend.h" // Forward declarations struct whisper_context; @@ -42,12 +44,22 @@ class WhisperSession { whisper_context* ctx; std::mutex mtx; + // Thread pool management + ggml_threadpool_t threadpool = nullptr; + ggml_threadpool_t threadpool_batch = nullptr; + WhisperSession(const std::string& modelPath, whisper_context* context); ~WhisperSession(); + // Initialize and attach threadpool to the whisper context + void initThreadpool(int n_threads); + bool isValid() const; void lock(); void unlock(); + +private: + void freeThreadpool(); }; using WhisperSessionPtr = std::shared_ptr; From 40cc96ab3e069c2d9c5a467f2f4174e30c295519 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 15:19:31 +0800 Subject: [PATCH 08/17] feat: sync whisper.cpp fork --- whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper.cpp b/whisper.cpp index 98c84d8..751f1c9 160000 --- a/whisper.cpp +++ b/whisper.cpp @@ -1 +1 @@ -Subproject commit 98c84d859e60064a19716968eb4fd936cb0e5749 +Subproject commit 751f1c927d8e9eaacf760d4e9763fa241ea88e14 From f9016c0f4a2010ce09024c7b96583308642fe630 
Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 15:26:44 +0800 Subject: [PATCH 09/17] chore: cleanup patch (moved to whisper.cpp fork) --- scripts/whisper.cpp.patch | 73 --------------------------------------- 1 file changed, 73 deletions(-) diff --git a/scripts/whisper.cpp.patch b/scripts/whisper.cpp.patch index d72b408..3ae8e0f 100644 --- a/scripts/whisper.cpp.patch +++ b/scripts/whisper.cpp.patch @@ -80,79 +80,6 @@ index cabd301a..22361297 100644 } } } -diff --git a/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c b/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c -index 8ed7f67d..bf026708 100644 ---- a/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c -+++ b/whisper.cpp/ggml/src/ggml-hexagon/htp/binary-ops.c -@@ -81,6 +81,7 @@ static void binary_job_f32_per_thread(struct htp_ops_context * octx, - const size_t src0_row_size = nb01; - const size_t src1_row_size = nb11; - const size_t dst_row_size = nb1; -+ const size_t src1_spad_stride = octx->src1_spad.size_per_thread; - - const uint32_t src0_nrows = ne01 * ne02 * ne03; // src0 rows - const uint32_t src1_nrows = ne11 * ne12 * ne13; // src1 rows -@@ -109,7 +110,7 @@ static void binary_job_f32_per_thread(struct htp_ops_context * octx, - - hvx_elemwise_f32_func func_HVX = (1 == opt_path) ? 
func_table_HVX_opt[op] : func_table_HVX[op]; - -- uint8_t * restrict spad_data_th = spad_data + (ith * src0_row_size); -+ uint8_t * restrict spad_data_th = spad_data + (ith * src1_spad_stride); - - const uint8_t * restrict src0_ptr = (const uint8_t *) src0->data + (src0_start_row * src0_row_size); - uint8_t * restrict dst_ptr = (uint8_t *) dst->data + (src0_start_row * dst_row_size); -@@ -171,6 +172,7 @@ static void binary_add_id_job_f32_per_thread(struct htp_ops_context * octx, - const size_t src0_row_size = nb01; - const size_t src1_row_size = nb11; - const size_t dst_row_size = nb1; -+ const size_t src0_spad_stride = octx->src0_spad.size_per_thread; - - const uint32_t src0_nrows = ne01 * ne02 * ne03; // src0 rows - -@@ -193,6 +195,7 @@ static void binary_add_id_job_f32_per_thread(struct htp_ops_context * octx, - const uint8_t * restrict data_src0 = (const uint8_t *) src0->data; - const uint8_t * restrict data_src1 = (const uint8_t *) src1->data; - uint8_t * restrict data_dst = (uint8_t *) dst->data; -+ uint8_t * restrict spad_data_th = spad_data + ith * src0_spad_stride; - - const uint32_t ne02_ne01 = ne02 * ne01; - for (uint32_t ir = src0_start_row; ir < src0_end_row; ir++) { -@@ -219,9 +222,9 @@ static void binary_add_id_job_f32_per_thread(struct htp_ops_context * octx, - const uint32_t nr0 = ne00 / ne10; - if (nr0 > 1) { - for (uint32_t r = 0; r < nr0; r++) { -- memcpy(spad_data + r * nb10, (const uint8_t *) src1_ptr, nb10); -+ memcpy(spad_data_th + r * nb10, (const uint8_t *) src1_ptr, nb10); - } -- func_HVX((const uint8_t *) src0_ptr, (const uint8_t *) spad_data, (uint8_t *) dst_ptr, ne00); -+ func_HVX((const uint8_t *) src0_ptr, (const uint8_t *) spad_data_th, (uint8_t *) dst_ptr, ne00); - } else { - func_HVX((const uint8_t *) src0_ptr, (const uint8_t *) src1_ptr, (uint8_t *) dst_ptr, ne00); - } -@@ -298,10 +301,20 @@ static int execute_op_binary_f32(struct htp_ops_context * octx) { - const size_t src1_row_size = src1->nb[1]; - const size_t dst_row_size = 
dst->nb[1]; - -+ const size_t dst_spad_stride = htp_round_up(dst_row_size, 128); -+ const size_t src0_spad_stride = htp_round_up(src0_row_size, 128); -+ // src1 scratchpad must be large enough to hold a full src0 row only when broadcasting -+ const bool broadcast_row = src0->ne[0] != src1->ne[0]; -+ const size_t src1_spad_stride = htp_round_up(broadcast_row ? src0_row_size : src1_row_size, 128); -+ - // VTCM scratchpads for all tensors -- octx->dst_spad.size = htp_round_up(dst_row_size, 128) * n_threads; -- octx->src0_spad.size = htp_round_up(src0_row_size, 128) * n_threads; -- octx->src1_spad.size = htp_round_up(src1_row_size, 128) * n_threads; -+ octx->dst_spad.size_per_thread = dst_spad_stride; -+ octx->src0_spad.size_per_thread = src0_spad_stride; -+ octx->src1_spad.size_per_thread = src1_spad_stride; -+ -+ octx->dst_spad.size = dst_spad_stride * n_threads; -+ octx->src0_spad.size = src0_spad_stride * n_threads; -+ octx->src1_spad.size = src1_spad_stride * n_threads; - - size_t spad_size = octx->src0_spad.size + octx->src1_spad.size + octx->dst_spad.size; - diff --git a/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt index de01336c..29b1a043 100644 --- a/whisper.cpp/ggml/src/ggml-vulkan/CMakeLists.txt From 97704fc66917ce6682366c774d9653b1448f5744 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 17:29:24 +0800 Subject: [PATCH 10/17] fix: path --- lib/binding.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/binding.ts b/lib/binding.ts index ba92b91..1212027 100644 --- a/lib/binding.ts +++ b/lib/binding.ts @@ -1,3 +1,5 @@ +import path from 'path' + export interface NativeContextOptions { filePath: string useFlashAttn?: boolean From c442dd0a0bb15d1f521f7ae00e59cfdbbf8d8926 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 17:53:39 +0800 Subject: [PATCH 11/17] test: force to HTP0 --- src/WhisperContext.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 
insertions(+), 15 deletions(-) diff --git a/src/WhisperContext.cpp b/src/WhisperContext.cpp index a84b403..add1641 100644 --- a/src/WhisperContext.cpp +++ b/src/WhisperContext.cpp @@ -271,7 +271,7 @@ class WhisperVadWorker : public Napi::AsyncWorker { // Lock the session to ensure thread safety std::lock_guard lock(session_->mtx); - + if (!session_->ctx) { SetError("VAD context was destroyed"); return; @@ -283,7 +283,7 @@ class WhisperVadWorker : public Napi::AsyncWorker { // Calculate speech probability from VAD probabilities int n_probs = whisper_vad_n_probs(session_->ctx); float* probs = whisper_vad_probs(session_->ctx); - + if (n_probs > 0 && probs) { // Calculate average probability across all frames float prob_sum = 0.0f; @@ -300,17 +300,17 @@ class WhisperVadWorker : public Napi::AsyncWorker { // Get VAD segments using provided parameters whisper_vad_segments* segments = whisper_vad_segments_from_samples( session_->ctx, vadParams_, audioData_.data(), audioData_.size()); - + if (segments) { int n_segments = whisper_vad_segments_n_segments(segments); - + for (int i = 0; i < n_segments; i++) { float t0 = whisper_vad_segments_get_segment_t0(segments, i); float t1 = whisper_vad_segments_get_segment_t1(segments, i); - + segments_.push_back({t0, t1}); } - + whisper_vad_free_segments(segments); } } @@ -319,7 +319,7 @@ class WhisperVadWorker : public Napi::AsyncWorker { void OnOK() override { Napi::Env env = Env(); Napi::Array result = Napi::Array::New(env); - + // Create VadSegment[] - array of objects with t0 and t1 properties for (size_t i = 0; i < segments_.size(); i++) { Napi::Object segment = Napi::Object::New(env); @@ -327,7 +327,7 @@ class WhisperVadWorker : public Napi::AsyncWorker { segment.Set("t1", segments_[i].second); result.Set(i, segment); } - + Callback().Call({Env().Null(), result}); } @@ -366,7 +366,7 @@ WhisperContext::WhisperContext(const Napi::CallbackInfo& info) : Napi::ObjectWra // Initialize whisper context whisper_context_params cparams = 
whisper_context_default_params(); cparams.use_gpu = useGpu; - cparams.gpu_device = 0; + cparams.gpu_device = 1; // TEMP: HTP0 cparams.flash_attn = useFlashAttn; whisper_context* ctx = whisper_init_from_file_with_params(modelPath.c_str(), cparams); @@ -442,7 +442,7 @@ void WhisperContext::ToggleNativeLog(const Napi::CallbackInfo& info) { if (info.Length() < 1) return; bool enable = whisper_utils::getBool(info[0], false); - + if (enable) { // If enabling logging and a callback is provided, set it up if (info.Length() >= 2 && info[1].IsFunction()) { @@ -725,14 +725,14 @@ Napi::Value WhisperContext::TranscribeData(const Napi::CallbackInfo& info) { Napi::Value WhisperContext::AbortTranscribe(const Napi::CallbackInfo& info) { Napi::Env env = info.Env(); - + if (info.Length() < 1 || !info[0].IsNumber()) { Napi::TypeError::New(env, "Expected job ID").ThrowAsJavaScriptException(); return env.Null(); } - + int jobId = info[0].As().Int32Value(); - + { std::lock_guard lock(_cancelMutex); auto it = _cancelFlags.find(jobId); @@ -740,7 +740,7 @@ Napi::Value WhisperContext::AbortTranscribe(const Napi::CallbackInfo& info) { it->second->store(true); } } - + auto deferred = Napi::Promise::Deferred::New(env); deferred.Resolve(env.Undefined()); return deferred.Promise(); @@ -928,7 +928,7 @@ void WhisperVadContext::ToggleNativeLog(const Napi::CallbackInfo& info) { if (info.Length() < 1) return; bool enable = whisper_utils::getBool(info[0], false); - + if (enable) { // If enabling logging and a callback is provided, set it up if (info.Length() >= 2 && info[1].IsFunction()) { From cc19e3d6cbde4c483008cd259894bb16d31fb18a Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Sat, 6 Dec 2025 17:59:02 +0800 Subject: [PATCH 12/17] feat(examples): allow set lib variant & add useFlashAttn --- examples/basicTranscription.js | 12 ++++++++---- examples/bench.js | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/examples/basicTranscription.js 
b/examples/basicTranscription.js index 129b5d8..17d4c4f 100644 --- a/examples/basicTranscription.js +++ b/examples/basicTranscription.js @@ -31,10 +31,14 @@ async function main() { console.log(`Model: ${MODEL_PATH}`) // Initialize whisper context - const context = await initWhisper({ - filePath: MODEL_PATH, - useGpu: true, // Set to false if GPU is not available - }) + const context = await initWhisper( + { + filePath: MODEL_PATH, + useGpu: true, // Set to false if GPU is not available + // useFlashAttn: true, // Recommend for GPU + }, + process.env.WHISPER_LIB_VARIANT, // 'default' | 'vulkan' | 'cuda' | 'snapdragon' + ) console.log('Model loaded successfully!') console.log() diff --git a/examples/bench.js b/examples/bench.js index 7ee69bc..b47bbfc 100644 --- a/examples/bench.js +++ b/examples/bench.js @@ -19,10 +19,14 @@ async function main() { // Initialize whisper context console.log('Loading model...') - const context = await initWhisper({ - filePath: modelPath, - useGpu: true, // Set to false to benchmark CPU only - }) + const context = await initWhisper( + { + filePath: modelPath, + useGpu: true, // Set to false to benchmark CPU only + // useFlashAttn: true, // Recommend for GPU + }, + process.env.WHISPER_LIB_VARIANT, // 'default' | 'vulkan' | 'cuda' | 'snapdragon' + ) console.log('Model loaded!') console.log() From 8cf1bc4530bf69116f90f11103aa490490f221f3 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Mon, 8 Dec 2025 15:14:09 +0800 Subject: [PATCH 13/17] feat: sync whisper.cpp fork --- whisper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper.cpp b/whisper.cpp index 751f1c9..8eb92e0 160000 --- a/whisper.cpp +++ b/whisper.cpp @@ -1 +1 @@ -Subproject commit 751f1c927d8e9eaacf760d4e9763fa241ea88e14 +Subproject commit 8eb92e0aedd5a2e271455a5da72de974e0d10451 From 2c59d5642a7a8290e8df273ec81cc706fccef75f Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Mon, 8 Dec 2025 16:49:55 +0800 Subject: [PATCH 14/17] fix: add missing 
GGML_CPU_ARM_ARCH --- scripts/build-linux.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build-linux.sh b/scripts/build-linux.sh index 6e91001..7e40c7e 100755 --- a/scripts/build-linux.sh +++ b/scripts/build-linux.sh @@ -81,6 +81,7 @@ else --CDTO_PACKAGE=ON \ --CDCLANG_USE_GOMP=ON \ --CDGGML_NATIVE=OFF \ + --CDGGML_CPU_ARM_ARCH=armv8.2-a+dotprod+fp16 \ --CDGGML_OPENMP=0 \ --CDGGML_OPENCL=1 \ --CDGGML_OPENCL_SMALL_ALLOC=ON \ From f8fce5ae938ecba96e9f9ed76f410c0773492f81 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Mon, 8 Dec 2025 16:54:59 +0800 Subject: [PATCH 15/17] chore: add .env file for quick use on iq9075 --- scripts/.iq9075.env | 2 ++ 1 file changed, 2 insertions(+) create mode 100755 scripts/.iq9075.env diff --git a/scripts/.iq9075.env b/scripts/.iq9075.env new file mode 100755 index 0000000..4abd7f0 --- /dev/null +++ b/scripts/.iq9075.env @@ -0,0 +1,2 @@ +LD_PRELOAD=/lib/aarch64-linux-gnu/libllvm-qcom.so.1 +WHISPER_LIB_VARIANT=snapdragon From 43e6069af497a674c6a0647654a4c9b59552fca8 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Mon, 8 Dec 2025 17:13:44 +0800 Subject: [PATCH 16/17] feat: print timings on success --- src/WhisperContext.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/WhisperContext.cpp b/src/WhisperContext.cpp index add1641..e1109d1 100644 --- a/src/WhisperContext.cpp +++ b/src/WhisperContext.cpp @@ -158,6 +158,10 @@ class WhisperTranscribeWorker : public Napi::AsyncWorker { int result = whisper_full_parallel(session_->ctx, params_copy, audioData_.data(), audioData_.size(), nProcessors_); + if (result == 0) { + whisper_print_timings(session_->ctx); + } + // Wait for all pending callbacks to complete before returning { std::unique_lock lock(callbackCtx.callbackMutex); From c165a4b2cebc56fcb2a0c5bbb3c1bdd3a5a35010 Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Mon, 8 Dec 2025 17:54:19 +0800 Subject: [PATCH 17/17] test: try disable OPENMP for all linux-arm64 variants --- scripts/build-linux.sh | 3 +++ 1
file changed, 3 insertions(+) diff --git a/scripts/build-linux.sh b/scripts/build-linux.sh index 7e40c7e..a4ad8ee 100755 --- a/scripts/build-linux.sh +++ b/scripts/build-linux.sh @@ -42,6 +42,7 @@ else # default if [ $TARGET == "all" ] || [ $TARGET == "default" ]; then npx cmake-js rebuild -C --CDTO_PACKAGE=ON --CDCLANG_USE_GOMP=ON --CDGGML_NATIVE=OFF \ + --CDGGML_OPENMP=0 \ --CDGGML_CPU_ARM_ARCH=armv8.2-a+dotprod+fp16 fi @@ -49,6 +50,7 @@ else if [ $TARGET == "all" ] || [ $TARGET == "vulkan" ]; then npx cmake-js rebuild -C --CDTO_PACKAGE=ON \ --CDCLANG_USE_GOMP=ON \ + --CDGGML_OPENMP=0 \ --CDGGML_NATIVE=OFF \ --CDGGML_CPU_ARM_ARCH=armv8.2-a+dotprod+fp16 \ --CDGGML_VULKAN=1 \ @@ -60,6 +62,7 @@ else if [ $TARGET == "all" ] || [ $TARGET == "cuda" ]; then npx cmake-js rebuild -C --CDTO_PACKAGE=ON \ --CDCLANG_USE_GOMP=ON \ + --CDGGML_OPENMP=0 \ --CDGGML_NATIVE=OFF \ --CDGGML_CPU_ARM_ARCH=armv8.2-a+dotprod+fp16 \ --CDGGML_CUDA=1 \