easybuilders · Flamefire · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026 · Mar 2, 2026
diff --git a/easybuild/easyconfigs/a/accelerate/accelerate-1.10.0-foss-2023b-CUDA-12.4.0.eb b/easybuild/easyconfigs/a/accelerate/accelerate-1.10.0-foss-2023b-CUDA-12.4.0.eb
@@ -0,0 +1,36 @@
+easyblock = 'PythonBundle'
+
+name = 'accelerate'
+version = '1.10.0'
+versionsuffix = '-CUDA-%(cudaver)s'  # matches the -CUDA-12.4.0 part of this file's name
+
+homepage = 'https://github.com/huggingface/accelerate'
+description = """A simple way to launch, train, and use PyTorch models on almost any device and
+distributed configuration, automatic mixed precision (including fp8),
+and easy-to-configure FSDP and DeepSpeed support."""
+
+toolchain = {'name': 'foss', 'version': '2023b'}
+
+dependencies = [
+    ('Python', '3.11.5'),
+    ('Python-bundle-PyPI', '2023.10'),
+    ('SciPy-bundle', '2023.11'),
+    ('CUDA', '12.4.0', '', SYSTEM),  # SYSTEM-toolchain dependency with an empty versionsuffix
+    ('PyTorch-bundle', '2.3.0', versionsuffix),  # the variant carrying this file's -CUDA versionsuffix
+    ('PyYAML', '6.0.1'),
+    ('Safetensors', '0.4.4'),
+]
+
+exts_list = [
+    ('huggingface-hub', '0.30.2', {
+        'sources': ['huggingface_hub-%(version)s.tar.gz'],  # tarball is named with '_' rather than '-'
+        'checksums': ['9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466'],
+    }),
+    (name, version, {  # the accelerate package itself
+        'checksums': ['8270568fda9036b5cccdc09703fef47872abccd56eb5f6d53b54ea5fb7581496'],
+    }),
+]
+
+sanity_check_commands = ['accelerate test']  # NOTE(review): may need a GPU on the build host - confirm
+
+moduleclass = 'ai'
diff --git a/easybuild/easyconfigs/c/CUTLASS/CUTLASS-4.1.0-foss-2023b-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/CUTLASS/CUTLASS-4.1.0-foss-2023b-CUDA-12.4.0.eb
@@ -0,0 +1,58 @@
+easyblock = 'CMakeMake'
+
+name = 'CUTLASS'
+version = '4.1.0'
+versionsuffix = '-CUDA-%(cudaver)s'  # matches the -CUDA-12.4.0 part of this file's name
+
+homepage = 'https://github.com/NVIDIA/cutlass'
+description = """CUTLASS is a collection of CUDA C++ template
+abstractions for implementing high-performance matrix-matrix
+multiplication (GEMM) and related computations at all levels and scales
+within CUDA. It incorporates strategies for hierarchical decomposition
+and data movement similar to those used to implement cuBLAS and cuDNN.
+CUTLASS decomposes these "moving parts" into reusable, modular software
+components abstracted by C++ template classes. Primitives for different
+levels of a conceptual parallelization hierarchy can be specialized and
+tuned via custom tiling sizes, data types, and other algorithmic policy.
+The resulting flexibility simplifies their use as building blocks within
+custom kernels and applications."""
+
+toolchain = {'name': 'foss', 'version': '2023b'}
+
+github_account = 'NVIDIA'
+source_urls = [GITHUB_LOWER_SOURCE]
+sources = [{'download_filename': V_VERSION_TAR_GZ, 'filename': SOURCE_TAR_GZ}]  # stored as CUTLASS-4.1.0.tar.gz
+patches = [
+    'CUTLASS-4.1.0_fix-version.patch',
+    'CUTLASS-4.1.0_add-arch-guards-to-tests.patch',
+]
+checksums = [  # one entry per source/patch, keyed by file name
+    {'CUTLASS-4.1.0.tar.gz': '8d4675b11e9e5207e3940eaac0f46db934ada371cbb3627c9fda642d912b6230'},
+    {'CUTLASS-4.1.0_fix-version.patch': 'e2c7f66e6fd298b3af5339e17c0c75ded7d726cdf6cde003f60263e27ae46495'},
+    {'CUTLASS-4.1.0_add-arch-guards-to-tests.patch':
+     '81cd18d83bdedf3ed1f7add68bbff1635cf9d76bb9e184efbc62cd95caee4275'},
+]
+
+builddependencies = [
+    ('CMake', '3.27.6'),
+    ('Python', '3.11.5'),
+]
+
+dependencies = [
+    ('CUDA', '12.4.0', '', SYSTEM),
+    ('cuDNN', '9.1.1.17', versionsuffix, SYSTEM),  # goes with -DCUTLASS_ENABLE_CUDNN=1 below
+]
+
+_copts = [  # individual CMake options, joined into configopts below
+    '-DCUTLASS_NVCC_ARCHS="%(cuda_cc_cmake)s"',  # presumably the configured CUDA compute capabilities - confirm
+    '-DCUTLASS_ENABLE_CUBLAS=1',
+    '-DCUTLASS_ENABLE_CUDNN=1',
+]
+configopts = ' '.join(_copts)  # single space-separated option string
+
+sanity_check_paths = {
+    'files': ['include/cutlass/cutlass.h', 'lib/libcutlass.%s' % SHLIB_EXT],
+    'dirs': ['lib/cmake'],
+}
+
+moduleclass = 'lib'
diff --git a/easybuild/easyconfigs/c/CuPy/CuPy-13.6.0-foss-2023b-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/CuPy/CuPy-13.6.0-foss-2023b-CUDA-12.4.0.eb
@@ -0,0 +1,77 @@
+easyblock = 'PythonBundle'
+
+name = 'CuPy'
+version = '13.6.0'
+versionsuffix = '-CUDA-%(cudaver)s'  # matches the -CUDA-12.4.0 part of this file's name
+
+homepage = 'https://cupy.dev'
+description = "CuPy is an open-source array library accelerated with NVIDIA CUDA."
+
+toolchain = {'name': 'foss', 'version': '2023b'}
+
+builddependencies = [
+    ('hypothesis', '6.90.0'),
+    ('Cython', '3.0.10'),
+    ('setuptools', '80.9.0'),
+]
+
+dependencies = [
+    ('Python', '3.11.5'),
+    ('SciPy-bundle', '2023.11'),
+    ('CUDA', '12.4.0', '', SYSTEM),
+    ('NCCL', '2.20.5', versionsuffix),
+    ('cuTENSOR', '2.0.2.5', versionsuffix, SYSTEM),
+    ('cuSPARSELt', '0.8.0.4', versionsuffix, SYSTEM),  # docs say 0.7.0 or 0.7.1
+]
+
+# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
+cuda_compute_capabilities = ['5.0', '6.0', '7.0', '7.5', '8.0', '8.6', '9.0']
+
+exts_default_options = {'source_urls': [PYPI_LOWER_SOURCE]}
+
+_skip_tests = [  # extra pytest arguments appended to the 'runtest' command of the cupy extension
+    '--ignore tests/example_tests',  # examples are not included
+    '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_ltisys.py::Test_bode::test_from_state_space',
+    '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_fir_filter_design.py::TestFirls::test_firls',
+    '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_ltisys.py::TestPlacePoles::test_real_2',
+    # New failures in 13.6.0; they all seem to be related to on-the-fly compilation failing
+    '--deselect tests/cupy_tests/core_tests/test_raw.py::TestRawPicklable',   # ::test_raw_picklable
+    '--deselect tests/cupy_tests/fft_tests/test_callback.py::Test1dCallbacks',
+]
+
+# For testing with new versions of CuPy, please enable the slow testing setting below,
+# but switch to the much lighter fast testing before submitting the .eb file, so users
+# can install on GPUs with moderate RAM.
+
+# _parallel_tests, _test_type = 4, 'not slow'
+_parallel_tests, _test_type = 1, 'fast'  # values for 'pytest -n' and 'pytest -k' in 'runtest' below
+
+exts_list = [
+    ('fastrlock', '0.8.3', {
+        'checksums': ['4af6734d92eaa3ab4373e6c9a1dd0d5ad1304e172b1521733c6c3b3d73c8fa5d'],
+    }),
+    ('cupy', version, {
+        'patches': [
+            'cupy-13.0.0_cusparselt_0.6.0.patch',
+            'cupy-13.0.0_eb_ccc.patch',  # presumably consumes EB_CCC set in preinstallopts - confirm
+            'CuPy-13.6.0-Disable_TestRaw_with_nvcc_backend.patch',
+        ],
+        'preinstallopts': 'CUPY_NUM_BUILD_JOBS=%(parallel)s EB_CCC="%(cuda_cc_cmake)s" ',
+        'runtest': 'export CUPY_TEST_GPU_LIMIT=1 CUPY_CACHE_DIR="%%(builddir)s" && '  # %% -> % after formatting
+                   'pytest -n %s tests -k "%s" ' % (_parallel_tests, _test_type) + ' '.join(_skip_tests),
+        'testinstall': True,
+        'checksums': [
+            {'cupy-13.6.0.tar.gz': '3cba30ae3dd32b5d5c6536e710cb98015227cd4ba83c46b3f1825a7ae55b6667'},
+            {'cupy-13.0.0_cusparselt_0.6.0.patch': '09cb12d26e78079c50b06f17002bf54c66e5e4743b917c5a218d3fe90124d499'},
+            {'cupy-13.0.0_eb_ccc.patch': 'bfe8b46344759f58491f55418bd9c856d6f72d681ee5fef12820009f808d2db1'},
+            {'CuPy-13.6.0-Disable_TestRaw_with_nvcc_backend.patch':
+             '958d80059b085017ed8c8de55ed82a0d52fdf964482e8ccc13d401515979d4b7'},
+        ],
+    }),
+]
+
+sanity_check_commands = [
+    "python -c 'import cupy'",
+]
+
+moduleclass = 'lib'
diff --git a/easybuild/easyconfigs/c/cuSPARSELt/cuSPARSELt-0.8.0.4-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/cuSPARSELt/cuSPARSELt-0.8.0.4-CUDA-12.4.0.eb
@@ -0,0 +1,35 @@
+easyblock = 'Tarball'
+
+name = 'cuSPARSELt'
+version = '0.8.0.4'
+versionsuffix = '-CUDA-%(cudaver)s'  # matches the -CUDA-12.4.0 part of this file's name
+
+homepage = 'https://docs.nvidia.com/cuda/cusparselt/index.html'
+description = """NVIDIA cuSPARSELt is a high-performance CUDA library dedicated to general matrix-matrix operations in
+which at least one operand is a sparse matrix"""
+
+toolchain = SYSTEM
+
+local_cudamajver = '12'  # CUDA major version embedded in the tarball name (..._cuda12-archive)
+
+local_arch = {'arm64': 'sbsa', 'aarch64': 'sbsa'}.get(ARCH, ARCH)  # ARM hosts map to 'sbsa'; others keep ARCH
+source_urls = ['https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-%s/' % local_arch]
+sources = ['libcusparse_lt-linux-%s-%%(version)s_cuda%s-archive.tar.xz' %  # %% keeps %(version)s a template
+           (local_arch, local_cudamajver)]
+checksums = [{  # one checksum per architecture-specific tarball name
+    'libcusparse_lt-linux-x86_64-%%(version)s_cuda%s-archive.tar.xz' % local_cudamajver:
+        '483954591766bade877becef126d53908d5fef5d7468b503736af37388669c08',
+    'libcusparse_lt-linux-sbsa-%%(version)s_cuda%s-archive.tar.xz' % local_cudamajver:
+        'b59e2f8ffd154b156b2d74ccd7cad7775385693bec8cb9562596060072c515f2',
+}]
+
+dependencies = [('CUDA', '12.4.0')]
+
+sanity_check_paths = {
+    'files': ['include/cusparseLt.h',
+              'lib/libcusparseLt.%s' % SHLIB_EXT,
+              'lib/libcusparseLt_static.a'],
+    'dirs': [],
+}
+
+moduleclass = 'lib'
diff --git a/easybuild/easyconfigs/c/cuTENSOR/cuTENSOR-2.0.2.5-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/cuTENSOR/cuTENSOR-2.0.2.5-CUDA-12.4.0.eb
@@ -0,0 +1,40 @@
+easyblock = 'Tarball'
+
+name = 'cuTENSOR'
+version = '2.0.2.5'
+versionsuffix = '-CUDA-%(cudaver)s'  # matches the -CUDA-12.4.0 part of this file's name
+
+homepage = 'https://developer.nvidia.com/cutensor'
+description = """The cuTENSOR Library is a GPU-accelerated tensor linear algebra library providing tensor contraction,
+ reduction and elementwise operations."""
+
+toolchain = SYSTEM
+
+# The ARM tarball is published as 'sbsa' (see checksums keys below), so map the host arch name accordingly
+local_arch = {'arm64': 'sbsa', 'aarch64': 'sbsa'}.get(ARCH, ARCH)
+source_urls = ['https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor/linux-%s/' % local_arch]
+sources = ['libcutensor-linux-%s-%%(version)s-archive.tar.xz' % local_arch]  # %% keeps %(version)s a template
+
+checksums = [{  # one checksum per architecture-specific tarball name
+    'libcutensor-linux-sbsa-%(version)s-archive.tar.xz':
+        '5163dd40f11f328e469a6d9b0056c8346f5d59ed538c18d6b954e4ae657c69cc',
+    'libcutensor-linux-x86_64-%(version)s-archive.tar.xz':
+        '0e957ae7b352f599de34b6fa1ba999b0617887f885d7436ac5737d71a6b83baa',
+}]
+
+local_cudamajver = '12'  # name of the lib/ subdirectory checked and exported below
+dependencies = [('CUDA', '12.4.0')]
+
+sanity_check_paths = {
+    'files': ['include/cutensor.h', 'include/cutensor/types.h',
+              'lib/%s/libcutensor.%s' % (local_cudamajver, SHLIB_EXT),
+              'lib/%s/libcutensor_static.a' % local_cudamajver],
+    'dirs': [],
+}
+
+modextrapaths = {  # libraries live in lib/12, so add that subdirectory to the search paths
+    'LD_LIBRARY_PATH': ['lib/%s' % local_cudamajver],
+    'LIBRARY_PATH': ['lib/%s' % local_cudamajver],
+}
+
+moduleclass = 'lib'
diff --git a/easybuild/easyconfigs/d/DLPack/DLPack-1.2-GCCcore-13.2.0.eb b/easybuild/easyconfigs/d/DLPack/DLPack-1.2-GCCcore-13.2.0.eb
@@ -0,0 +1,27 @@
+easyblock = 'CMakeMake'
+
+name = 'DLPack'
+version = '1.2'
+
+homepage = 'https://dmlc.github.io/dlpack/latest/'
+description = """DLPack is a stable in-memory data structure for an ndarray
+system to interact with a variety of frameworks."""
+
+toolchain = {'name': 'GCCcore', 'version': '13.2.0'}
+
+github_account = 'dmlc'  # NOTE(review): presumably consumed by GITHUB_LOWER_SOURCE below - confirm
+source_urls = [GITHUB_LOWER_SOURCE]
+sources = ['v%(version)s.tar.gz']  # resolves to v1.2.tar.gz
+checksums = ['58284a3b004a48450c958a23b30274527ebaf35a061124bbd4193fffa45efbd6']
+
+builddependencies = [
+    ('binutils', '2.40'),
+    ('CMake', '3.27.6'),
+]
+
+sanity_check_paths = {
+    'files': ['include/dlpack/dlpack.h', 'lib/cmake/dlpack/dlpackConfig.cmake'],  # header plus CMake package config
+    'dirs': [],
+}
+
+moduleclass = 'lib'