diff --git a/easybuild/easyconfigs/a/accelerate/accelerate-1.10.0-foss-2023b-CUDA-12.4.0.eb b/easybuild/easyconfigs/a/accelerate/accelerate-1.10.0-foss-2023b-CUDA-12.4.0.eb new file mode 100644 index 00000000000..ca32d9c6984 --- /dev/null +++ b/easybuild/easyconfigs/a/accelerate/accelerate-1.10.0-foss-2023b-CUDA-12.4.0.eb @@ -0,0 +1,36 @@ +easyblock = 'PythonBundle' + +name = 'accelerate' +version = '1.10.0' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://github.com/huggingface/accelerate' +description = """A simple way to launch, train, and use PyTorch models on almost any device and +distributed configuration, automatic mixed precision (including fp8), +and easy-to-configure FSDP and DeepSpeed support.""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +dependencies = [ + ('Python', '3.11.5'), + ('Python-bundle-PyPI', '2023.10'), + ('SciPy-bundle', '2023.11'), + ('CUDA', '12.4.0', '', SYSTEM), + ('PyTorch-bundle', '2.3.0', versionsuffix), + ('PyYAML', '6.0.1'), + ('Safetensors', '0.4.4'), +] + +exts_list = [ + ('huggingface-hub', '0.30.2', { + 'sources': ['huggingface_hub-%(version)s.tar.gz'], + 'checksums': ['9a7897c5b6fd9dad3168a794a8998d6378210f5b9688d0dfc180b1a228dc2466'], + }), + (name, version, { + 'checksums': ['8270568fda9036b5cccdc09703fef47872abccd56eb5f6d53b54ea5fb7581496'], + }), +] + +sanity_check_commands = ['accelerate test'] + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/c/CUTLASS/CUTLASS-4.1.0-foss-2023b-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/CUTLASS/CUTLASS-4.1.0-foss-2023b-CUDA-12.4.0.eb new file mode 100644 index 00000000000..ee1c2c569e9 --- /dev/null +++ b/easybuild/easyconfigs/c/CUTLASS/CUTLASS-4.1.0-foss-2023b-CUDA-12.4.0.eb @@ -0,0 +1,58 @@ +easyblock = 'CMakeMake' + +name = 'CUTLASS' +version = '4.1.0' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://github.com/NVIDIA/cutlass' +description = """CUTLASS is a collection of CUDA C++ template +abstractions for implementing high-performance 
matrix-matrix +multiplication (GEMM) and related computations at all levels and scales +within CUDA. It incorporates strategies for hierarchical decomposition +and data movement similar to those used to implement cuBLAS and cuDNN. +CUTLASS decomposes these "moving parts" into reusable, modular software +components abstracted by C++ template classes. Primitives for different +levels of a conceptual parallelization hierarchy can be specialized and +tuned via custom tiling sizes, data types, and other algorithmic policy. +The resulting flexibility simplifies their use as building blocks within +custom kernels and applications.""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +github_account = 'NVIDIA' +source_urls = [GITHUB_LOWER_SOURCE] +sources = [{'download_filename': V_VERSION_TAR_GZ, 'filename': SOURCE_TAR_GZ}] +patches = [ + 'CUTLASS-4.1.0_fix-version.patch', + 'CUTLASS-4.1.0_add-arch-guards-to-tests.patch', +] +checksums = [ + {'CUTLASS-4.1.0.tar.gz': '8d4675b11e9e5207e3940eaac0f46db934ada371cbb3627c9fda642d912b6230'}, + {'CUTLASS-4.1.0_fix-version.patch': 'e2c7f66e6fd298b3af5339e17c0c75ded7d726cdf6cde003f60263e27ae46495'}, + {'CUTLASS-4.1.0_add-arch-guards-to-tests.patch': + '81cd18d83bdedf3ed1f7add68bbff1635cf9d76bb9e184efbc62cd95caee4275'}, +] + +builddependencies = [ + ('CMake', '3.27.6'), + ('Python', '3.11.5'), +] + +dependencies = [ + ('CUDA', '12.4.0', '', SYSTEM), + ('cuDNN', '9.1.1.17', versionsuffix, SYSTEM), +] + +_copts = [ + '-DCUTLASS_NVCC_ARCHS="%(cuda_cc_cmake)s"', + '-DCUTLASS_ENABLE_CUBLAS=1', + '-DCUTLASS_ENABLE_CUDNN=1', +] +configopts = ' '.join(_copts) + +sanity_check_paths = { + 'files': ['include/cutlass/cutlass.h', 'lib/libcutlass.%s' % SHLIB_EXT], + 'dirs': ['lib/cmake'], +} + +moduleclass = 'lib' diff --git a/easybuild/easyconfigs/c/CuPy/CuPy-13.6.0-foss-2023b-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/CuPy/CuPy-13.6.0-foss-2023b-CUDA-12.4.0.eb new file mode 100644 index 00000000000..fe488db1d3e --- /dev/null +++ 
b/easybuild/easyconfigs/c/CuPy/CuPy-13.6.0-foss-2023b-CUDA-12.4.0.eb @@ -0,0 +1,77 @@ +easyblock = 'PythonBundle' + +name = 'CuPy' +version = '13.6.0' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://cupy.dev' +description = "CuPy is an open-source array library accelerated with NVIDIA CUDA." + +toolchain = {'name': 'foss', 'version': '2023b'} + +builddependencies = [ + ('hypothesis', '6.90.0'), + ('Cython', '3.0.10'), + ('setuptools', '80.9.0'), +] + +dependencies = [ + ('Python', '3.11.5'), + ('SciPy-bundle', '2023.11'), + ('CUDA', '12.4.0', '', SYSTEM), + ('NCCL', '2.20.5', versionsuffix), + ('cuTENSOR', '2.0.2.5', versionsuffix, SYSTEM), + ('cuSPARSELt', '0.8.0.4', versionsuffix, SYSTEM), # docs say 0.7.0 or 0.7.1 +] + +# default CUDA compute capabilities to use (override via --cuda-compute-capabilities) +cuda_compute_capabilities = ['5.0', '6.0', '7.0', '7.5', '8.0', '8.6', '9.0'] + +exts_default_options = {'source_urls': [PYPI_LOWER_SOURCE]} + +_skip_tests = [ + '--ignore tests/example_tests', # examples are not included + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_ltisys.py::Test_bode::test_from_state_space', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_fir_filter_design.py::TestFirls::test_firls', + '--deselect tests/cupyx_tests/scipy_tests/signal_tests/test_ltisys.py::TestPlacePoles::test_real_2', + # New failures in 13.6.0, they all seem to be related to on-the-fly compilation failing + '--deselect tests/cupy_tests/core_tests/test_raw.py::TestRawPicklable', # ::test_raw_picklable + '--deselect tests/cupy_tests/fft_tests/test_callback.py::Test1dCallbacks', +] + +# For testing with new versions of CuPy, please enable the slow testing setting below, +# but switch to the much lighter fast testing before submitting the .eb file, so users +# can install on GPUs with moderate RAM. 
+ +# _parallel_tests, _test_type = 4, 'not slow' +_parallel_tests, _test_type = 1, 'fast' + +exts_list = [ + ('fastrlock', '0.8.3', { + 'checksums': ['4af6734d92eaa3ab4373e6c9a1dd0d5ad1304e172b1521733c6c3b3d73c8fa5d'], + }), + ('cupy', version, { + 'patches': [ + 'cupy-13.0.0_cusparselt_0.6.0.patch', + 'cupy-13.0.0_eb_ccc.patch', + 'CuPy-13.6.0-Disable_TestRaw_with_nvcc_backend.patch', + ], + 'preinstallopts': 'CUPY_NUM_BUILD_JOBS=%(parallel)s EB_CCC="%(cuda_cc_cmake)s" ', + 'runtest': 'export CUPY_TEST_GPU_LIMIT=1 CUPY_CACHE_DIR="%%(builddir)s" && ' + 'pytest -n %s tests -k "%s" ' % (_parallel_tests, _test_type) + ' '.join(_skip_tests), + 'testinstall': True, + 'checksums': [ + {'cupy-13.6.0.tar.gz': '3cba30ae3dd32b5d5c6536e710cb98015227cd4ba83c46b3f1825a7ae55b6667'}, + {'cupy-13.0.0_cusparselt_0.6.0.patch': '09cb12d26e78079c50b06f17002bf54c66e5e4743b917c5a218d3fe90124d499'}, + {'cupy-13.0.0_eb_ccc.patch': 'bfe8b46344759f58491f55418bd9c856d6f72d681ee5fef12820009f808d2db1'}, + {'CuPy-13.6.0-Disable_TestRaw_with_nvcc_backend.patch': + '958d80059b085017ed8c8de55ed82a0d52fdf964482e8ccc13d401515979d4b7'}, + ], + }), +] + +sanity_check_commands = [ + "python -c 'import cupy'", +] + +moduleclass = 'lib' diff --git a/easybuild/easyconfigs/c/cuSPARSELt/cuSPARSELt-0.8.0.4-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/cuSPARSELt/cuSPARSELt-0.8.0.4-CUDA-12.4.0.eb new file mode 100644 index 00000000000..e59c12b3bf4 --- /dev/null +++ b/easybuild/easyconfigs/c/cuSPARSELt/cuSPARSELt-0.8.0.4-CUDA-12.4.0.eb @@ -0,0 +1,35 @@ +easyblock = 'Tarball' + +name = 'cuSPARSELt' +version = '0.8.0.4' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://docs.nvidia.com/cuda/cusparselt/index.html' +description = """NVIDIA cuSPARSELt is a high-performance CUDA library dedicated to general matrix-matrix operations in +which at least one operand is a sparse matrix""" + +toolchain = SYSTEM + +local_cudamajver = '12' + +local_arch = {'arm64': 'sbsa', 'aarch64': 'sbsa'}.get(ARCH, ARCH) +source_urls 
= ['https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-%s/' % local_arch] +sources = ['libcusparse_lt-linux-%s-%%(version)s_cuda%s-archive.tar.xz' % + (local_arch, local_cudamajver)] +checksums = [{ + 'libcusparse_lt-linux-x86_64-%%(version)s_cuda%s-archive.tar.xz' % local_cudamajver: + '483954591766bade877becef126d53908d5fef5d7468b503736af37388669c08', + 'libcusparse_lt-linux-sbsa-%%(version)s_cuda%s-archive.tar.xz' % local_cudamajver: + 'b59e2f8ffd154b156b2d74ccd7cad7775385693bec8cb9562596060072c515f2', +}] + +dependencies = [('CUDA', '12.4.0')] + +sanity_check_paths = { + 'files': ['include/cusparseLt.h', + 'lib/libcusparseLt.%s' % SHLIB_EXT, + 'lib/libcusparseLt_static.a'], + 'dirs': [], +} + +moduleclass = 'lib' diff --git a/easybuild/easyconfigs/c/cuTENSOR/cuTENSOR-2.0.2.5-CUDA-12.4.0.eb b/easybuild/easyconfigs/c/cuTENSOR/cuTENSOR-2.0.2.5-CUDA-12.4.0.eb new file mode 100644 index 00000000000..dd4624817a7 --- /dev/null +++ b/easybuild/easyconfigs/c/cuTENSOR/cuTENSOR-2.0.2.5-CUDA-12.4.0.eb @@ -0,0 +1,40 @@ +easyblock = 'Tarball' + +name = 'cuTENSOR' +version = '2.0.2.5' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://developer.nvidia.com/cutensor' +description = """The cuTENSOR Library is a GPU-accelerated tensor linear algebra library providing tensor contraction, + reduction and elementwise operations.""" + +toolchain = SYSTEM + +source_urls = [ + 'https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor/linux-%(arch)s/' +] +sources = ['libcutensor-linux-%(arch)s-%(version)s-archive.tar.xz'] + +checksums = [{ + 'libcutensor-linux-sbsa-%(version)s-archive.tar.xz': + '5163dd40f11f328e469a6d9b0056c8346f5d59ed538c18d6b954e4ae657c69cc', + 'libcutensor-linux-x86_64-%(version)s-archive.tar.xz': + '0e957ae7b352f599de34b6fa1ba999b0617887f885d7436ac5737d71a6b83baa', +}] + +local_cudamajver = '12' +dependencies = [('CUDA', '12.4.0')] + +sanity_check_paths = { + 'files': ['include/cutensor.h', 
'include/cutensor/types.h', + 'lib/%s/libcutensor.%s' % (local_cudamajver, SHLIB_EXT), + 'lib/%s/libcutensor_static.a' % local_cudamajver], + 'dirs': [], +} + +modextrapaths = { + 'LD_LIBRARY_PATH': ['lib/%s' % local_cudamajver], + 'LIBRARY_PATH': ['lib/%s' % local_cudamajver], +} + +moduleclass = 'lib' diff --git a/easybuild/easyconfigs/d/DLPack/DLPack-1.2-GCCcore-13.2.0.eb b/easybuild/easyconfigs/d/DLPack/DLPack-1.2-GCCcore-13.2.0.eb new file mode 100644 index 00000000000..afb81cf369b --- /dev/null +++ b/easybuild/easyconfigs/d/DLPack/DLPack-1.2-GCCcore-13.2.0.eb @@ -0,0 +1,27 @@ +easyblock = 'CMakeMake' + +name = 'DLPack' +version = '1.2' + +homepage = 'https://dmlc.github.io/dlpack/latest/' +description = """DLPack is a stable in-memory data structure for an ndarray +system to interact with a variety of frameworks.""" + +toolchain = {'name': 'GCCcore', 'version': '13.2.0'} + +github_account = 'dmlc' +source_urls = [GITHUB_LOWER_SOURCE] +sources = ['v%(version)s.tar.gz'] +checksums = ['58284a3b004a48450c958a23b30274527ebaf35a061124bbd4193fffa45efbd6'] + +builddependencies = [ + ('binutils', '2.40'), + ('CMake', '3.27.6'), +] + +sanity_check_paths = { + 'files': ['include/dlpack/dlpack.h', 'lib/cmake/dlpack/dlpackConfig.cmake'], + 'dirs': [], +} + +moduleclass = 'lib' diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5-foss-2023b-CUDA-12.4.0.eb b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5-foss-2023b-CUDA-12.4.0.eb new file mode 100644 index 00000000000..c4869f07bd0 --- /dev/null +++ b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5-foss-2023b-CUDA-12.4.0.eb @@ -0,0 +1,133 @@ +easyblock = 'PythonBundle' + +name = 'DeepSpeed' +version = '0.14.5' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'http://www.deepspeed.ai/' +description = """ +DeepSpeed is a deep learning optimization library that makes distributed training easy, efficient, and effective. 
+""" + +toolchain = {'name': 'foss', 'version': '2023b'} + +builddependencies = [ + ('Ninja', '1.11.1'), + ('Transformers', '4.44.0'), +] +local_pytorch_version = '2.3.0' +dependencies = [ + ('Python', '3.11.5'), + ('CUDA', '12.4.0', '', SYSTEM), + ('NCCL', '2.20.5', '-CUDA-%(cudaver)s'), + ('CuPy', '13.6.0', '-CUDA-%(cudaver)s'), + ('Triton', '2.3.1', '-CUDA-%(cudaver)s'), + ('accelerate', '1.10.0', '-CUDA-%(cudaver)s'), + ('PyTorch', local_pytorch_version, '-CUDA-%(cudaver)s'), + ('PyTorch-bundle', local_pytorch_version, '-CUDA-%(cudaver)s'), + ('mpi4py', '3.1.5'), + ('DLPack', '1.2'), + ('py-cpuinfo', '9.0.0'), + ('pydantic', '2.7.4'), + ('tqdm', '4.66.2'), + ('pdsh', '2.36'), + ('Seaborn', '0.13.2'), # dependency for mup + ('libaio', '0.3.113'), # for async_io (builddep only?) +] + +local_excluded_ds_tests = ( + 'TestTensorBoard', + 'TestWandb', + 'TestCometMonitor', + 'TestQuantizedInt', # Downloads model from internet + 'test_fp_quant[256-qbits8-bf16]', # Error of 0.00909423828125 > 0.004 + 'test_DS4Sci_EvoformerAttention[tensor_shape1-dtype1]', # Error of 0.05859375 > 0.05 +) + +components = [ + ('CUTLASS', '4.1.0', { + 'easyblock': 'Tarball', + 'source_urls': ['https://github.com/NVIDIA/cutlass/archive/refs/tags'], + 'sources': [{'download_filename': V_VERSION_TAR_GZ, 'filename': SOURCE_TAR_GZ}], + 'checksums': ['8d4675b11e9e5207e3940eaac0f46db934ada371cbb3627c9fda642d912b6230'], + 'start_dir': '%(namelower)s-%(version)s', + 'target_dir': 'extra/cutlass', + }), +] + +local_cutlass_path = '%(installdir)s/extra/cutlass' +local_cutlass_opt = f"export CUTLASS_PATH='{local_cutlass_path}' && " + +github_account = 'microsoft' +exts_list = [ + ('hjson', '3.1.0', { + 'checksums': ['55af475a27cf83a7969c808399d7bccdec8fb836a07ddbd574587593b9cdcf75'], + }), + ('nvidia-ml-py', '12.535.161', { + 'checksums': ['2bcc31ff7a0ea291ed8d7fc39b149391a42c2fb1cb4256c935e692de488b4d17'], + 'modulename': 'pynvml', + }), + ('mup', '1.0.0', { + 'checksums': 
['9639e3d19f90e754f985ed444542ed2f8a049f3c0488fcb6efe150f30922cf74'], + }), + ('qtorch', '0.3.0', { + 'checksums': ['3fc2e9b27d58d18304ac46511ea03a3eb20f852944f6a5b6ef71b974c2da20bf'], + 'preinstallopts': "TORCH_CUDA_ARCH_LIST='%(cuda_cc_semicolon_sep)s' ", + }), + ('DeepSpeed', '0.14.5', { + 'source_urls': [GITHUB_SOURCE], + # Test suite not available on pypi + 'sources': [{'download_filename': V_VERSION_TAR_GZ, 'filename': SOURCE_TAR_GZ}], + 'patches': [ + 'DeepSpeed-0.14.5_avoid-access-to-home.patch', + 'DeepSpeed-0.14.5_fix-test-parameterize.patch', + 'DeepSpeed-0.14.5_no-ninja-dep.patch', + 'DeepSpeed-0.14.5_pdsh-env-vars.patch', + 'DeepSpeed-0.14.5_pic-compile.patch', + 'DeepSpeed-0.14.5_test-nvme-offload.patch', + ], + 'checksums': [ + {'DeepSpeed-0.14.5.tar.gz': '9f5622715cbd89c7382bfecf7fb188419ad3f2af7764dc6de35917abc6390cce'}, + {'DeepSpeed-0.14.5_avoid-access-to-home.patch': + 'edb39720a27b74170c87c8c51ecb8be6fd6fe2fa346f2a10b343a73884c5c412'}, + {'DeepSpeed-0.14.5_fix-test-parameterize.patch': + '1df9c7ceeca0b37aff85390b7bd25e266ddf88c5b1380980e6c13a064840d1d8'}, + {'DeepSpeed-0.14.5_no-ninja-dep.patch': + 'e974a928b03a180da4e67da2f347c25968cb41f9c6037a9796ab776a9a4b0547'}, + {'DeepSpeed-0.14.5_pdsh-env-vars.patch': + '02556620ac643d273a2fa9c019d437cd874a6c19759fa59baaa0e9a41d0a5240'}, + {'DeepSpeed-0.14.5_pic-compile.patch': '1b9c070b77cf24351bff29bab7d23baacde31c7ea211a4bc75732ac38a99d6b0'}, + {'DeepSpeed-0.14.5_test-nvme-offload.patch': + '1592097867c5d4594a434cca727df134fcaa0e3ea8c595eb5951856a501cf422'}, + ], + 'jit_only_ops': [ + 'SPARSE_ATTN', 'FP_QUANTIZER', 'CUTLASS_OPS', 'RAGGED_DEVICE_OPS', + # Cannot be prebuilt in several scenarios, + # e.g. 
multiple GPU archs (cuda-compute-capabilities), no GPU present: + # See https://github.com/deepspeedai/DeepSpeed/pull/7760 + 'EVOFORMER_ATTN', + ], + 'preinstallopts': ' && '.join(( + # Use this version and no suffix + 'echo "%(version)s" > version.txt', + 'echo "" > build.txt', + # Disable use of git during build + 'sed -i "s/command_exists(\'git\')/False/" setup.py', + local_cutlass_opt, + )), + 'pretestopts': local_cutlass_opt, + 'testinstall': True, + 'runtest': ' && '.join(( + 'ln -s $PWD/tests/ ../tests', + 'cd ..', + 'export DS_UNITTEST_TIMEOUT=1200', + f"pytest tests/unit/ -k 'not {' and not '.join(local_excluded_ds_tests)}' --durations=0", + )), + }), +] + +modextravars = { + 'CUTLASS_PATH': local_cutlass_path, +} + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_avoid-access-to-home.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_avoid-access-to-home.patch new file mode 100644 index 00000000000..35704b40e88 --- /dev/null +++ b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_avoid-access-to-home.patch @@ -0,0 +1,60 @@ +From 9d17116fcdb44b81eb00d3bce91431dc35cd69b1 Mon Sep 17 00:00:00 2001 +From: "Joshua C. Randall" +Date: Wed, 4 Sep 2024 19:22:07 +0100 +Subject: [PATCH] print warning if actual triton cache dir is on NFS, not just + for default (#6487) + +move the logic that prints a warning when triton cache dir is on NFS to +act on the actual calculated cache_dir rather than on the default. 
+ +this means that: +- when the default directory (in the user's home directory) is on NFS +but `TRITON_CACHE_DIR` is set to a non-NFS directory, no warning will be +printed whereas prior to this change a spurious and confusing warning +was printed +- when the user's home directory is not on NFS but `TRITON_CACHE_DIR` is +set to an NFS directory, a warning will be printed whereas prior to this +change no warning would be printed + +fixes #6486 + +Adapted to use $TRITON_HOME by Alexander Grund (TU Dresden) + +diff --git a/deepspeed/ops/transformer/inference/triton/matmul_ext.py b/deepspeed/ops/transformer/inference/triton/matmul_ext.py +index c77d8a8e11c0..412c8740a216 100644 +--- a/deepspeed/ops/transformer/inference/triton/matmul_ext.py ++++ b/deepspeed/ops/transformer/inference/triton/matmul_ext.py +@@ -40,13 +40,18 @@ class TritonCacheDir: + _warning_printed = False + + @staticmethod +- def default_cache_dir(): +- tmp_path = os.path.join(Path.home(), ".triton", "autotune") +- if is_nfs_path(tmp_path) and not TritonCacheDir._warning_printed: ++ def warn_if_nfs(cache_dir): ++ if is_nfs_path(cache_dir) and not TritonCacheDir._warning_printed: + print( +- f"Warning: The default cache directory for DeepSpeed Triton autotune, {tmp_path}, appears to be on an NFS system. While this is generally acceptable, if you experience slowdowns or hanging when DeepSpeed exits, it is recommended to set the TRITON_CACHE_DIR environment variable to a non-NFS path." ++ f"Warning: The cache directory for DeepSpeed Triton autotune, {cache_dir}, appears to be on an NFS system. While this is generally acceptable, if you experience slowdowns or hanging when DeepSpeed exits, it is recommended to set the TRITON_CACHE_DIR environment variable to a non-NFS path." 
+ ) + TritonCacheDir._warning_printed = True ++ return ++ ++ @staticmethod ++ def default_cache_dir(): ++ tt_home = os.environ.get('TRITON_HOME') or os.path.join(Path.home(), ".triton") ++ tmp_path = os.path.join(tt_home, "autotune") + return tmp_path + + +@@ -80,9 +84,9 @@ def __init__(self, key): + self.lock_path = None + # if caching is enabled, get the lock and bin path + self.cache_dir = os.environ.get('TRITON_CACHE_DIR', TritonCacheDir.default_cache_dir()) ++ TritonCacheDir.warn_if_nfs(self.cache_dir) + if self.cache_dir: + os.makedirs(self.cache_dir, exist_ok=True) +- if self.cache_dir: + self.file_path = os.path.join(self.cache_dir, self.key + ".pickle") + self.lock_path = self.file_path + ".lock" + diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_fix-test-parameterize.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_fix-test-parameterize.patch new file mode 100644 index 00000000000..8776d6e3d61 --- /dev/null +++ b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_fix-test-parameterize.patch @@ -0,0 +1,82 @@ +Avoid this failure during tests: +> assert not 'unit/runtime/half_precision/onebit/test_onebit.py::TestOneBitLambFP16Pipeline::test[topo_config0]' + +Reason is that pytest-xdist doesn't seem to work well with having a dict in `pytest.mark.parameterize` +See https://github.com/pytest-dev/pytest-xdist/issues/922 + +It is not required at all here +diff --git a/tests/unit/runtime/half_precision/onebit/test_onebit.py b/tests/unit/runtime/half_precision/onebit/test_onebit.py +--- a/tests/unit/runtime/half_precision/onebit/test_onebit.py ++++ b/tests/unit/runtime/half_precision/onebit/test_onebit.py +@@ -37,6 +37,8 @@ if get_accelerator().device_name() == 'hpu': + pytest.skip("1-bit compression is not supported by HPU.", allow_module_level=True) + + ++topo_config = {"num_pp": 2, "num_dp": 2} ++ + @pytest.mark.parametrize("dtype", [torch.float32, torch.float16], ids=["fp32", "fp16"]) + class TestOneBitAdamBasic(DistributedTest): + 
world_size = 2 +@@ -342,19 +344,10 @@ class TestOneBitAdamCheckpointing(DistributedTest): + model.save_checkpoint(save_folder, tag=None) + + +-@pytest.mark.parametrize( +- "topo_config", +- [ +- { +- "num_pp": 2, +- "num_dp": 2 +- }, +- ], +-) + class TestOneBitAdamFP16Pipeline(DistributedTest): + world_size = 4 + +- def test(self, topo_config): ++ def test(self): + if not get_accelerator().is_fp16_supported(): + pytest.skip("fp16 is not supported") + config_dict = { +@@ -709,19 +702,10 @@ class TestZeroOneAdamCheckpointing(DistributedTest): + model.save_checkpoint(save_folder, tag=None) + + +-@pytest.mark.parametrize( +- "topo_config", +- [ +- { +- "num_pp": 2, +- "num_dp": 2 +- }, +- ], +-) + class TestZeroOneAdamFP16Pipeline(DistributedTest): + world_size = 4 + +- def test(self, topo_config): ++ def test(self): + if not get_accelerator().is_fp16_supported(): + pytest.skip("fp16 is not supported") + config_dict = { +@@ -1105,19 +1089,10 @@ class TestOneBitLambCheckpointing(DistributedTest): + model.save_checkpoint(save_folder, tag=None) + + +-@pytest.mark.parametrize( +- "topo_config", +- [ +- { +- "num_pp": 2, +- "num_dp": 2 +- }, +- ], +-) + class TestOneBitLambFP16Pipeline(DistributedTest): + world_size = 4 + +- def test(self, topo_config): ++ def test(self): + if not get_accelerator().is_fp16_supported(): + pytest.skip("fp16 is not supported") + config_dict = { diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_no-ninja-dep.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_no-ninja-dep.patch new file mode 100644 index 00000000000..d4e2a0d6699 --- /dev/null +++ b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_no-ninja-dep.patch @@ -0,0 +1,67 @@ +Patch away dependency on ninja python package by checking returncode of `ninja --version`. 
+ +Author: Viktor Rehnberg (Chalmers University of Technology) +Adapted by Alexander Grund (TU Dresden) + +diff --git a/deepspeed/env_report.py b/deepspeed/env_report.py +--- a/deepspeed/env_report.py ++++ b/deepspeed/env_report.py +@@ -59,11 +59,7 @@ def op_report(verbose=True): + + + def ninja_installed(): +- try: +- import ninja # noqa: F401 # type: ignore +- except ImportError: +- return False +- return True ++ return subprocess.run(["ninja", "--version"], check=False).returncode == 0 + + + def nvcc_version(): +diff --git a/op_builder/builder.py b/op_builder/builder.py +--- a/op_builder/builder.py ++++ b/op_builder/builder.py +@@ -533,9 +533,7 @@ class OpBuilder(ABC): + raise RuntimeError( + f"Unable to JIT load the {self.name} op due to it not being compatible due to hardware/software issue. {self.error_log}" + ) +- try: +- import ninja # noqa: F401 # type: ignore +- except ImportError: ++ if subprocess.run(["ninja", "--version"], check=False).returncode != 0: + raise RuntimeError(f"Unable to JIT load the {self.name} op due to ninja not being installed.") + + if isinstance(self, CUDAOpBuilder) and not self.is_rocm_pytorch(): +diff --git a/op_builder/xpu/builder.py b/op_builder/xpu/builder.py +--- a/op_builder/xpu/builder.py ++++ b/op_builder/xpu/builder.py +@@ -6,6 +6,7 @@ + import os + import time + import importlib ++import subprocess + + try: + # is op_builder from deepspeed or a 3p version? this should only succeed if it's deepspeed +@@ -86,9 +87,7 @@ class SYCLOpBuilder(OpBuilder): + raise RuntimeError( + f"Unable to JIT load the {self.name} op due to it not being compatible due to hardware/software issue. 
{self.error_log}" + ) +- try: +- import ninja # noqa: F401 +- except ImportError: ++ if subprocess.run(["ninja", "--version"], check=False).returncode != 0: + raise RuntimeError(f"Unable to JIT load the {self.name} op due to ninja not being installed.") + + self.jit_mode = True +diff --git a/requirements/requirements.txt b/requirements/requirements.txt +index 80c9f9b3..eed77fa3 100755 +--- a/requirements/requirements.txt ++++ b/requirements/requirements.txt +@@ -1,5 +1,4 @@ + hjson +-ninja + numpy + nvidia-ml-py + packaging>=20.0 diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pdsh-env-vars.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pdsh-env-vars.patch new file mode 100644 index 00000000000..be3e25a5a71 --- /dev/null +++ b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pdsh-env-vars.patch @@ -0,0 +1,28 @@ +Add software relevant environment variables + +The multinode runner launches processes with pdsh, if LD_LIBRARY_PATH is +not included in these exports then the python .so file may not be found. +Also including what seemed important and was added from loading DeepSpeed. +(Couldn't add everything, then argument list becomes too long). + +See https://github.com/easybuilders/easybuild-easyconfigs/pull/21438#issuecomment-2373540098 + +Note: Those are prefixes of variables to be included. 
+ +Author: Viktor Rehnberg (Chalmers University of Technology) + +diff --git a/deepspeed/launcher/runner.py b/deepspeed/launcher/runner.py +--- a/deepspeed/launcher/runner.py ++++ b/deepspeed/launcher/runner.py +@@ -32,6 +32,11 @@ from deepspeed.accelerator import get_accelerator + + DLTS_HOSTFILE = "/job/hostfile" + EXPORT_ENVS = ['MLFLOW', 'PYTHON', 'MV2', 'UCX'] ++EXPORT_ENVS += [ # Extra based on what's added by module load DeepSpeed ++ 'LD_LIBRARY_PATH', 'PATH', 'EB', 'TRITON', 'CUDA', # important ++ 'ACLOCAL', 'CMAKE', 'CPATH', 'LIBRARY_PATH', 'MPL', 'NCCL', ++ 'PKG_CONFIG_PATH', 'XDG_DATA_DIRS', ++] + EXPORT_ENVS += NEBULA_EXPORT_ENVS + DEEPSPEED_ENVIRONMENT_NAME = os.getenv("DS_ENV_FILE", ".deepspeed_env") + DEEPSPEED_ENVIRONMENT_PATHS = [os.path.expanduser("~"), '.'] diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pic-compile.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pic-compile.patch new file mode 100644 index 00000000000..707bc826e88 --- /dev/null +++ b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pic-compile.patch @@ -0,0 +1,141 @@ +From 90afd671dadf9fd6a7a221428f2c04c16d637494 Mon Sep 17 00:00:00 2001 +From: Viktor Rehnberg +Date: Thu, 23 May 2024 07:09:53 +0000 +Subject: [PATCH] Compile with PIC + +--- + op_builder/builder.py | 15 ++++++++++----- + op_builder/cpu/builder.py | 3 ++- + op_builder/fused_adam.py | 4 +++- + op_builder/fused_lamb.py | 4 +++- + op_builder/fused_lion.py | 4 +++- + op_builder/xpu/builder.py | 3 ++- + 6 files changed, 23 insertions(+), 10 deletions(-) + +diff --git a/op_builder/builder.py b/op_builder/builder.py +index ec7566aa..f08e1799 100644 +--- a/op_builder/builder.py ++++ b/op_builder/builder.py +@@ -288,13 +288,13 @@ class OpBuilder(ABC): + ''' + Returns optional list of compiler flags to forward to nvcc when building CUDA sources + ''' +- return [] ++ return ['-Xcompiler', '-fPIC'] + + def cxx_args(self): + ''' + Returns optional list of compiler flags to forward to the build + ''' 
+- return [] ++ return ['-fPIC'] + + def is_compatible(self, verbose=True): + ''' +@@ -746,15 +746,18 @@ class CUDAOpBuilder(OpBuilder): + ) + + def cxx_args(self): ++ args = super().cxx_args() + if sys.platform == "win32": +- return ['-O2'] ++ args += ['-O2'] + else: +- return ['-O3', '-std=c++17', '-g', '-Wno-reorder'] ++ args += ['-O3', '-std=c++17', '-g', '-Wno-reorder'] ++ return args + + def nvcc_args(self): + if self.build_for_cpu: + return [] +- args = ['-O3'] ++ args = super().nvcc_args() ++ args += ['-O3'] + if self.is_rocm_pytorch(): + ROCM_MAJOR, ROCM_MINOR = self.installed_rocm_version() + args += [ +@@ -835,6 +838,8 @@ class TorchCPUOpBuilder(CUDAOpBuilder): + '-lcublas', + '-g', + ] ++ else: ++ args += super(CUDAOpBuilder, self).cxx_args() + + CPU_ARCH = self.cpu_arch() + SIMD_WIDTH = self.simd_width() +diff --git a/op_builder/cpu/builder.py b/op_builder/cpu/builder.py +index d881842a..dfc5a31d 100644 +--- a/op_builder/cpu/builder.py ++++ b/op_builder/cpu/builder.py +@@ -30,7 +30,8 @@ class CPUOpBuilder(OpBuilder): + return cpp_ext + + def cxx_args(self): +- args = ['-O3', '-g', '-Wno-reorder'] ++ args = super().cxx_args() ++ args += ['-O3', '-g', '-Wno-reorder'] + CPU_ARCH = self.cpu_arch() + SIMD_WIDTH = self.simd_width() + args += [CPU_ARCH, '-fopenmp', SIMD_WIDTH] +diff --git a/op_builder/fused_adam.py b/op_builder/fused_adam.py +index ac6e4eea..0c723572 100644 +--- a/op_builder/fused_adam.py ++++ b/op_builder/fused_adam.py +@@ -29,7 +29,9 @@ class FusedAdamBuilder(CUDAOpBuilder): + return args + self.version_dependent_macros() + + def nvcc_args(self): +- nvcc_flags = ['-O3'] + self.version_dependent_macros() ++ nvcc_flags = super(CUDAOpBuilder, self).nvcc_args() ++ nvcc_flags += ['-O3'] ++ nvcc_flags += self.version_dependent_macros() + if not self.is_rocm_pytorch(): + nvcc_flags.extend( + ['-allow-unsupported-compiler' if sys.platform == "win32" else '', '-lineinfo', '--use_fast_math'] + +diff --git a/op_builder/fused_lamb.py 
b/op_builder/fused_lamb.py +index f0cb5577..a59b97d4 100644 +--- a/op_builder/fused_lamb.py ++++ b/op_builder/fused_lamb.py +@@ -29,7 +29,9 @@ class FusedLambBuilder(CUDAOpBuilder): + return args + self.version_dependent_macros() + + def nvcc_args(self): +- nvcc_flags = ['-O3'] + self.version_dependent_macros() ++ nvcc_flags = super(CUDAOpBuilder, self).nvcc_args() ++ nvcc_flags += ['-O3'] ++ nvcc_flags += self.version_dependent_macros() + if self.is_rocm_pytorch(): + ROCM_MAJOR, ROCM_MINOR = self.installed_rocm_version() + nvcc_flags += ['-DROCM_VERSION_MAJOR=%s' % ROCM_MAJOR, '-DROCM_VERSION_MINOR=%s' % ROCM_MINOR] +diff --git a/op_builder/fused_lion.py b/op_builder/fused_lion.py +index b900a8f2..119232b5 100644 +--- a/op_builder/fused_lion.py ++++ b/op_builder/fused_lion.py +@@ -29,7 +29,9 @@ class FusedLionBuilder(CUDAOpBuilder): + return args + self.version_dependent_macros() + + def nvcc_args(self): +- nvcc_flags = ['-O3'] + self.version_dependent_macros() ++ nvcc_flags = super(CUDAOpBuilder, self).nvcc_args() ++ nvcc_flags += ['-O3'] ++ nvcc_flags += self.version_dependent_macros() + if not self.is_rocm_pytorch(): + nvcc_flags.extend( + ['-allow-unsupported-compiler' if sys.platform == "win32" else '', '-lineinfo', '--use_fast_math'] + +diff --git a/op_builder/xpu/builder.py b/op_builder/xpu/builder.py +index f430b7b6..5a1a2219 100644 +--- a/op_builder/xpu/builder.py ++++ b/op_builder/xpu/builder.py +@@ -52,7 +52,8 @@ class SYCLOpBuilder(OpBuilder): + return version_ge_1_1 + version_ge_1_3 + version_ge_1_5 + + def cxx_args(self): +- cxx_flags = [ ++ cxx_flags = super().cxx_args() ++ cxx_flags += [ + '-fsycl', '-fsycl-targets=spir64_gen', '-g', '-gdwarf-4', '-O3', '-std=c++17', '-fPIC', '-DMKL_ILP64', + '-fno-strict-aliasing' + ] +-- +2.39.3 + diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_test-nvme-offload.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_test-nvme-offload.patch new file mode 100644 index 00000000000..dcff709f2ce 
--- /dev/null +++ b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_test-nvme-offload.patch @@ -0,0 +1,135 @@ +From ddbf7ab23ce2e83747ff6a1482ac512e06da82ca Mon Sep 17 00:00:00 2001 +From: Viktor Rehnberg +Date: Mon, 4 Nov 2024 15:31:55 +0100 +Subject: [PATCH] Fix quantization tests + +NVME tests didn't always run because the hard-coded nvme_path wasn't +always writable. This commit changed to use tmp_path fixture instead and +disabled distributed test to avoid thread locks hanging. +--- + .../quantization/test_intX_quantization.py | 43 ++++++++++--------- + 1 file changed, 22 insertions(+), 21 deletions(-) + +diff --git a/tests/unit/inference/quantization/test_intX_quantization.py b/tests/unit/inference/quantization/test_intX_quantization.py +index 77b51fcd..9e0d7ac0 100644 +--- a/tests/unit/inference/quantization/test_intX_quantization.py ++++ b/tests/unit/inference/quantization/test_intX_quantization.py +@@ -17,6 +17,7 @@ from transformers import AutoConfig, OPTConfig, AutoModel + import pytest + from collections import OrderedDict + from typing import Dict ++from pathlib import Path + + device = get_accelerator().device_name() if get_accelerator().is_available() else 'cpu' + +@@ -53,11 +54,11 @@ def quantization_test_helper(pre_quant_type: torch.dtype, num_bits: int): + assert mean_diff < 0.15 and max_diff < 0.5, f'Numeric error exceed threshold, mean diff {mean_diff} (threshold 0.15), max diff {max_diff} (threshold 0.5)' + + +-def zero3_post_init_quantization_test_helper(cpu_offload: bool, nvme_offload: bool, bits: int): ++def zero3_post_init_quantization_test_helper(cpu_offload: bool, nvme_offload: bool, bits: int, tmp_path: Path): + import deepspeed + from transformers.integrations.deepspeed import HfDeepSpeedConfig + +- def get_zero3_ds_config(hf_config: OPTConfig, cpu_offload: bool, nvme_offload: bool, bits: int) -> Dict: ++ def get_zero3_ds_config(hf_config: OPTConfig, cpu_offload: bool, nvme_offload: bool, bits: int, tmp_path: Path) -> Dict: + GB = 1 
<< 30 + + ds_config = { +@@ -127,7 +128,7 @@ def zero3_post_init_quantization_test_helper(cpu_offload: bool, nvme_offload: bo + ds_config["zero_optimization"]["offload_param"] = dict( + device="nvme", + pin_memory=True, +- nvme_path='~/tmp_offload_dir', ++ nvme_path=str(tmp_path / "tmp_offload_dir"), + buffer_count=5, + buffer_size=1 * GB, + ) +@@ -142,7 +143,7 @@ def zero3_post_init_quantization_test_helper(cpu_offload: bool, nvme_offload: bo + return ds_config + + hf_config = AutoConfig.from_pretrained('facebook/opt-125m') +- ds_config = get_zero3_ds_config(hf_config=hf_config, cpu_offload=cpu_offload, nvme_offload=nvme_offload, bits=bits) ++ ds_config = get_zero3_ds_config(hf_config=hf_config, cpu_offload=cpu_offload, nvme_offload=nvme_offload, bits=bits, tmp_path=tmp_path) + + input_ids = torch.ones(1, 16, dtype=torch.int32, device=device) + attention_mask = torch.ones(1, 16, dtype=torch.float32, device=device) +@@ -170,11 +171,11 @@ def zero3_post_init_quantization_test_helper(cpu_offload: bool, nvme_offload: bo + assert mean_diff < 0.4, f'Numeric error exceed threshold, relative error {mean_diff} (threshold 0.4)' + + +-def zero3_quantized_initialization_test_helper(cpu_offload: bool, nvme_offload: bool, bits: int): ++def zero3_quantized_initialization_test_helper(cpu_offload: bool, nvme_offload: bool, bits: int, tmp_path: Path): + import deepspeed + from transformers.integrations.deepspeed import HfDeepSpeedConfig + +- def get_zero3_ds_config(hf_config: OPTConfig, cpu_offload: bool, nvme_offload: bool, bits: int) -> Dict: ++ def get_zero3_ds_config(hf_config: OPTConfig, cpu_offload: bool, nvme_offload: bool, bits: int, tmp_path: Path) -> Dict: + GB = 1 << 30 + + ds_config = { +@@ -206,7 +207,7 @@ def zero3_quantized_initialization_test_helper(cpu_offload: bool, nvme_offload: + ds_config["zero_optimization"]["offload_param"] = dict( + device="nvme", + pin_memory=True, +- nvme_path='~/tmp_offload_dir', ++ nvme_path=str(tmp_path / "tmp_offload_dir"), + 
buffer_count=5, + buffer_size=1 * GB, + ) +@@ -221,7 +222,7 @@ def zero3_quantized_initialization_test_helper(cpu_offload: bool, nvme_offload: + return ds_config + + hf_config = AutoConfig.from_pretrained('facebook/opt-125m') +- ds_config = get_zero3_ds_config(hf_config=hf_config, cpu_offload=cpu_offload, nvme_offload=nvme_offload, bits=bits) ++ ds_config = get_zero3_ds_config(hf_config=hf_config, cpu_offload=cpu_offload, nvme_offload=nvme_offload, bits=bits, tmp_path=tmp_path) + + input_ids = torch.ones(1, 16, dtype=torch.int32, device=device) + attention_mask = torch.ones(1, 16, dtype=torch.float32, device=device) +@@ -376,31 +377,31 @@ class TestQuantizedInt(DistributedTest): + quantization_test_helper(torch.float16, 8) + + @pytest.mark.skipif(device == 'cpu', reason='CPU does support FP16 GEMM') +- def test_zero3_int4_post_init_quant(self, quantization_bits): ++ def test_zero3_int4_post_init_quant(self, quantization_bits, tmp_path): + reset_random() +- zero3_post_init_quantization_test_helper(cpu_offload=False, nvme_offload=False, bits=quantization_bits) ++ zero3_post_init_quantization_test_helper(cpu_offload=False, nvme_offload=False, bits=quantization_bits, tmp_path=tmp_path) + + @pytest.mark.skipif(device == 'cpu', reason='CPU does support FP16 GEMM') +- def test_zero3_int4_post_init_quant_cpu_offload(self, quantization_bits): ++ def test_zero3_int4_post_init_quant_cpu_offload(self, quantization_bits, tmp_path): + reset_random() +- zero3_post_init_quantization_test_helper(cpu_offload=True, nvme_offload=False, bits=quantization_bits) ++ zero3_post_init_quantization_test_helper(cpu_offload=True, nvme_offload=False, bits=quantization_bits, tmp_path=tmp_path) + + @pytest.mark.skipif(device == 'cpu', reason='CPU does support FP16 GEMM') +- def test_zero3_int4_post_init_quant_nvme_offload(self): ++ def test_zero3_int4_post_init_quant_nvme_offload(self, tmp_path): + reset_random() +- zero3_post_init_quantization_test_helper(cpu_offload=False, nvme_offload=True, 
bits=4) ++ zero3_post_init_quantization_test_helper(cpu_offload=False, nvme_offload=True, bits=4, tmp_path=tmp_path) + + @pytest.mark.skipif(device == 'cpu', reason='CPU does support FP16 GEMM') +- def test_zero3_int4_quantized_initialization(self, quantization_bits): ++ def test_zero3_int4_quantized_initialization(self, quantization_bits, tmp_path): + reset_random() +- zero3_quantized_initialization_test_helper(cpu_offload=False, nvme_offload=False, bits=quantization_bits) ++ zero3_quantized_initialization_test_helper(cpu_offload=False, nvme_offload=False, bits=quantization_bits, tmp_path=tmp_path) + + @pytest.mark.skipif(device == 'cpu', reason='CPU does support FP16 GEMM') +- def test_zero3_int4_quantized_initialization_cpu_offload(self, quantization_bits): ++ def test_zero3_int4_quantized_initialization_cpu_offload(self, quantization_bits, tmp_path): + reset_random() +- zero3_quantized_initialization_test_helper(cpu_offload=True, nvme_offload=False, bits=quantization_bits) ++ zero3_quantized_initialization_test_helper(cpu_offload=True, nvme_offload=False, bits=quantization_bits, tmp_path=tmp_path) + + @pytest.mark.skipif(device == 'cpu', reason='CPU does support FP16 GEMM') +- def test_zero3_int4_quantized_initialization_nvme_offload(self): ++ def test_zero3_int4_quantized_initialization_nvme_offload(self, tmp_path): + reset_random() +- zero3_quantized_initialization_test_helper(cpu_offload=False, nvme_offload=True, bits=4) ++ zero3_quantized_initialization_test_helper(cpu_offload=False, nvme_offload=True, bits=4, tmp_path=tmp_path) +-- +2.39.3 + diff --git a/easybuild/easyconfigs/p/pytest-forked/pytest-forked-1.6.0-GCCcore-13.2.0.eb b/easybuild/easyconfigs/p/pytest-forked/pytest-forked-1.6.0-GCCcore-13.2.0.eb new file mode 100644 index 00000000000..56cbb558a23 --- /dev/null +++ b/easybuild/easyconfigs/p/pytest-forked/pytest-forked-1.6.0-GCCcore-13.2.0.eb @@ -0,0 +1,22 @@ +easyblock = 'PythonPackage' + +name = 'pytest-forked' +version = '1.6.0' + +homepage = 
'https://github.com/pytest-dev/pytest-forked' +description = "Run tests in isolated forked subprocesses." + +toolchain = {'name': 'GCCcore', 'version': '13.2.0'} + +sources = [SOURCE_TAR_GZ] +checksums = ['4dafd46a9a600f65d822b8f605133ecf5b3e1941ebb3588e943b4e3eb71a5a3f'] + +builddependencies = [ + ('binutils', '2.40'), +] +dependencies = [ + ('Python', '3.11.5'), + ('Python-bundle-PyPI', '2023.10'), +] + +moduleclass = 'tools'