From acdbbffecc86b642b48e9aa04617f44783b34411 Mon Sep 17 00:00:00 2001 From: Stefan Doerr Date: Wed, 18 Mar 2026 10:36:42 +0200 Subject: [PATCH 1/7] rdkit pyodide support added --- packages/libboost-python/meta.yaml | 59 ++++ packages/rdkit/extras/patch_init.py | 52 ++++ packages/rdkit/extras/pyproject.toml | 15 + packages/rdkit/meta.yaml | 195 ++++++++++++ packages/rdkit/test_rdkit.py | 429 +++++++++++++++++++++++++++ 5 files changed, 750 insertions(+) create mode 100644 packages/libboost-python/meta.yaml create mode 100644 packages/rdkit/extras/patch_init.py create mode 100644 packages/rdkit/extras/pyproject.toml create mode 100644 packages/rdkit/meta.yaml create mode 100644 packages/rdkit/test_rdkit.py diff --git a/packages/libboost-python/meta.yaml b/packages/libboost-python/meta.yaml new file mode 100644 index 00000000..850818eb --- /dev/null +++ b/packages/libboost-python/meta.yaml @@ -0,0 +1,59 @@ +package: + name: libboost-python + version: 1.84.0 + tag: + - library + - static_library + +source: + url: https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.gz + sha256: 4d27e9efed0f6f152dc28db6430b9d3dfb40c0345da7342eaa5a987dde57bd95 + +requirements: + host: + - libzlib + +build: + type: static_library + script: | + # Patch boost.numpy for NumPy 2.x (PyArray_Descr->elsize removed) + sed -i 's/reinterpret_cast(ptr())->elsize/PyDataType_ELSIZE(reinterpret_cast(ptr()))/' \ + libs/python/src/numpy/dtype.cpp + + ./bootstrap.sh --prefix=${WASM_LIBRARY_DIR} \ + --with-libraries=system,python,serialization,iostreams \ + --with-python=python${PYMAJOR}.${PYMINOR} + + # https://github.com/emscripten-core/emscripten/issues/17052 + # Without this, boost outputs WASM modules not static library archives. + printf "using clang : emscripten : emcc : emar emranlib emlink ;\n" \ + | tee -a ./project-config.jam + + # Bypass standard python detection, specify cross-compilation paths + sed -i 's/using python/#using python/' ./project-config.jam + NUMPY_INC=$(python3 -c "import numpy; print(numpy.get_include())") + printf "using python : ${PYMAJOR}.${PYMINOR} : python${PYMAJOR}.${PYMINOR} : ${PYTHONINCLUDE} ${NUMPY_INC} ;\n" \ + | tee -a ./project-config.jam + + ./b2 variant=release toolset=clang-emscripten link=static threading=single \ + address-model=32 --disable-icu \ + cxxflags="$SIDE_MODULE_CXXFLAGS -fwasm-exceptions -std=c++20 -DBOOST_SP_DISABLE_THREADS=1" \ + cflags="$SIDE_MODULE_CFLAGS -fwasm-exceptions -DBOOST_SP_DISABLE_THREADS=1" \ + linkflags="-fpic $SIDE_MODULE_LDFLAGS" \ + python=${PYMAJOR}.${PYMINOR} \ + -sZLIB_INCLUDE=${WASM_LIBRARY_DIR}/include \ + -sZLIB_LIBPATH=${WASM_LIBRARY_DIR}/lib \ + --layout=system -j"${PYODIDE_JOBS:-3}" --prefix=${WASM_LIBRARY_DIR} \ + install || true + + # Verify key libraries were built + echo "=== Boost libraries ===" + ls -la ${WASM_LIBRARY_DIR}/lib/libboost_python*.a + ls -la ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a + ls -la ${WASM_LIBRARY_DIR}/lib/libboost_serialization*.a + ls -la ${WASM_LIBRARY_DIR}/lib/libboost_iostreams*.a + +about: + home: https://www.boost.org/ + summary: Boost C++ libraries with Python and NumPy support for Emscripten + license: Boost diff --git a/packages/rdkit/extras/patch_init.py b/packages/rdkit/extras/patch_init.py new file mode 100644 index 00000000..81feeeef --- /dev/null +++ b/packages/rdkit/extras/patch_init.py @@ -0,0 +1,52 @@ +"""Patch rdkit/__init__.py to auto-load .so.wasm files on emscripten. + +This makes 'micropip.install()' + 'import rdkit' just work: +1. .so files are renamed to .so.wasm so micropip doesn't auto-load them +2. __init__.py loads librdkit_core.so.wasm with {global: true} on first import +3. A custom import finder loads wrapper .so.wasm via ExtensionFileLoader +""" + +init_path = "rdkit/__init__.py" +init = open(init_path).read() + +loader = '''import sys as _sys + +if _sys.platform == 'emscripten': + import os as _os + import importlib.abc + import importlib.machinery + + # Set RDBASE so RDConfig.py finds Data/, Docs/, etc. relative to this package + _os.environ['RDBASE'] = _os.path.dirname(__file__) + + from pyodide_js._module import loadDynamicLibrary as _ldl + import js as _js + _core = _os.path.join(_os.path.dirname(__file__), 'librdkit_core.so.wasm') + _ldl(_core, _js.JSON.parse('{"global": true}')) + del _ldl, _js, _core + + class _RDKitExtensionFinder(importlib.abc.MetaPathFinder): + def find_spec(self, fullname, path, target=None): + parts = fullname.split('.') + if parts[0] != 'rdkit': + return None + modname = parts[-1] + if path: + for d in path: + candidate = _os.path.join(d, modname + '.so.wasm') + if _os.path.exists(candidate): + loader = importlib.machinery.ExtensionFileLoader( + fullname, candidate + ) + return importlib.util.spec_from_file_location( + fullname, candidate, loader=loader, + ) + return None + + import importlib.util + _sys.meta_path.insert(0, _RDKitExtensionFinder()) + +''' + +open(init_path, "w").write(loader + init) +print("Patched rdkit/__init__.py") diff --git a/packages/rdkit/extras/pyproject.toml b/packages/rdkit/extras/pyproject.toml new file mode 100644 index 00000000..8a2d993e --- /dev/null +++ b/packages/rdkit/extras/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "rdkit" +version = "0.0.0" +description = "RDKit cheminformatics library for Pyodide" + +[tool.setuptools.packages.find] +include = ["rdkit*"] + +[tool.setuptools.package-data] +"*" = ["*.so.wasm"] +rdkit = ["Data/**/*"] diff --git a/packages/rdkit/meta.yaml b/packages/rdkit/meta.yaml new file mode 100644 index 00000000..54e1a80a --- /dev/null +++ b/packages/rdkit/meta.yaml @@ -0,0 +1,195 @@ +package: + name: rdkit + version: 2025.9.6 + top-level: + - rdkit + +source: + url: https://github.com/rdkit/rdkit/archive/refs/tags/Release_2025_09_6.tar.gz + sha256: 57b92e8f47d9dbd559bd808d5cf6c48a628bc36118bc35b832a35e2ca8a0c7a1 + extract_dir: rdkit-Release_2025_09_6 + + extras: + - [extras/pyproject.toml, pyproject.toml] + - [extras/patch_init.py, patch_init.py] + +requirements: + host: + - libboost-python + - libzlib + - numpy + run: + - numpy + +build: + script: | + set -e + + # ======================== + # 1. Setup + # ======================== + NUMPY_INC="$HOSTINSTALLDIR/lib/python$PYMAJOR.$PYMINOR/site-packages/numpy/_core/include/" + + # Download Eigen3 headers (header-only) + if [ ! -d "eigen-3.4.0" ]; then + wget -q https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz + tar xf eigen-3.4.0.tar.gz + fi + + # Dummy libpython — side modules don't link against it, but CMake's FindPython3 needs it + emar rcs /tmp/libpython${PYMAJOR}.${PYMINOR}.a + + # Patch CMakeLists.txt for Emscripten support + sed -i 's/OR RDK_BUILD_MINIMAL_LIB)/OR EMSCRIPTEN OR RDK_BUILD_MINIMAL_LIB)/' CMakeLists.txt + + # Set version in pyproject.toml + sed -i "s/^version = .*/version = \"${PKG_VERSION}\"/" pyproject.toml + + # ======================== + # 2. CMake configure + # ======================== + mkdir -p build && cd build + + emcmake cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DRDK_BUILD_PYTHON_WRAPPERS=ON \ + -DRDK_BUILD_MINIMAL_LIB=OFF \ + -DRDK_BUILD_CPP_TESTS=OFF \ + -DRDK_BUILD_INCHI_SUPPORT=ON \ + -DRDK_USE_BOOST_SERIALIZATION=ON \ + -DRDK_USE_BOOST_IOSTREAMS=ON \ + -DRDK_OPTIMIZE_POPCNT=OFF \ + -DRDK_BUILD_THREADSAFE_SSS=OFF \ + -DRDK_BUILD_DESCRIPTORS3D=ON \ + -DRDK_TEST_MULTITHREADED=OFF \ + -DRDK_BUILD_CHEMDRAW_SUPPORT=OFF \ + -DRDK_BUILD_MAEPARSER_SUPPORT=ON \ + -DRDK_BUILD_COORDGEN_SUPPORT=OFF \ + -DRDK_BUILD_SLN_SUPPORT=ON \ + -DRDK_BUILD_CAIRO_SUPPORT=OFF \ + -DRDK_BUILD_QT_SUPPORT=OFF \ + -DRDK_BUILD_PGSQL=OFF \ + -DRDK_BUILD_SWIG_WRAPPERS=OFF \ + -DRDK_BUILD_FREETYPE_SUPPORT=OFF \ + -DRDK_BUILD_AVALON_SUPPORT=ON \ + -DRDK_BUILD_YAEHMOP_SUPPORT=OFF \ + -DRDK_BUILD_XYZ2MOL_SUPPORT=ON \ + -DRDK_INSTALL_INTREE=ON \ + -DRDK_INSTALL_STATIC_LIBS=ON \ + -DRDK_BUILD_STATIC_LIBS_ONLY=ON \ + -DBoost_USE_STATIC_LIBS=ON \ + -DCMAKE_PREFIX_PATH="${WASM_LIBRARY_DIR}" \ + -DCMAKE_FIND_ROOT_PATH="${WASM_LIBRARY_DIR}" \ + -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ + -DBoost_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \ + -DCMAKE_INCLUDE_PATH="${WASM_LIBRARY_DIR}/include" \ + -DCMAKE_LIBRARY_PATH="${WASM_LIBRARY_DIR}/lib" \ + -DZLIB_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \ + -DZLIB_LIBRARY=${WASM_LIBRARY_DIR}/lib/libz.a \ + -DEIGEN3_INCLUDE_DIR=$PWD/../eigen-3.4.0 \ + -DPython3_INCLUDE_DIR=${PYTHONINCLUDE} \ + -DPython3_LIBRARY=/tmp/libpython${PYMAJOR}.${PYMINOR}.a \ + -DPython3_NumPy_INCLUDE_DIR=${NUMPY_INC} \ + -DPython3_EXECUTABLE=python${PYMAJOR}.${PYMINOR} \ + -DCMAKE_CXX_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC -std=c++20" \ + -DCMAKE_C_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC" \ + .. + + # ======================== + # 3. Build + # ======================== + # Use -k to keep going past wrapper MODULE link failures (expected in + # cross-compilation — we relink all wrappers ourselves in step 4). + # The important outputs are the static .a libraries and .o object files. + emmake make -k -j ${PYODIDE_JOBS:-3} || true + cd .. + + # ======================== + # 4. Relink static archives into WASM side modules + # ======================== + # CMake produces .a archives (Emscripten default for MODULE targets). + # Strategy: one core .so with ALL C++ code (loaded with {global: true}), + # then thin wrapper .so per Python extension module. + + # 4a. Collect all static libraries for the core + find build/lib -name '*.a' | sort > /tmp/all_libs.txt + for lib in ${WASM_LIBRARY_DIR}/lib/libboost_python*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_system*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_serialization*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_iostreams*.a; do + [ -f "$lib" ] && echo "$lib" >> /tmp/all_libs.txt + done + echo "${WASM_LIBRARY_DIR}/lib/libz.a" >> /tmp/all_libs.txt + echo "Core libraries: $(wc -l < /tmp/all_libs.txt)" + + # Link all archives into one core side module + WHOLE_ARGS="" + while read -r lib; do + WHOLE_ARGS="$WHOLE_ARGS -Wl,--whole-archive $lib -Wl,--no-whole-archive" + done < /tmp/all_libs.txt + + em++ -fwasm-exceptions -sSIDE_MODULE=1 -O2 -shared \ + -Wl,--no-gc-sections -Wl,--export-all \ + $WHOLE_ARGS \ + -o build/librdkit_core.so + ls -lh build/librdkit_core.so + + # 4b. Relink each Python wrapper as a thin .so and place it in the + # correct package directory. The output path is derived from cmake's + # build rules (LIBRARY_OUTPUT_DIRECTORY), so new/renamed modules are + # handled automatically without hardcoded mappings. + WRAPPER_COUNT=0 + for wrap_dir in $(find build -path '*/Wrap/CMakeFiles/*.dir' -type d 2>/dev/null); do + modname=$(basename "$wrap_dir" .dir) + obj_files=$(find "$wrap_dir" -name '*.o' 2>/dev/null) + if [ -z "$obj_files" ]; then + echo "SKIP: $modname (no .o files)" + continue + fi + # Derive the correct output path from cmake's build.make + build_make="$wrap_dir/build.make" + dest_path=$(grep -oP "rdkit/[^ ]*${modname}\.so" "$build_make" 2>/dev/null | head -1) + if [ -z "$dest_path" ]; then + echo "SKIP: $modname (no output path found in cmake)" + continue + fi + mkdir -p "$(dirname "$dest_path")" + em++ -fwasm-exceptions -sSIDE_MODULE=1 -O2 -shared \ + $obj_files \ + -o "$dest_path" + echo "LINKED: $modname -> $dest_path" + WRAPPER_COUNT=$((WRAPPER_COUNT + 1)) + done + echo "Total wrapper .so files: $WRAPPER_COUNT" + + # ======================== + # 5. Assemble Python package + # ======================== + # Copy core .so into rdkit package + cp build/librdkit_core.so rdkit/ + + # Copy Data directory into package + cp -r Data rdkit/Data + + # Rename .so -> .so.wasm (prevents micropip from auto-loading before core is ready) + find rdkit -name "*.so" -exec sh -c 'mv "$1" "$1.wasm"' _ {} \; + + # Patch __init__.py to load core and register custom import finder + python3 patch_init.py + + echo "=== Package contents ===" + find rdkit -name "*.so.wasm" | sort + echo "---" + du -sh rdkit + +test: + imports: + - rdkit + - rdkit.Chem + +about: + home: https://github.com/rdkit/rdkit + PyPI: https://pypi.org/project/rdkit + summary: RDKit cheminformatics library for Pyodide + license: BSD-3-Clause diff --git a/packages/rdkit/test_rdkit.py b/packages/rdkit/test_rdkit.py new file mode 100644 index 00000000..9ca4aaf9 --- /dev/null +++ b/packages/rdkit/test_rdkit.py @@ -0,0 +1,429 @@ +import pytest +from pytest_pyodide import run_in_pyodide + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_smiles_parsing(selenium): + from rdkit import Chem + + mol = Chem.MolFromSmiles("CCO") + assert mol is not None + assert mol.GetNumAtoms() == 3 + assert mol.GetNumBonds() == 2 + + # Canonical SMILES + assert Chem.MolToSmiles(Chem.MolFromSmiles("OCC")) == "CCO" + + # Aromatic molecules + benzene = Chem.MolFromSmiles("c1ccccc1") + assert benzene is not None + assert benzene.GetNumAtoms() == 6 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_substructure_search(selenium): + from rdkit import Chem + + mol = Chem.MolFromSmiles("CCO") + benzene = Chem.MolFromSmiles("c1ccccc1") + pat = Chem.MolFromSmarts("[OH]") + assert mol.HasSubstructMatch(pat) + assert not benzene.HasSubstructMatch(pat) + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_inchi(selenium): + from rdkit import Chem + from rdkit.Chem import inchi + + mol = Chem.MolFromSmiles("CCO") + inchi_str = inchi.MolToInchi(mol) + assert inchi_str.startswith("InChI=") + + # Roundtrip + mol2 = inchi.MolFromInchi(inchi_str) + assert Chem.MolToSmiles(mol2) == "CCO" + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_molblock_roundtrip(selenium): + from rdkit import Chem + + aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O") + molblock = Chem.MolToMolBlock(aspirin) + assert "V2000" in molblock + aspirin2 = Chem.MolFromMolBlock(molblock) + assert Chem.MolToSmiles(aspirin) == Chem.MolToSmiles(aspirin2) + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_ring_info(selenium): + from rdkit import Chem + + benzene = Chem.MolFromSmiles("c1ccccc1") + ri = benzene.GetRingInfo() + assert ri.NumRings() == 1 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_atom_bond_properties(selenium): + from rdkit import Chem + + mol = Chem.MolFromSmiles("CCO") + atom = mol.GetAtomWithIdx(2) + assert atom.GetSymbol() == "O" + assert atom.GetAtomicNum() == 8 + bond = mol.GetBondWithIdx(1) + assert bond.GetBondTypeAsDouble() == 1.0 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_molecular_formula(selenium): + from rdkit import Chem + from rdkit.Chem import rdMolDescriptors + + aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O") + assert rdMolDescriptors.CalcMolFormula(aspirin) == "C9H8O4" + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_chemical_reactions(selenium): + from rdkit.Chem import AllChem + + rxn = AllChem.ReactionFromSmarts("[C:1](=O)[OH].[N:2]>>[C:1](=O)[N:2]") + assert rxn is not None + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_2d_coords_and_svg(selenium): + from rdkit import Chem + from rdkit.Chem import AllChem + from rdkit.Chem.Draw import rdMolDraw2D + + benzene = Chem.MolFromSmiles("c1ccccc1") + AllChem.Compute2DCoords(benzene) + conf = benzene.GetConformer() + pos = conf.GetAtomPosition(0) + assert not (pos.x == 0.0 and pos.y == 0.0) + + # SVG drawing + drawer = rdMolDraw2D.MolDraw2DSVG(300, 300) + drawer.DrawMolecule(benzene) + drawer.FinishDrawing() + svg = drawer.GetDrawingText() + assert "" in svg + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_drug_molecules(selenium): + from rdkit import Chem + + drugs = { + "caffeine": "Cn1c(=O)c2c(ncn2C)n(C)c1=O", + "ibuprofen": "CC(C)Cc1ccc(cc1)C(C)C(=O)O", + "penicillin_G": "CC1(C)SC2C(NC(=O)Cc3ccccc3)C(=O)N2C1C(=O)O", + } + for name, smi in drugs.items(): + m = Chem.MolFromSmiles(smi) + assert m is not None, f"Failed to parse {name}" + can = Chem.MolToSmiles(m) + m2 = Chem.MolFromSmiles(can) + assert Chem.MolToSmiles(m2) == can + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_binary_serialization(selenium): + from rdkit import Chem + + aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O") + aspirin_bin = aspirin.ToBinary() + assert len(aspirin_bin) > 0 + aspirin2 = Chem.Mol(aspirin_bin) + assert Chem.MolToSmiles(aspirin2) == Chem.MolToSmiles(aspirin) + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_fingerprint_numpy(selenium): + import numpy as np + from rdkit import Chem + from rdkit.Chem import rdFingerprintGenerator + + mol = Chem.MolFromSmiles("CCO") + fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2) + fp_np = fpgen.GetFingerprintAsNumPy(mol) + assert isinstance(fp_np, np.ndarray) + assert fp_np.shape[0] > 0 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_3d_embedding_and_optimization(selenium): + from rdkit import Chem + from rdkit.Chem import AllChem, Descriptors3D + + # 3D embedding + mol = Chem.MolFromSmiles("c1ccc(O)cc1") + mol = Chem.AddHs(mol) + res = AllChem.EmbedMolecule(mol, randomSeed=42) + assert res == 0 + conf = mol.GetConformer() + assert conf.Is3D() + + # UFF optimization + res_opt = AllChem.UFFOptimizeMolecule(mol, maxIters=200) + assert res_opt == 0 + + # 3D descriptors + butane = Chem.MolFromSmiles("CCCC") + butane = Chem.AddHs(butane) + AllChem.EmbedMolecule(butane, randomSeed=42) + asphericity = Descriptors3D.Asphericity(butane) + assert asphericity >= 0 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_add_remove_hydrogens(selenium): + from rdkit import Chem + + phenol = Chem.MolFromSmiles("c1ccc(O)cc1") + assert phenol.GetNumAtoms() == 7 + phenol_h = Chem.AddHs(phenol) + assert phenol_h.GetNumAtoms() == 13 + phenol_noh = Chem.RemoveHs(phenol_h) + assert phenol_noh.GetNumAtoms() == 7 + assert Chem.MolToSmiles(phenol) == Chem.MolToSmiles(phenol_noh) + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_kekulization(selenium): + from rdkit import Chem + + arom = Chem.MolFromSmiles("c1ccccc1") + bond = arom.GetBondWithIdx(0) + assert bond.GetIsAromatic() + assert bond.GetBondType() == Chem.rdchem.BondType.AROMATIC + + Chem.Kekulize(arom, clearAromaticFlags=True) + bond_k = arom.GetBondWithIdx(0) + assert not bond_k.GetIsAromatic() + assert bond_k.GetBondType() in ( + Chem.rdchem.BondType.SINGLE, + Chem.rdchem.BondType.DOUBLE, + ) + kek_smi = Chem.MolToSmiles(arom, kekuleSmiles=True) + assert "c" not in kek_smi + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_mmff_optimization(selenium): + from rdkit import Chem + from rdkit.Chem import AllChem + + mol = Chem.MolFromSmiles("c1ccc(O)cc1") + mol = Chem.AddHs(mol) + AllChem.EmbedMolecule(mol, randomSeed=42) + props = AllChem.MMFFGetMoleculeProperties(mol) + assert props is not None + ff = AllChem.MMFFGetMoleculeForceField(mol, props) + assert ff is not None + e_before = ff.CalcEnergy() + res = AllChem.MMFFOptimizeMolecule(mol, maxIters=200) + assert res == 0 + ff2 = AllChem.MMFFGetMoleculeForceField( + mol, AllChem.MMFFGetMoleculeProperties(mol) + ) + e_after = ff2.CalcEnergy() + assert e_after <= e_before + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_multiple_conformers(selenium): + from rdkit import Chem + from rdkit.Chem import AllChem + + mol = Chem.MolFromSmiles("CCCCCCC") + mol = Chem.AddHs(mol) + params = AllChem.ETKDGv3() + params.randomSeed = 42 + params.numThreads = 1 + cids = AllChem.EmbedMultipleConfs(mol, numConfs=5, params=params) + assert len(cids) == 5 + assert mol.GetNumConformers() == 5 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_molecular_alignment(selenium): + from rdkit import Chem + from rdkit.Chem import AllChem, rdMolAlign + + mol = Chem.MolFromSmiles("CCCCCCC") + mol = Chem.AddHs(mol) + params = AllChem.ETKDGv3() + params.randomSeed = 42 + params.numThreads = 1 + AllChem.EmbedMultipleConfs(mol, numConfs=3, params=params) + rmsds = [] + rdMolAlign.AlignMolConformers(mol, RMSlist=rmsds) + assert len(rmsds) > 0 + + # Align two molecules + ref = Chem.AddHs(Chem.MolFromSmiles("c1ccccc1O")) + AllChem.EmbedMolecule(ref, randomSeed=42) + probe = Chem.AddHs(Chem.MolFromSmiles("c1ccccc1O")) + AllChem.EmbedMolecule(probe, randomSeed=123) + rmsd = rdMolAlign.AlignMol(probe, ref) + assert rmsd >= 0 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_mcs(selenium): + from rdkit import Chem + from rdkit.Chem import rdFMCS + + mol_a = Chem.MolFromSmiles("c1ccccc1CCO") + mol_b = Chem.MolFromSmiles("c1ccccc1CCCO") + mcs = rdFMCS.FindMCS([mol_a, mol_b]) + assert mcs.numAtoms > 0 + assert mcs.numBonds > 0 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_tautomer_enumeration(selenium): + from rdkit import Chem + from rdkit.Chem.MolStandardize import rdMolStandardize + + taut_enum = rdMolStandardize.TautomerEnumerator() + keto = Chem.MolFromSmiles("OC1=CC=CC=C1") + canonical = taut_enum.Canonicalize(keto) + assert canonical is not None + tautomers = list(taut_enum.Enumerate(keto)) + assert len(tautomers) >= 1 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_salt_removal(selenium): + from rdkit import Chem + from rdkit.Chem.SaltRemover import SaltRemover + + remover = SaltRemover() + salt_mol = Chem.MolFromSmiles("[Na+].OC1=CC=CC=C1") + stripped = remover.StripMol(salt_mol) + assert stripped is not None + assert stripped.GetNumAtoms() < salt_mol.GetNumAtoms() + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_stereochemistry(selenium): + from rdkit import Chem + from rdkit.Chem import AllChem + + chiral = Chem.MolFromSmiles("C[C@@H](O)F") + Chem.AssignStereochemistry(chiral, cleanIt=True, force=True) + stereo_atom = chiral.GetAtomWithIdx(1) + assert stereo_atom.GetChiralTag() != Chem.rdchem.ChiralType.CHI_UNSPECIFIED + + # AssignStereochemistryFrom3D + mol_3d = Chem.AddHs(Chem.MolFromSmiles("C[C@@H](O)F")) + AllChem.EmbedMolecule(mol_3d, randomSeed=42) + Chem.AssignStereochemistryFrom3D(mol_3d) + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_rwmol_and_combine(selenium): + from rdkit import Chem + + rwmol = Chem.RWMol(Chem.MolFromSmiles("C")) + idx = rwmol.AddAtom(Chem.Atom(8)) + rwmol.AddBond(0, idx, Chem.rdchem.BondType.SINGLE) + Chem.SanitizeMol(rwmol) + assert Chem.MolToSmiles(rwmol) == "CO" + + combined = Chem.CombineMols(Chem.MolFromSmiles("C"), Chem.MolFromSmiles("O")) + assert combined.GetNumAtoms() == 2 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_descriptors(selenium): + from rdkit import Chem + from rdkit.Chem import Descriptors + + aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O") + assert len(Descriptors.descList) > 0 + mw = Descriptors.MolWt(aspirin) + assert 170 < mw < 190 + logp = Descriptors.MolLogP(aspirin) + assert isinstance(logp, float) + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_maccs_fingerprints(selenium): + from rdkit import Chem + from rdkit.Chem import MACCSkeys + + aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O") + maccs = MACCSkeys.GenMACCSKeys(aspirin) + assert maccs is not None + assert maccs.GetNumOnBits() > 0 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_sdf_read_write(selenium): + from rdkit import Chem + + aspirin = Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O") + benzene = Chem.MolFromSmiles("c1ccccc1") + + sdf_out = Chem.SDWriter("/tmp/test.sdf") + sdf_out.write(aspirin) + sdf_out.write(benzene) + sdf_out.close() + + suppl = Chem.SDMolSupplier("/tmp/test.sdf") + mols = [m for m in suppl if m is not None] + assert len(mols) == 2 + assert Chem.MolToSmiles(mols[0]) == Chem.MolToSmiles(aspirin) + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_data_dir_and_chemical_features(selenium): + import os + from rdkit import RDConfig, Chem + from rdkit.Chem import AllChem, ChemicalFeatures + + assert os.path.isdir(RDConfig.RDDataDir) + fdef_path = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef") + assert os.path.isfile(fdef_path) + + feat_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path) + assert feat_factory is not None + + aspirin = Chem.AddHs(Chem.MolFromSmiles("CC(=O)Oc1ccccc1C(=O)O")) + AllChem.EmbedMolecule(aspirin, randomSeed=42) + feats = feat_factory.GetFeaturesForMol(aspirin) + assert len(feats) > 0 From ddc5c622266af7dceaed0e6acc70b57794b35576 Mon Sep 17 00:00:00 2001 From: Stefan Doerr Date: Wed, 18 Mar 2026 11:46:39 +0200 Subject: [PATCH 2/7] enabled chemdraw and coordgen support --- packages/rdkit/meta.yaml | 7 ++++--- packages/rdkit/test_rdkit.py | 38 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/packages/rdkit/meta.yaml b/packages/rdkit/meta.yaml index 54e1a80a..0e820a19 100644 --- a/packages/rdkit/meta.yaml +++ b/packages/rdkit/meta.yaml @@ -62,9 +62,9 @@ build: -DRDK_BUILD_THREADSAFE_SSS=OFF \ -DRDK_BUILD_DESCRIPTORS3D=ON \ -DRDK_TEST_MULTITHREADED=OFF \ - -DRDK_BUILD_CHEMDRAW_SUPPORT=OFF \ + -DRDK_BUILD_CHEMDRAW_SUPPORT=ON \ -DRDK_BUILD_MAEPARSER_SUPPORT=ON \ - -DRDK_BUILD_COORDGEN_SUPPORT=OFF \ + -DRDK_BUILD_COORDGEN_SUPPORT=ON \ -DRDK_BUILD_SLN_SUPPORT=ON \ -DRDK_BUILD_CAIRO_SUPPORT=OFF \ -DRDK_BUILD_QT_SUPPORT=OFF \ @@ -112,7 +112,8 @@ build: # then thin wrapper .so per Python extension module. # 4a. Collect all static libraries for the core - find build/lib -name '*.a' | sort > /tmp/all_libs.txt + # Search build/lib/ plus External/ for bundled deps like expat + find build/lib build/External -name '*.a' 2>/dev/null | sort > /tmp/all_libs.txt for lib in ${WASM_LIBRARY_DIR}/lib/libboost_python*.a \ ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a \ ${WASM_LIBRARY_DIR}/lib/libboost_system*.a \ diff --git a/packages/rdkit/test_rdkit.py b/packages/rdkit/test_rdkit.py index 9ca4aaf9..f27a0c2f 100644 --- a/packages/rdkit/test_rdkit.py +++ b/packages/rdkit/test_rdkit.py @@ -427,3 +427,41 @@ def test_data_dir_and_chemical_features(selenium): AllChem.EmbedMolecule(aspirin, randomSeed=42) feats = feat_factory.GetFeaturesForMol(aspirin) assert len(feats) > 0 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_coordgen_2d_coords(selenium): + from rdkit import Chem + from rdkit.Chem import rdCoordGen, AllChem + + mol = Chem.MolFromSmiles("c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34") # pyrene + rdCoordGen.AddCoords(mol) + conf = mol.GetConformer() + assert conf.GetNumAtoms() == mol.GetNumAtoms() + + # Verify coordinates are non-degenerate (not all at origin) + positions = [conf.GetAtomPosition(i) for i in range(mol.GetNumAtoms())] + xs = [p.x for p in positions] + ys = [p.y for p in positions] + assert max(xs) - min(xs) > 0.1 + assert max(ys) - min(ys) > 0.1 + + +@pytest.mark.driver_timeout(60) +@run_in_pyodide(packages=["rdkit"]) +def test_chemdraw_cdxml(selenium): + from rdkit import Chem + from rdkit.Chem import rdChemDraw + + # Write a molecule to CDXML and read it back + mol = Chem.MolFromSmiles("c1ccccc1O") + Chem.AssignStereochemistry(mol) + cdxml = rdChemDraw.MolToCDXML(mol) + assert "" in cdxml + + # Roundtrip: CDXML -> mol + mols = rdChemDraw.MolsFromCDXML(cdxml) + assert len(mols) >= 1 + assert mols[0].GetNumAtoms() == mol.GetNumAtoms() From 320deaa7a80ce257101cd936ece0f9fb3446a708 Mon Sep 17 00:00:00 2001 From: Stefan Doerr Date: Wed, 18 Mar 2026 11:48:09 +0200 Subject: [PATCH 3/7] fix test --- packages/rdkit/test_rdkit.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/rdkit/test_rdkit.py b/packages/rdkit/test_rdkit.py index f27a0c2f..8659e2fa 100644 --- a/packages/rdkit/test_rdkit.py +++ b/packages/rdkit/test_rdkit.py @@ -450,18 +450,18 @@ def test_coordgen_2d_coords(selenium): @pytest.mark.driver_timeout(60) @run_in_pyodide(packages=["rdkit"]) -def test_chemdraw_cdxml(selenium): +def test_chemdraw(selenium): from rdkit import Chem - from rdkit.Chem import rdChemDraw + from rdkit.Chem import AllChem, rdChemDraw - # Write a molecule to CDXML and read it back + # Generate 2D coords (needed for ChemDraw output) mol = Chem.MolFromSmiles("c1ccccc1O") - Chem.AssignStereochemistry(mol) - cdxml = rdChemDraw.MolToCDXML(mol) - assert "" in cdxml + AllChem.Compute2DCoords(mol) - # Roundtrip: CDXML -> mol - mols = rdChemDraw.MolsFromCDXML(cdxml) + # Write to ChemDraw format and read back + cdx = rdChemDraw.MolToChemDrawBlock(mol) + assert len(cdx) > 0 + + mols = rdChemDraw.MolsFromChemDrawBlock(cdx) assert len(mols) >= 1 assert mols[0].GetNumAtoms() == mol.GetNumAtoms() From 8f44b903cbab88ce167f60a1ff5b8895589b90f6 Mon Sep 17 00:00:00 2001 From: Stefan Doerr Date: Wed, 18 Mar 2026 13:58:09 +0200 Subject: [PATCH 4/7] test splitting rdkit and librdkit to load asynchronously the larger .so file --- packages/librdkit/meta.yaml | 26 ++++++++++++++++++++++++++ packages/rdkit/extras/patch_init.py | 17 ++++++----------- packages/rdkit/meta.yaml | 27 +++++++++++++++------------ 3 files changed, 47 insertions(+), 23 deletions(-) create mode 100644 packages/librdkit/meta.yaml diff --git a/packages/librdkit/meta.yaml b/packages/librdkit/meta.yaml new file mode 100644 index 00000000..a8478316 --- /dev/null +++ b/packages/librdkit/meta.yaml @@ -0,0 +1,26 @@ +package: + name: librdkit + version: 2025.9.6 + tag: + - library + - shared_library + +source: + url: https://github.com/rdkit/rdkit/archive/refs/tags/Release_2025_09_6.tar.gz + sha256: 57b92e8f47d9dbd559bd808d5cf6c48a628bc36118bc35b832a35e2ca8a0c7a1 + extract_dir: rdkit-Release_2025_09_6 + +requirements: + host: + - rdkit + +build: + type: shared_library + script: | + cp ${WASM_LIBRARY_DIR}/lib/librdkit_core.so ${DISTDIR} + ls -lh ${DISTDIR}/librdkit_core.so + +about: + home: https://github.com/rdkit/rdkit + summary: RDKit core C++ libraries as a WASM side module + license: BSD-3-Clause diff --git a/packages/rdkit/extras/patch_init.py b/packages/rdkit/extras/patch_init.py index 81feeeef..b7c7e95d 100644 --- a/packages/rdkit/extras/patch_init.py +++ b/packages/rdkit/extras/patch_init.py @@ -1,9 +1,10 @@ -"""Patch rdkit/__init__.py to auto-load .so.wasm files on emscripten. +"""Patch rdkit/__init__.py for emscripten/Pyodide support. -This makes 'micropip.install()' + 'import rdkit' just work: -1. .so files are renamed to .so.wasm so micropip doesn't auto-load them -2. __init__.py loads librdkit_core.so.wasm with {global: true} on first import -3. A custom import finder loads wrapper .so.wasm via ExtensionFileLoader +The librdkit_core.so shared library is loaded by Pyodide with {global: true} +via the librdkit shared_library package (asynchronously during loadPackage). +This patch only needs to: +1. Set RDBASE for RDConfig.py path resolution +2. Register a custom MetaPathFinder to load .so.wasm wrapper modules """ init_path = "rdkit/__init__.py" @@ -19,12 +20,6 @@ # Set RDBASE so RDConfig.py finds Data/, Docs/, etc. relative to this package _os.environ['RDBASE'] = _os.path.dirname(__file__) - from pyodide_js._module import loadDynamicLibrary as _ldl - import js as _js - _core = _os.path.join(_os.path.dirname(__file__), 'librdkit_core.so.wasm') - _ldl(_core, _js.JSON.parse('{"global": true}')) - del _ldl, _js, _core - class _RDKitExtensionFinder(importlib.abc.MetaPathFinder): def find_spec(self, fullname, path, target=None): parts = fullname.split('.') diff --git a/packages/rdkit/meta.yaml b/packages/rdkit/meta.yaml index 0e820a19..4afcc533 100644 --- a/packages/rdkit/meta.yaml +++ b/packages/rdkit/meta.yaml @@ -19,6 +19,7 @@ requirements: - libzlib - numpy run: + - librdkit - numpy build: @@ -90,7 +91,7 @@ build: -DPython3_INCLUDE_DIR=${PYTHONINCLUDE} \ -DPython3_LIBRARY=/tmp/libpython${PYMAJOR}.${PYMINOR}.a \ -DPython3_NumPy_INCLUDE_DIR=${NUMPY_INC} \ - -DPython3_EXECUTABLE=python${PYMAJOR}.${PYMINOR} \ + -DPython3_EXECUTABLE=$(which python${PYMAJOR}.${PYMINOR}) \ -DCMAKE_CXX_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC -std=c++20" \ -DCMAKE_C_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC" \ .. @@ -107,12 +108,11 @@ build: # ======================== # 4. Relink static archives into WASM side modules # ======================== - # CMake produces .a archives (Emscripten default for MODULE targets). - # Strategy: one core .so with ALL C++ code (loaded with {global: true}), - # then thin wrapper .so per Python extension module. + # Strategy: one core .so with ALL C++ code (loaded by Pyodide with + # {global: true} via the librdkit shared_library package), then thin + # wrapper .so per Python extension module. # 4a. Collect all static libraries for the core - # Search build/lib/ plus External/ for bundled deps like expat find build/lib build/External -name '*.a' 2>/dev/null | sort > /tmp/all_libs.txt for lib in ${WASM_LIBRARY_DIR}/lib/libboost_python*.a \ ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a \ @@ -124,7 +124,6 @@ build: echo "${WASM_LIBRARY_DIR}/lib/libz.a" >> /tmp/all_libs.txt echo "Core libraries: $(wc -l < /tmp/all_libs.txt)" - # Link all archives into one core side module WHOLE_ARGS="" while read -r lib; do WHOLE_ARGS="$WHOLE_ARGS -Wl,--whole-archive $lib -Wl,--no-whole-archive" @@ -136,6 +135,12 @@ build: -o build/librdkit_core.so ls -lh build/librdkit_core.so + # Stage core .so for the librdkit shared_library recipe to package. + # NOT included in the wheel — Pyodide loads it asynchronously with + # {global: true} via the librdkit dynlib package, avoiding Chrome's + # 8MB synchronous WebAssembly.Compile limit. + cp build/librdkit_core.so ${WASM_LIBRARY_DIR}/lib/ + # 4b. Relink each Python wrapper as a thin .so and place it in the # correct package directory. The output path is derived from cmake's # build rules (LIBRARY_OUTPUT_DIRECTORY), so new/renamed modules are @@ -148,7 +153,6 @@ build: echo "SKIP: $modname (no .o files)" continue fi - # Derive the correct output path from cmake's build.make build_make="$wrap_dir/build.make" dest_path=$(grep -oP "rdkit/[^ ]*${modname}\.so" "$build_make" 2>/dev/null | head -1) if [ -z "$dest_path" ]; then @@ -167,16 +171,15 @@ build: # ======================== # 5. Assemble Python package # ======================== - # Copy core .so into rdkit package - cp build/librdkit_core.so rdkit/ - # Copy Data directory into package cp -r Data rdkit/Data - # Rename .so -> .so.wasm (prevents micropip from auto-loading before core is ready) + # Rename .so -> .so.wasm (prevents micropip from auto-loading before + # core is ready — the core is loaded by Pyodide via the librdkit + # shared_library package with {global: true}) find rdkit -name "*.so" -exec sh -c 'mv "$1" "$1.wasm"' _ {} \; - # Patch __init__.py to load core and register custom import finder + # Patch __init__.py to set RDBASE and register custom import finder python3 patch_init.py echo "=== Package contents ===" From f64964d1d4eb79e126ce4ffcf879b37c63e4e8b6 Mon Sep 17 00:00:00 2001 From: Stefan Doerr Date: Wed, 18 Mar 2026 15:31:08 +0200 Subject: [PATCH 5/7] fix the cyclic dependency --- packages/librdkit/meta.yaml | 134 ++++++++++++++++++++++++++++++++++- packages/rdkit/meta.yaml | 137 ++++-------------------------------- 2 files changed, 145 insertions(+), 126 deletions(-) diff --git a/packages/librdkit/meta.yaml b/packages/librdkit/meta.yaml index a8478316..ea476bb5 100644 --- a/packages/librdkit/meta.yaml +++ b/packages/librdkit/meta.yaml @@ -12,13 +12,141 @@ source: requirements: host: - - rdkit + - libboost-python + - libzlib build: type: shared_library script: | - cp ${WASM_LIBRARY_DIR}/lib/librdkit_core.so ${DISTDIR} - ls -lh ${DISTDIR}/librdkit_core.so + set -e + + # ======================== + # 1. Setup — derive Python paths from pyodide config + # ======================== + PYINC=$(pyodide config get python_include_dir) + PYVER=$(pyodide config get python_version | cut -d. -f1-2) + PYMAJ=$(echo $PYVER | cut -d. -f1) + PYMIN=$(echo $PYVER | cut -d. -f2) + NUMPY_INC=$(python3 -c "import numpy; print(numpy.get_include())") + + # Download Eigen3 headers (header-only) + if [ ! -d "eigen-3.4.0" ]; then + wget -q https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz + tar xf eigen-3.4.0.tar.gz + fi + + # Dummy libpython — side modules don't link against it, but CMake's FindPython3 needs it + emar rcs /tmp/libpython${PYVER}.a + + # Patch CMakeLists.txt for Emscripten support + sed -i 's/OR RDK_BUILD_MINIMAL_LIB)/OR EMSCRIPTEN OR RDK_BUILD_MINIMAL_LIB)/' CMakeLists.txt + + # ======================== + # 2. CMake configure + # ======================== + mkdir -p build && cd build + + emcmake cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DRDK_BUILD_PYTHON_WRAPPERS=ON \ + -DRDK_BUILD_MINIMAL_LIB=OFF \ + -DRDK_BUILD_CPP_TESTS=OFF \ + -DRDK_BUILD_INCHI_SUPPORT=ON \ + -DRDK_USE_BOOST_SERIALIZATION=ON \ + -DRDK_USE_BOOST_IOSTREAMS=ON \ + -DRDK_OPTIMIZE_POPCNT=OFF \ + -DRDK_BUILD_THREADSAFE_SSS=OFF \ + -DRDK_BUILD_DESCRIPTORS3D=ON \ + -DRDK_TEST_MULTITHREADED=OFF \ + -DRDK_BUILD_CHEMDRAW_SUPPORT=ON \ + -DRDK_BUILD_MAEPARSER_SUPPORT=ON \ + -DRDK_BUILD_COORDGEN_SUPPORT=ON \ + -DRDK_BUILD_SLN_SUPPORT=ON \ + -DRDK_BUILD_CAIRO_SUPPORT=OFF \ + -DRDK_BUILD_QT_SUPPORT=OFF \ + -DRDK_BUILD_PGSQL=OFF \ + -DRDK_BUILD_SWIG_WRAPPERS=OFF \ + -DRDK_BUILD_FREETYPE_SUPPORT=OFF \ + -DRDK_BUILD_AVALON_SUPPORT=ON \ + -DRDK_BUILD_YAEHMOP_SUPPORT=OFF \ + -DRDK_BUILD_XYZ2MOL_SUPPORT=ON \ + -DRDK_INSTALL_INTREE=ON \ + -DRDK_INSTALL_STATIC_LIBS=ON \ + -DRDK_BUILD_STATIC_LIBS_ONLY=ON \ + -DBoost_USE_STATIC_LIBS=ON \ + -DCMAKE_PREFIX_PATH="${WASM_LIBRARY_DIR}" \ + -DCMAKE_FIND_ROOT_PATH="${WASM_LIBRARY_DIR}" \ + -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ + -DBoost_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \ + -DCMAKE_INCLUDE_PATH="${WASM_LIBRARY_DIR}/include" \ + -DCMAKE_LIBRARY_PATH="${WASM_LIBRARY_DIR}/lib" \ + -DZLIB_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \ + -DZLIB_LIBRARY=${WASM_LIBRARY_DIR}/lib/libz.a \ + -DEIGEN3_INCLUDE_DIR=$PWD/../eigen-3.4.0 \ + -DPython3_INCLUDE_DIR=${PYINC} \ + -DPython3_LIBRARY=/tmp/libpython${PYVER}.a \ + -DPython3_NumPy_INCLUDE_DIR=${NUMPY_INC} \ + -DPython3_EXECUTABLE=$(which python${PYVER}) \ + -DCMAKE_CXX_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC -std=c++20" \ + -DCMAKE_C_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC" \ + .. + + # ======================== + # 3. Build + # ======================== + emmake make -k -j ${PYODIDE_JOBS:-3} || true + cd .. + + # ======================== + # 4. Relink core into a single WASM side module + # ======================== + find build/lib build/External -name '*.a' 2>/dev/null | sort > /tmp/all_libs.txt + for lib in ${WASM_LIBRARY_DIR}/lib/libboost_python*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_system*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_serialization*.a \ + ${WASM_LIBRARY_DIR}/lib/libboost_iostreams*.a; do + [ -f "$lib" ] && echo "$lib" >> /tmp/all_libs.txt + done + echo "${WASM_LIBRARY_DIR}/lib/libz.a" >> /tmp/all_libs.txt + echo "Core libraries: $(wc -l < /tmp/all_libs.txt)" + + WHOLE_ARGS="" + while read -r lib; do + WHOLE_ARGS="$WHOLE_ARGS -Wl,--whole-archive $lib -Wl,--no-whole-archive" + done < /tmp/all_libs.txt + + em++ -fwasm-exceptions -sSIDE_MODULE=1 -O2 -shared \ + -Wl,--no-gc-sections -Wl,--export-all \ + $WHOLE_ARGS \ + -o build/librdkit_core.so + ls -lh build/librdkit_core.so + + # Copy core .so to $DISTDIR — Pyodide loads shared_library packages + # asynchronously with {global: true}, avoiding Chrome's 8MB sync limit + cp build/librdkit_core.so ${DISTDIR} + + # ======================== + # 5. Stage build artifacts for the rdkit Python package recipe + # ======================== + STAGING=${WASM_LIBRARY_DIR}/share/rdkit + + # Stage wrapper .o files and cmake build.make (for output path derivation) + mkdir -p ${STAGING}/wrappers + for wrap_dir in $(find build -path '*/Wrap/CMakeFiles/*.dir' -type d 2>/dev/null); do + modname=$(basename "$wrap_dir" .dir) + obj_files=$(find "$wrap_dir" -name '*.o' 2>/dev/null) + if [ -z "$obj_files" ]; then continue; fi + mkdir -p "${STAGING}/wrappers/${modname}" + cp $obj_files "${STAGING}/wrappers/${modname}/" + cp "$wrap_dir/build.make" "${STAGING}/wrappers/${modname}/" + done + + # Stage Python source files and Data directory + cp -r rdkit ${STAGING}/python + cp -r Data ${STAGING}/Data + + echo "Staged $(ls ${STAGING}/wrappers | wc -l) wrapper modules" about: home: https://github.com/rdkit/rdkit diff --git a/packages/rdkit/meta.yaml b/packages/rdkit/meta.yaml index 4afcc533..4de2c529 100644 --- a/packages/rdkit/meta.yaml +++ b/packages/rdkit/meta.yaml @@ -15,8 +15,7 @@ source: requirements: host: - - libboost-python - - libzlib + - librdkit - numpy run: - librdkit @@ -26,137 +25,26 @@ build: script: | set -e - # ======================== - # 1. Setup - # ======================== - NUMPY_INC="$HOSTINSTALLDIR/lib/python$PYMAJOR.$PYMINOR/site-packages/numpy/_core/include/" - - # Download Eigen3 headers (header-only) - if [ ! -d "eigen-3.4.0" ]; then - wget -q https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz - tar xf eigen-3.4.0.tar.gz - fi - - # Dummy libpython — side modules don't link against it, but CMake's FindPython3 needs it - emar rcs /tmp/libpython${PYMAJOR}.${PYMINOR}.a - - # Patch CMakeLists.txt for Emscripten support - sed -i 's/OR RDK_BUILD_MINIMAL_LIB)/OR EMSCRIPTEN OR RDK_BUILD_MINIMAL_LIB)/' CMakeLists.txt + STAGING=${WASM_LIBRARY_DIR}/share/rdkit # Set version in pyproject.toml sed -i "s/^version = .*/version = \"${PKG_VERSION}\"/" pyproject.toml # ======================== - # 2. CMake configure - # ======================== - mkdir -p build && cd build - - emcmake cmake \ - -DCMAKE_BUILD_TYPE=Release \ - -DRDK_BUILD_PYTHON_WRAPPERS=ON \ - -DRDK_BUILD_MINIMAL_LIB=OFF \ - -DRDK_BUILD_CPP_TESTS=OFF \ - -DRDK_BUILD_INCHI_SUPPORT=ON \ - -DRDK_USE_BOOST_SERIALIZATION=ON \ - -DRDK_USE_BOOST_IOSTREAMS=ON \ - -DRDK_OPTIMIZE_POPCNT=OFF \ - -DRDK_BUILD_THREADSAFE_SSS=OFF \ - -DRDK_BUILD_DESCRIPTORS3D=ON \ - -DRDK_TEST_MULTITHREADED=OFF \ - -DRDK_BUILD_CHEMDRAW_SUPPORT=ON \ - -DRDK_BUILD_MAEPARSER_SUPPORT=ON \ - -DRDK_BUILD_COORDGEN_SUPPORT=ON \ - -DRDK_BUILD_SLN_SUPPORT=ON \ - -DRDK_BUILD_CAIRO_SUPPORT=OFF \ - -DRDK_BUILD_QT_SUPPORT=OFF \ - -DRDK_BUILD_PGSQL=OFF \ - -DRDK_BUILD_SWIG_WRAPPERS=OFF \ - -DRDK_BUILD_FREETYPE_SUPPORT=OFF \ - -DRDK_BUILD_AVALON_SUPPORT=ON \ - -DRDK_BUILD_YAEHMOP_SUPPORT=OFF \ - -DRDK_BUILD_XYZ2MOL_SUPPORT=ON \ - -DRDK_INSTALL_INTREE=ON \ - -DRDK_INSTALL_STATIC_LIBS=ON \ - -DRDK_BUILD_STATIC_LIBS_ONLY=ON \ - -DBoost_USE_STATIC_LIBS=ON \ - -DCMAKE_PREFIX_PATH="${WASM_LIBRARY_DIR}" \ - -DCMAKE_FIND_ROOT_PATH="${WASM_LIBRARY_DIR}" \ - -DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \ - -DBoost_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \ - -DCMAKE_INCLUDE_PATH="${WASM_LIBRARY_DIR}/include" \ - -DCMAKE_LIBRARY_PATH="${WASM_LIBRARY_DIR}/lib" \ - -DZLIB_INCLUDE_DIR=${WASM_LIBRARY_DIR}/include \ - -DZLIB_LIBRARY=${WASM_LIBRARY_DIR}/lib/libz.a \ - -DEIGEN3_INCLUDE_DIR=$PWD/../eigen-3.4.0 \ - -DPython3_INCLUDE_DIR=${PYTHONINCLUDE} \ - -DPython3_LIBRARY=/tmp/libpython${PYMAJOR}.${PYMINOR}.a \ - -DPython3_NumPy_INCLUDE_DIR=${NUMPY_INC} \ - -DPython3_EXECUTABLE=$(which python${PYMAJOR}.${PYMINOR}) \ - -DCMAKE_CXX_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC -std=c++20" \ - -DCMAKE_C_FLAGS="-fwasm-exceptions -O2 -DNDEBUG -fPIC" \ - .. - - # ======================== - # 3. Build + # 1. Relink wrappers from staged .o files # ======================== - # Use -k to keep going past wrapper MODULE link failures (expected in - # cross-compilation — we relink all wrappers ourselves in step 4). - # The important outputs are the static .a libraries and .o object files. - emmake make -k -j ${PYODIDE_JOBS:-3} || true - cd .. - - # ======================== - # 4. Relink static archives into WASM side modules - # ======================== - # Strategy: one core .so with ALL C++ code (loaded by Pyodide with - # {global: true} via the librdkit shared_library package), then thin - # wrapper .so per Python extension module. - - # 4a. Collect all static libraries for the core - find build/lib build/External -name '*.a' 2>/dev/null | sort > /tmp/all_libs.txt - for lib in ${WASM_LIBRARY_DIR}/lib/libboost_python*.a \ - ${WASM_LIBRARY_DIR}/lib/libboost_numpy*.a \ - ${WASM_LIBRARY_DIR}/lib/libboost_system*.a \ - ${WASM_LIBRARY_DIR}/lib/libboost_serialization*.a \ - ${WASM_LIBRARY_DIR}/lib/libboost_iostreams*.a; do - [ -f "$lib" ] && echo "$lib" >> /tmp/all_libs.txt - done - echo "${WASM_LIBRARY_DIR}/lib/libz.a" >> /tmp/all_libs.txt - echo "Core libraries: $(wc -l < /tmp/all_libs.txt)" - - WHOLE_ARGS="" - while read -r lib; do - WHOLE_ARGS="$WHOLE_ARGS -Wl,--whole-archive $lib -Wl,--no-whole-archive" - done < /tmp/all_libs.txt - - em++ -fwasm-exceptions -sSIDE_MODULE=1 -O2 -shared \ - -Wl,--no-gc-sections -Wl,--export-all \ - $WHOLE_ARGS \ - -o build/librdkit_core.so - ls -lh build/librdkit_core.so - - # Stage core .so for the librdkit shared_library recipe to package. - # NOT included in the wheel — Pyodide loads it asynchronously with - # {global: true} via the librdkit dynlib package, avoiding Chrome's - # 8MB synchronous WebAssembly.Compile limit. - cp build/librdkit_core.so ${WASM_LIBRARY_DIR}/lib/ - - # 4b. Relink each Python wrapper as a thin .so and place it in the - # correct package directory. The output path is derived from cmake's - # build rules (LIBRARY_OUTPUT_DIRECTORY), so new/renamed modules are - # handled automatically without hardcoded mappings. WRAPPER_COUNT=0 - for wrap_dir in $(find build -path '*/Wrap/CMakeFiles/*.dir' -type d 2>/dev/null); do - modname=$(basename "$wrap_dir" .dir) - obj_files=$(find "$wrap_dir" -name '*.o' 2>/dev/null) + for wrapper_dir in ${STAGING}/wrappers/*/; do + modname=$(basename "$wrapper_dir") + obj_files=$(find "$wrapper_dir" -name '*.o' 2>/dev/null) if [ -z "$obj_files" ]; then echo "SKIP: $modname (no .o files)" continue fi - build_make="$wrap_dir/build.make" - dest_path=$(grep -oP "rdkit/[^ ]*${modname}\.so" "$build_make" 2>/dev/null | head -1) + # Derive the correct output path from cmake's build.make + dest_path=$(grep -oP "rdkit/[^ ]*${modname}\.so" "$wrapper_dir/build.make" 2>/dev/null | head -1) if [ -z "$dest_path" ]; then - echo "SKIP: $modname (no output path found in cmake)" + echo "SKIP: $modname (no output path found)" continue fi mkdir -p "$(dirname "$dest_path")" @@ -169,10 +57,13 @@ build: echo "Total wrapper .so files: $WRAPPER_COUNT" # ======================== - # 5. Assemble Python package + # 2. Assemble Python package # ======================== + # Overlay cmake-generated Python files (e.g. inchi.py) from staging + cp -rn ${STAGING}/python/* rdkit/ 2>/dev/null || true + # Copy Data directory into package - cp -r Data rdkit/Data + cp -r ${STAGING}/Data rdkit/Data # Rename .so -> .so.wasm (prevents micropip from auto-loading before # core is ready — the core is loaded by Pyodide via the librdkit From c88d4d823878766c3e80b00d77974bd919455150 Mon Sep 17 00:00:00 2001 From: Stefan Doerr Date: Wed, 18 Mar 2026 16:59:54 +0200 Subject: [PATCH 6/7] one more attempt --- packages/rdkit/extras/patch_init.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/packages/rdkit/extras/patch_init.py b/packages/rdkit/extras/patch_init.py index b7c7e95d..fa197356 100644 --- a/packages/rdkit/extras/patch_init.py +++ b/packages/rdkit/extras/patch_init.py @@ -1,10 +1,14 @@ """Patch rdkit/__init__.py for emscripten/Pyodide support. -The librdkit_core.so shared library is loaded by Pyodide with {global: true} -via the librdkit shared_library package (asynchronously during loadPackage). -This patch only needs to: -1. Set RDBASE for RDConfig.py path resolution -2. Register a custom MetaPathFinder to load .so.wasm wrapper modules +The librdkit_core.so shared library is pre-loaded by Pyodide via the librdkit +shared_library package (asynchronously during loadPackage, avoiding Chrome's +8MB sync WebAssembly.Compile limit). However, Pyodide loads it with RTLD_NOW +(no RTLD_GLOBAL), so we need to re-open it with RTLD_GLOBAL to make symbols +available to the wrapper modules. Re-opening is instant since the module is +already compiled and cached. + +This patch also registers a custom MetaPathFinder to load .so.wasm wrapper +modules. """ init_path = "rdkit/__init__.py" @@ -20,6 +24,15 @@ # Set RDBASE so RDConfig.py finds Data/, Docs/, etc. relative to this package _os.environ['RDBASE'] = _os.path.dirname(__file__) + # Re-open librdkit_core.so with {global: true} so wrapper modules can + # resolve symbols. The library was already loaded (and WASM-compiled) by + # Pyodide via the librdkit shared_library package, so this re-open is + # instant — no recompilation, just promoting symbols to global scope. + from pyodide_js._module import loadDynamicLibrary as _ldl + import js as _js + _ldl("/usr/lib/librdkit_core.so", _js.JSON.parse('{"global": true}')) + del _ldl, _js + class _RDKitExtensionFinder(importlib.abc.MetaPathFinder): def find_spec(self, fullname, path, target=None): parts = fullname.split('.') From aedd1440c9a1d79ea320b839f900368e9f2c802f Mon Sep 17 00:00:00 2001 From: Stefan Doerr Date: Wed, 18 Mar 2026 17:27:02 +0200 Subject: [PATCH 7/7] simplified lib loading --- packages/librdkit/meta.yaml | 8 ++++++-- packages/rdkit/extras/patch_init.py | 26 ++++++++------------------ packages/rdkit/meta.yaml | 1 + 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/packages/librdkit/meta.yaml b/packages/librdkit/meta.yaml index ea476bb5..acae4b32 100644 --- a/packages/librdkit/meta.yaml +++ b/packages/librdkit/meta.yaml @@ -122,10 +122,14 @@ build: -o build/librdkit_core.so ls -lh build/librdkit_core.so - # Copy core .so to $DISTDIR — Pyodide loads shared_library packages - # asynchronously with {global: true}, avoiding Chrome's 8MB sync limit + # Copy core .so to $DISTDIR (Pyodide loads shared_library packages + # asynchronously, avoiding Chrome's 8MB sync WebAssembly.Compile limit) cp build/librdkit_core.so ${DISTDIR} + # Also install to $WASM_LIBRARY_DIR/lib/ so the rdkit recipe can link + # wrapper .so modules against it (proper dynamic linking, no RTLD_GLOBAL) + cp build/librdkit_core.so ${WASM_LIBRARY_DIR}/lib/ + # ======================== # 5. Stage build artifacts for the rdkit Python package recipe # ======================== diff --git a/packages/rdkit/extras/patch_init.py b/packages/rdkit/extras/patch_init.py index fa197356..27aa9ecb 100644 --- a/packages/rdkit/extras/patch_init.py +++ b/packages/rdkit/extras/patch_init.py @@ -1,14 +1,13 @@ """Patch rdkit/__init__.py for emscripten/Pyodide support. -The librdkit_core.so shared library is pre-loaded by Pyodide via the librdkit -shared_library package (asynchronously during loadPackage, avoiding Chrome's -8MB sync WebAssembly.Compile limit). However, Pyodide loads it with RTLD_NOW -(no RTLD_GLOBAL), so we need to re-open it with RTLD_GLOBAL to make symbols -available to the wrapper modules. Re-opening is instant since the module is -already compiled and cached. - -This patch also registers a custom MetaPathFinder to load .so.wasm wrapper -modules. +The librdkit_core.so shared library is loaded by Pyodide via the librdkit +shared_library package (asynchronously during loadPackage). Each wrapper +.so.wasm module links against librdkit_core.so directly (like scipy links +against libopenblas), so no RTLD_GLOBAL hack is needed. + +This patch: +1. Sets RDBASE for RDConfig.py path resolution +2. Registers a custom MetaPathFinder to load .so.wasm wrapper modules """ init_path = "rdkit/__init__.py" @@ -24,15 +23,6 @@ # Set RDBASE so RDConfig.py finds Data/, Docs/, etc. relative to this package _os.environ['RDBASE'] = _os.path.dirname(__file__) - # Re-open librdkit_core.so with {global: true} so wrapper modules can - # resolve symbols. The library was already loaded (and WASM-compiled) by - # Pyodide via the librdkit shared_library package, so this re-open is - # instant — no recompilation, just promoting symbols to global scope. - from pyodide_js._module import loadDynamicLibrary as _ldl - import js as _js - _ldl("/usr/lib/librdkit_core.so", _js.JSON.parse('{"global": true}')) - del _ldl, _js - class _RDKitExtensionFinder(importlib.abc.MetaPathFinder): def find_spec(self, fullname, path, target=None): parts = fullname.split('.') diff --git a/packages/rdkit/meta.yaml b/packages/rdkit/meta.yaml index 4de2c529..5c490c67 100644 --- a/packages/rdkit/meta.yaml +++ b/packages/rdkit/meta.yaml @@ -50,6 +50,7 @@ build: mkdir -p "$(dirname "$dest_path")" em++ -fwasm-exceptions -sSIDE_MODULE=1 -O2 -shared \ $obj_files \ + -L${WASM_LIBRARY_DIR}/lib -lrdkit_core \ -o "$dest_path" echo "LINKED: $modname -> $dest_path" WRAPPER_COUNT=$((WRAPPER_COUNT + 1))