Skip to content
4 changes: 4 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def load_routing_configuration(file: Path = _config_file) -> TomlTable:
return typing.cast("TomlTable", _load_configuration(file)["routing"])


def load_run_configuration(file: Path = _config_file) -> TomlTable:
    """Return the ``[run]`` table from the configuration file, or an empty table."""
    run_table = _load_configuration(file).get("run", {})
    return typing.cast("TomlTable", run_table)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When resolving merge conflicts, please make this a DevelopmentConfiguration option for now.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure.



@functools.cache
def load_database_configuration(file: Path = _config_file) -> TomlTable:
configuration = _load_configuration(file)
Expand Down
3 changes: 3 additions & 0 deletions src/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,6 @@ database="openml"
[routing]
minio_url="http://minio:9000/"
server_url="http://php-api:80/"

[run]
evaluation_engine_ids = [1]
2 changes: 1 addition & 1 deletion src/database/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ async def get(id_: int, expdb: AsyncConnection) -> Row | None:
row = await expdb.execute(
text(
"""
SELECT *, uploadDate as upload_date
SELECT *, uploadDate as upload_date, fullName AS full_name
FROM implementation
WHERE id = :flow_id
""",
Expand Down
179 changes: 179 additions & 0 deletions src/database/runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,185 @@ async def exist(id_: int, expdb: AsyncConnection) -> bool:
return bool(row.one_or_none())


async def get(run_id: int, expdb: AsyncConnection) -> Row | None:
    """Look up the core record for a run in the `run` table.

    Returns None when `run_id` matches no run; otherwise the row. The
    `error_message` column is NULL when the run completed without errors.
    """
    query = text(
        """
        SELECT `rid`, `uploader`, `setup`, `task_id`, `error_message`
        FROM `run`
        WHERE `rid` = :run_id
        """,
    )
    result = await expdb.execute(query, parameters={"run_id": run_id})
    return result.one_or_none()


async def get_uploader_name(uploader_id: int, userdb: AsyncConnection) -> str | None:
    """Return the display name of a user from the openml user database.

    Concatenates first_name + ' ' + last_name from the `users` table.
    Returns None when no user with `uploader_id` exists.
    """
    query = text(
        """
        SELECT CONCAT(`first_name`, ' ', `last_name`) AS `name`
        FROM `users`
        WHERE `id` = :uploader_id
        """,
    )
    rows = await userdb.execute(query, parameters={"uploader_id": uploader_id})
    user = rows.one_or_none()
    return None if user is None else user.name
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could rather extend the database.users.User class to keep around more information than just the identifier, instead of creating separate method to fetch different attributes.

  • extend the User class with first_name and last_name attribute, and a full_name property.
  • refactor get_user_id_for to return a user:
    • rename to get_user
    • allow fetching by either apikey or id
    • return an instantiated User.

then you can use that method instead.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a really good idea; I will extend the User class and use it accordingly.



async def get_tags(run_id: int, expdb: AsyncConnection) -> list[str]:
    """Return every tag attached to a run.

    In the `run_tag` table the `id` column holds the run ID.
    """
    query = text(
        """
        SELECT `tag`
        FROM `run_tag`
        WHERE `id` = :run_id
        """,
    )
    result = await expdb.execute(query, parameters={"run_id": run_id})
    return [tag_row.tag for tag_row in result.all()]


async def get_input_data(run_id: int, expdb: AsyncConnection) -> list[Row]:
    """Return the dataset(s) used as input for a run, including name and URL.

    Joins `input_data` with `dataset` so each row carries the dataset
    name and its ARFF file URL alongside the dataset id.
    """
    query = text(
        """
        SELECT `id`.`data` AS `did`, `d`.`name`, `d`.`url`
        FROM `input_data` `id`
        JOIN `dataset` `d` ON `id`.`data` = `d`.`did`
        WHERE `id`.`run` = :run_id
        """,
    )
    result = await expdb.execute(query, parameters={"run_id": run_id})
    return cast("list[Row]", result.all())


async def get_output_files(run_id: int, expdb: AsyncConnection) -> list[Row]:
    """Fetch output files attached to a run from the `runfile` table.

    Typical entries include the description XML and predictions ARFF.
    The `field` column holds the file label (e.g. "description", "predictions").

    NOTE(review): the PHP response includes a deprecated `did` field hardcoded
    to "-1" for each file; this implementation omits it. That detail belongs to
    the API layer, not the database layer — consider documenting it there.
    """
    rows = await expdb.execute(
        text(
            """
            SELECT `file_id`, `field`
            FROM `runfile`
            WHERE `source` = :run_id
            """,
        ),
        parameters={"run_id": run_id},
    )
    return cast("list[Row]", rows.all())


async def get_evaluations(
    run_id: int,
    expdb: AsyncConnection,
    *,
    evaluation_engine_ids: list[int],
) -> list[Row]:
    """Fetch evaluation metric results for a run.

    Joins `evaluation` with `math_function` to resolve the metric name
    (the `evaluation` table stores only a `function_id`, not the name directly).

    Filters on `evaluation_engine_id IN (...)`; the ID list is configurable
    via `config.toml [run] evaluation_engine_ids`. Named bind parameters are
    generated per engine ID for aiomysql compatibility.
    """
    if not evaluation_engine_ids:
        return []

    # One named bind parameter per engine ID: :eid_0, :eid_1, ...
    engine_params: dict[str, int] = {}
    placeholder_names: list[str] = []
    for index, engine_id in enumerate(evaluation_engine_ids):
        engine_params[f"eid_{index}"] = engine_id
        placeholder_names.append(f":eid_{index}")
    in_clause = ", ".join(placeholder_names)

    statement = text(
        f"""
        SELECT `m`.`name`, `e`.`value`, `e`.`array_data`
        FROM `evaluation` `e`
        JOIN `math_function` `m` ON `e`.`function_id` = `m`.`id`
        WHERE `e`.`source` = :run_id
        AND `e`.`evaluation_engine_id` IN ({in_clause})
        """,  # noqa: S608 # placeholders are trusted integer params, not user input
    )
    result = await expdb.execute(
        statement,
        parameters={"run_id": run_id, **engine_params},
    )
    return cast("list[Row]", result.all())


async def get_task_type(task_id: int, expdb: AsyncConnection) -> str | None:
    """Return the human-readable task type name for a task.

    Joins `task` and `task_type` on `ttid` to resolve the name
    (e.g. "Supervised Classification"). Returns None when the task
    does not exist.
    """
    query = text(
        """
        SELECT `tt`.`name`
        FROM `task` `t`
        JOIN `task_type` `tt` ON `t`.`ttid` = `tt`.`ttid`
        WHERE `t`.`task_id` = :task_id
        """,
    )
    rows = await expdb.execute(query, parameters={"task_id": task_id})
    match = rows.one_or_none()
    return None if match is None else match.name


async def get_task_evaluation_measure(task_id: int, expdb: AsyncConnection) -> str | None:
    """Return the evaluation measure configured for a task, if any.

    Reads the `task_inputs` row whose `input` is 'evaluation_measures'.
    Returns None (not an empty string) when no such row exists, so callers
    can treat a falsy result uniformly.
    """
    query = text(
        """
        SELECT `value`
        FROM `task_inputs`
        WHERE `task_id` = :task_id
        AND `input` = 'evaluation_measures'
        """,
    )
    rows = await expdb.execute(query, parameters={"task_id": task_id})
    match = rows.one_or_none()
    return None if match is None else match.value
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move these to the tasks module. I will have a look at consolidating these if appropriate.



async def get_trace(run_id: int, expdb: AsyncConnection) -> Sequence[Row]:
"""Get trace rows for a run from the trace table."""
rows = await expdb.execute(
Expand Down
140 changes: 137 additions & 3 deletions src/routers/openml/runs.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,29 @@
"""Endpoints for run-related data."""

from typing import Annotated
import asyncio
from typing import TYPE_CHECKING, Annotated, cast

from fastapi import APIRouter, Depends

if TYPE_CHECKING:
from sqlalchemy import Row
from sqlalchemy.ext.asyncio import AsyncConnection

import config
import database.flows
import database.runs
import database.setups
from core.errors import RunNotFoundError, RunTraceNotFoundError
from routers.dependencies import expdb_connection
from schemas.runs import RunTrace, TraceIteration
from routers.dependencies import expdb_connection, userdb_connection
from schemas.runs import (
EvaluationScore,
InputDataset,
OutputFile,
ParameterSetting,
Run,
RunTrace,
TraceIteration,
)

router = APIRouter(prefix="/run", tags=["run"])

Expand Down Expand Up @@ -42,3 +57,122 @@ async def get_run_trace(
for row in trace_rows
],
)


@router.post(
    path="/{run_id}",
    description="Provided for convenience, same as `GET` endpoint.",
    response_model_exclude_none=True,
)
@router.get("/{run_id}", response_model_exclude_none=True)
async def get_run(
    run_id: int,
    expdb: Annotated[AsyncConnection, Depends(expdb_connection)],
    userdb: Annotated[AsyncConnection, Depends(userdb_connection)],
) -> Run:
    """Get full metadata for a run by ID.

    No authentication or visibility check is performed — all runs are
    publicly accessible.

    Raises:
        RunNotFoundError: when no run with `run_id` exists (code=236 for
            backward compatibility with the PHP GET /run/{id} error code).
    """
    # Core run record — all other data depends on uploader, setup, and task_id.
    run = await database.runs.get(run_id, expdb)
    if run is None:
        msg = f"Run {run_id} not found."
        raise RunNotFoundError(msg, code=236)

    # Evaluation engine IDs come from config.toml [run] so they can be
    # extended when a new evaluation engine is deployed, without code changes.
    engine_ids: list[int] = config.load_run_configuration().get("evaluation_engine_ids", [1])

    # Fetch all independent data concurrently.
    # NOTE(review): most of these tasks share `expdb`; SQLAlchemy documents
    # AsyncConnection as unsafe for concurrent use. This mirrors the existing
    # pattern elsewhere in the codebase (e.g. datasets.py) and a codebase-wide
    # fix is tracked separately — do not change in isolation here.
    (
        uploader_name,
        tags,
        input_data_rows,
        output_file_rows,
        evaluation_rows,
        task_type,
        task_evaluation_measure,
        setup,
        parameter_rows,
    ) = cast(
        "tuple[str | None, list[str], list[Row], list[Row], list[Row], str | None, "
        "str | None, Row | None, list[Row]]",
        await asyncio.gather(
            database.runs.get_uploader_name(run.uploader, userdb),
            database.runs.get_tags(run_id, expdb),
            database.runs.get_input_data(run_id, expdb),
            database.runs.get_output_files(run_id, expdb),
            database.runs.get_evaluations(run_id, expdb, evaluation_engine_ids=engine_ids),
            database.runs.get_task_type(run.task_id, expdb),
            database.runs.get_task_evaluation_measure(run.task_id, expdb),
            database.setups.get(run.setup, expdb),
            database.setups.get_parameters(run.setup, expdb),
        ),
    )

    # Flow is fetched after the gather because it requires setup.implementation_id.
    # flows.get() selects fullName AS full_name for reliable case-insensitive access.
    flow = await database.flows.get(setup.implementation_id, expdb) if setup else None

    # Build parameter_setting list from the denormalised parameter rows
    # returned by database.setups.get_parameters (which already JOINs input + implementation).
    parameter_settings = [
        ParameterSetting(
            name=p["name"],
            value=p["value"],
            component=p["flow_id"],  # implementation_id of the sub-flow owning this param
        )
        for p in parameter_rows
    ]

    input_datasets = [
        InputDataset(did=row.did, name=row.name, url=row.url) for row in input_data_rows
    ]

    # runfile.field is the file label (e.g. "description", "predictions")
    output_files = [OutputFile(file_id=row.file_id, name=row.field) for row in output_file_rows]

    evaluations = [
        EvaluationScore(
            name=row.name,
            # Whole-number floats (e.g. counts) are converted to int to match PHP's
            # integer representation. e.g. 253.0 → 253, 0.0 → 0.
            value=int(row.value)
            if isinstance(row.value, float) and row.value.is_integer()
            else row.value,
            array_data=row.array_data,
        )
        for row in evaluation_rows
    ]

    # Normalise task_evaluation_measure: empty string → None so the field is
    # excluded entirely by response_model_exclude_none=True (matches PHP behaviour
    # of returning "" but we opt to omit rather than return an empty string).
    normalised_measure = task_evaluation_measure or None

    # error_message is NULL in the DB when the run has no error.
    # The PHP response returns an empty array [] in that case.
    error_messages = [run.error_message] if run.error_message else []

    return Run(
        run_id=run_id,
        uploader=run.uploader,
        uploader_name=uploader_name,
        task_id=run.task_id,
        task_type=task_type,
        task_evaluation_measure=normalised_measure,
        flow_id=setup.implementation_id if setup else 0,
        flow_name=flow.full_name if flow else None,
        setup_id=run.setup,
        setup_string=setup.setup_string if setup else None,
        parameter_setting=parameter_settings,
        error_message=error_messages,
        tag=tags,
        # Preserve PHP envelope structure for backward compat
        input_data={"dataset": input_datasets},
        output_data={"file": output_files, "evaluation": evaluations},
    )
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Loading
Loading