Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ repos:
name: isort (pyi)
types: [pyi]
- repo: https://github.com/psf/black
rev: 21.6b0
rev: 22.3.0
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-mypy
Expand Down
190 changes: 178 additions & 12 deletions slidescore_api/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
This module contains the CLI utilities that can be used with slidescore in python.

"""

# pylint: disable=duplicate-code

import argparse
import csv
import json
Expand All @@ -15,7 +18,7 @@
from collections import defaultdict
from enum import Enum
from pathlib import Path
from typing import Iterable, Optional
from typing import Iterable, List, Optional, Union

import shapely.geometry
from tqdm import tqdm
Expand Down Expand Up @@ -70,6 +73,7 @@ def parse_api_token(data: Optional[Path] = None) -> str:


def _shapely_to_slidescore(shapely_object):
# pylint: disable=too-many-branches
shapely_type = type(shapely_object)
if shapely_type == shapely.geometry.Polygon:
if len(shapely_object.interiors) != 0:
Expand Down Expand Up @@ -351,30 +355,117 @@ def _download_labels(args: argparse.Namespace) -> None:
)


def append_to_manifest(save_dir: pathlib.Path, image_id: int, filename: pathlib.Path) -> None:
def append_to_tsv_mapping(save_dir: pathlib.Path, items: List[str]) -> None:
"""
Create a manifest mapping image id to the filename.
Create a manifest mapping image id to image name
Comment on lines +358 to +393

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add that if the file does not exist yet, you write the first two lines?

# slidescore_url: <URL>
# study_id: <id>

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do this manually in cli.download_wsis.

Otherwise I'd need to always pass the url and id to the function.

Also, if this would be the functionality, then downloading a second slidescory study into the same directory would NOT write the slidescore_url and study_id of the second study.

    elif mapping_format == "tsv":
        append_to_tsv_mapping(save_dir=save_dir, items=[f"# {slidescore_url}"])
        append_to_tsv_mapping(save_dir=save_dir, items=[f"# {study_id}"])


Creates a file that looks like
```txt
# <slidescore_url>
# <slidescore_study_id>
<image_id_1> <image_name_1>
...
```

Parameters
----------
save_dir: pathlib.Path
items: List[str]

Returns
-------
None
"""
if not save_dir.is_dir():
save_dir.mkdir(parents=True)
tab = "\t"
with open(save_dir / "slidescore_mapping.tsv", "a+", encoding="utf-8") as file:
file.write(f"{tab.join(items)}\n")


def append_to_json_mapping(save_dir: pathlib.Path, keys: List[str], value: Union[pathlib.Path, int, str]) -> None:
"""
Generic method to append a hierarchical key structure with one value to a dictionary in a json file to fix
missing functionality in python dict classes

Works as desired:
>> {}['a'] = 1
{'a': 1'}

Does not work as desired in this case
>> {}['a', 'b'] = 1
{('a', 'b'): 1}

The following is not possible in a python dict, and throws an error
>> {}[['a', 'b']] = 1
TypeError: unhashable type: 'list'

We wish to get
>> {}[['a', 'b']] = 1
{'a': {'b': 1}}

Used to create a manifest mapping filename to slidescore ID Created when downloading WSIs from slidescores.

Will end up with something like
{
'slidescore_url': str,
'slidescore_study_id': int,
'slide_filename_to_id_mapping': {
str: int
...
}
}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not filename anymore is it?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not the structure indeed.

changed

    {
        "url": {
            "study_id": {
                "slidescore_url": "url",
                "slidescore_study_id": study_id,
                "slide_filename_to_study_image_id_mapping": {
                    "image_id": "image_name",
                    ...
                    ...
                }
            }
        }
    }


Parameters
----------
save_dir : pathlib.Path
image_id : int
filename : pathlib.Path
keys : List[Union[str, pathlib.Path]], sets the hierarchical keys to be set.
E.g. ['slide_filename_id_mapping', 'filename']
sets manifest['slide_filename_id_mapping']['filename'] to the given slidescore ID
value : Union[int, pathlib.Path, str], value to be set. Generally either a URL, a path, or an integer ID

Returns
-------
None
"""
with open(save_dir / "slidescore_mapping.txt", "a", encoding="utf-8") as file:
file.write(f"{image_id} {filename.name}\n")
# Make dir if it doesn't exist. Usage in the CLI, however, places it in an existing directory
if not save_dir.is_dir():
save_dir.mkdir(parents=True)

value = value.name if isinstance(value, pathlib.Path) else value
config_filepath = save_dir / "slidescore_mapping.json"

try:
# Read file if it exists
with open(config_filepath, mode="r", encoding="utf-8") as file:
obj = json.load(file)
except FileNotFoundError:
# Otherwise create new object
obj = {}

new_obj = obj # Create a pointer that we can update
for idx, key in enumerate(keys):
if idx == len(keys) - 1: # At the last node
new_obj[key] = value # Set the leaf
else:
if key not in new_obj.keys():
new_obj[key] = {} # Not at the last node and the key does not exist; make a subdict
new_obj = new_obj[key] # Update pointer

# Save file, overwriting the old file
with open(config_filepath, mode="w", encoding="utf-8") as file:
json.dump(obj, file, ensure_ascii=False, indent=4)


# pylint: disable=too-many-arguments
def download_wsis(
slidescore_url: str,
api_token: str,
study_id: int,
save_dir: pathlib.Path,
disable_certificate_check: bool = False,
disable_download: bool = False,
mapping_format: str = "json",
Comment on lines +502 to +503

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there no other way to get the mapping only?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not that i can think of without duplicating a lot of code or entirely refactoring the function into less coherent parts.

) -> None:
"""
Download all WSIs for a given study from SlideScore
Expand All @@ -386,6 +477,9 @@ def download_wsis(
study_id : int
save_dir : pathlib.Path
disable_certificate_check : bool
disable_download : bool
mapping_format: str
either of "json" or "tsv"

Returns
-------
Expand All @@ -399,23 +493,56 @@ def download_wsis(
# Collect image metadata
images = client.get_images(study_id)

# # Add study details to mapping manifest

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single #

if mapping_format == "json":
append_to_json_mapping(
save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_url"], value=slidescore_url
)
append_to_json_mapping(
save_dir=save_dir, keys=[slidescore_url, str(study_id), "slidescore_study_id"], value=study_id
)
elif mapping_format == "tsv":
append_to_tsv_mapping(save_dir=save_dir, items=[f"# {slidescore_url}"])
append_to_tsv_mapping(save_dir=save_dir, items=[f"# {study_id}"])

# Download and save WSIs
for image in tqdm(images):
image_id = image["id"]
image_name = image["name"]
if not disable_download:
logger.info("Downloading image for id: %s", image_id)
filename = client.download_slide(study_id, image, save_dir=save_dir)
logger.info("Image with id %s has been saved to %s.", image_id, filename)
if mapping_format == "json":
append_to_json_mapping(
save_dir=save_dir,
keys=[slidescore_url, str(study_id), "slide_filename_to_study_image_id_mapping", str(image_id)],
value=image_name,
)
elif mapping_format == "tsv":
append_to_tsv_mapping(
save_dir=save_dir,
items=[str(image_id), image_name],
)
Comment on lines +550 to +554

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If not one of these, it's better to raise an error

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Thought this was done by the argparser, but raising error here makes the function more generic

    else:
        raise ValueError(f"mapping_format should be either 'tsv' or 'json', but is {mapping_format}")


logger.info("Downloading image for id: %s", image_id)
filename = client.download_slide(study_id, image, save_dir=save_dir)
logger.info("Image with id %s has been saved to %s.", image_id, filename)
append_to_manifest(save_dir, image_id, filename)

def _download_mapping(args: argparse.Namespace):
"""Main function that downloads only the mapping from filename to slidescore slide ID

Calls _download_wsi while setting `disable_download=True`
"""
_download_wsi(args=args, disable_download=True)


def _download_wsi(args: argparse.Namespace):
def _download_wsi(args: argparse.Namespace, disable_download=False):
"""Main function that downloads WSIs from SlideScore.

Parameters
----------
args: argparse.Namespace
The arguments passed from the CLI. Run with `-h` to see the required parameters
disable_download: bool
If download is disabled, only the mapping is saved. Can also be used to debug.

Returns
-------
Expand All @@ -429,6 +556,8 @@ def _download_wsi(args: argparse.Namespace):
args.study_id,
args.output_dir,
disable_certificate_check=args.disable_certificate_check,
disable_download=disable_download,
mapping_format=args.mapping_format,
)


Expand All @@ -445,6 +574,43 @@ def register_parser(parser: argparse._SubParsersAction):

download_wsi_parser.set_defaults(subcommand=_download_wsi)

download_wsi_parser.add_argument(
"--mapping-format",
dest="mapping_format",
type=str,
help="Save mapping as either json or tsv",
choices=["tsv", "json"],
required=False,
default="tsv",
)

download_mapping_parser = parser.add_parser(
"download-study-slide-mapping",
help="Download the download_config.json"
" with url, study id, and file to "
"slidescore ID mapping from SlideScore"
" without downloading the WSIs. "
"Useful if slides are already on disk,"
"but slidescore information is not",
)
download_mapping_parser.add_argument(
"output_dir",
type=pathlib.Path,
help="Directory to save output too.",
)

download_mapping_parser.add_argument(
"--mapping-format",
dest="mapping_format",
type=str,
help="Save mapping as either json or tsv",
choices=["tsv", "json"],
required=False,
default="tsv",
)

download_mapping_parser.set_defaults(subcommand=_download_mapping)

download_label_parser = parser.add_parser("download-labels", help="Download labels from SlideScore.")
download_label_parser.add_argument(
"-q",
Expand Down
87 changes: 85 additions & 2 deletions tests/test_tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,87 @@
# coding=utf-8
# Placeholder
def test_test():
assert True
"""
Unit tests
"""

# pylint: disable=duplicate-code

import json
from pathlib import Path

from slidescore_api.cli import append_to_json_mapping, append_to_tsv_mapping


def test_append_to_json_mapping():
# Ignores similar lines
"""
Tests the function of slidescore_api.cli.append_to_manifest
"""
if Path("./test_append_to_json_mapping/slidescore_mapping.json").is_file():
Path("./test_append_to_json_mapping/slidescore_mapping.json").unlink() # Remove file

append_to_json_mapping(
save_dir=Path("./test_append_to_json_mapping"),
keys=["slidescore_url", "1234", "slidescore_url"],
value="https",
) # -> {"slidescore_url": {"1234": {"slidescore_url": "https}}}

append_to_json_mapping(
save_dir=Path("./test_append_to_json_mapping"), keys=["slidescore_url", "1234", "slidescore_id"], value=1234
) # -> {"slidescore_url": {"1234": {"slidescore_url": "https, "slidescore_id": 1234}}}

append_to_json_mapping(
save_dir=Path("./test_append_to_json_mapping"),
keys=["slidescore_url", "1234", "mapping", "1234"],
value="slidename",
)
# -> {"slidescore_url": {"1234": {"slidescore_url": "https, "slidescore_id": 1234,
# "mapping": {"pathname": 1234} }}}

expected_object = {
"slidescore_url": {
"1234": {"slidescore_url": "https", "slidescore_id": 1234, "mapping": {"1234": "slidename"}}
}
}

with open("./test_append_to_json_mapping/slidescore_mapping.json", "r", encoding="utf-8") as file:
obj = json.load(file)
assert (
obj == expected_object
), f"expected object is {expected_object}, while the actual object on disk is {obj}"

Path("./test_append_to_json_mapping/slidescore_mapping.json").unlink() # Remove file
Path("./test_append_to_json_mapping").rmdir() # Remove dir


def test_append_to_tsv_mapping():
# Ignores similar lines
"""
Tests the function of slidescore_api.cli.append_to_tsv_manifest
"""
if Path("./test_append_to_tsv_mapping/slidescore_mapping.tsv").is_file():
Path("./test_append_to_tsv_mapping/slidescore_mapping.tsv").unlink() # Remove file

append_to_tsv_mapping(save_dir=Path("./test_append_to_tsv_mapping"), items=["# slidescore_url"])
# -> {"slidescore_url": {"1234": {"slidescore_url": "https}}}

append_to_tsv_mapping(save_dir=Path("./test_append_to_tsv_mapping"), items=["# slidescore_study_id"])

append_to_tsv_mapping(save_dir=Path("./test_append_to_tsv_mapping"), items=["image_id", "image_name"])

lines = []
with open("./test_append_to_tsv_mapping/slidescore_mapping.tsv", "r", encoding="utf-8") as file:
for line in file:
lines.append(line)

assert lines[0] == "# slidescore_url\n"
assert lines[1] == "# slidescore_study_id\n"
assert lines[2] == "image_id\timage_name\n"

Path("./test_append_to_tsv_mapping/slidescore_mapping.tsv").unlink() # Remove file
Path("./test_append_to_tsv_mapping").rmdir() # Remove dir


if __name__ == "__main__":
test_append_to_json_mapping()
test_append_to_tsv_mapping()