Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8701333
docs(post-processing): design spec for admin scaffolding precursor PR
mihow May 1, 2026
29447e7
feat(post-processing): admin scaffolding precursor
mihow May 1, 2026
a1c8600
docs(post-processing): add PR #1289 admin smoke screenshots
mihow May 1, 2026
59a7d4b
Merge branch 'main' into feat/post-processing-admin-scaffolding
mihow Jun 4, 2026
93bd529
chore: host PR screenshots on S3 instead of committing to repo
mihow Jun 4, 2026
89778ea
refactor(post-processing): extract shared admin-action factory
mihow Jun 5, 2026
9b4b7b3
fix(post-processing): refuse "select all across pages" in admin trigger
mihow Jun 5, 2026
abcb446
refactor(post-processing): address re-review on the action factory
mihow Jun 5, 2026
329489e
test(post-processing): prune redundant tests, cover atomicity and abs…
mihow Jun 5, 2026
beacb3f
feat(post-processing): per-occurrence trigger and job stage metrics
mihow Jun 8, 2026
3d74ed9
fix(post-processing): dedup occurrences_updated across flush batches
mihow Jun 8, 2026
b0e50fd
Merge remote-tracking branch 'origin/main' into feat/post-processing-…
mihow Jun 9, 2026
d977b7a
test: cut post-processing test fixture cost
mihow Jun 10, 2026
8ba683a
Merge remote-tracking branch 'origin/main' into feat/post-processing-…
mihow Jun 10, 2026
f374535
fix(post-processing): render admin field errors as errorlist, clarify…
mihow Jun 23, 2026
b85e32c
Merge remote-tracking branch 'origin/main' into feat/post-processing-…
mihow Jun 23, 2026
5e2438b
fix(post-processing): count only occurrences whose determination changed
mihow Jun 25, 2026
b3577df
feat(post-processing): link the admin action result to the created jo…
mihow Jun 25, 2026
54da7a7
perf(admin): speed up the occurrence changelist and add id search
mihow Jun 25, 2026
d4254b3
perf(admin): speed up the detection changelist and link detections fr…
mihow Jun 25, 2026
3cf6500
feat(admin): search detections by id in the admin
mihow Jun 25, 2026
f5922ec
feat(admin): speed up the classification admin and recompute determin…
mihow Jun 26, 2026
cdbb4e4
refactor(admin): share id search in a mixin, skip the full changelist…
mihow Jun 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 162 additions & 41 deletions ami/main/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from ami import tasks
from ami.jobs.models import Job
from ami.ml.models.project_pipeline_config import ProjectPipelineConfig
from ami.ml.post_processing.admin.actions import make_post_processing_action
from ami.ml.post_processing.admin.small_size_filter_form import SmallSizeFilterActionForm
from ami.ml.post_processing.small_size_filter import SmallSizeFilterTask
from ami.ml.tasks import remove_duplicate_classifications

from .models import (
Expand All @@ -34,6 +37,30 @@
Taxon,
)

# PostgreSQL ``bigint`` upper bound. The primary keys on these models are
# BigAutoField, so an all-digit search term longer than this cannot be a valid id.
_BIGINT_MAX = 9223372036854775807


class IdSearchAdminMixin:
    """Treat an all-digit admin search term as an exact primary-key lookup.

    The ids on these models are numeric and their text search fields (taxon and
    determination names, image paths) never are, so a bare number is unambiguous and
    jumps straight to that row. Anything else falls through to the normal
    ``search_fields`` search. A number too large to be a valid id returns no results
    rather than raising a database ``DataError`` (or a Python ``ValueError`` when the
    term exceeds the interpreter's integer string conversion limit).
    """

    def get_search_results(self, request: HttpRequest, queryset: QuerySet[Any], search_term: str):
        """Return ``(queryset, may_have_duplicates)`` for the admin search box.

        Args:
            request: The current admin request, passed through to the default search.
            queryset: The changelist queryset to narrow.
            search_term: The raw text typed into the search box.

        Returns:
            For a decimal-only term, the queryset filtered to that primary key (or an
            empty queryset when the number cannot be a valid id) and ``False``.
            Otherwise whatever the next ``get_search_results`` in the MRO returns.
        """
        term = search_term.strip()
        # isdecimal(), not isdigit(): isdigit() also accepts characters such as
        # superscript digits ("²") that int() rejects, which would turn a harmless
        # search into an uncaught ValueError. Every isdecimal() string parses.
        if not term.isdecimal():
            return super().get_search_results(request, queryset, search_term)  # type: ignore[misc]
        try:
            pk = int(term)
        except ValueError:
            # Python 3.11+ refuses to parse digit strings beyond its conversion
            # limit; a term that long is far past bigint range, so nothing matches.
            return queryset.none(), False
        if pk > _BIGINT_MAX:
            return queryset.none(), False
        return queryset.filter(pk=pk), False


class ProjectPipelineConfigInline(admin.TabularInline):
model = ProjectPipelineConfig
Expand Down Expand Up @@ -355,6 +382,9 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
class ClassificationInline(admin.TabularInline):
model = Classification
extra = 0
# Link each row to its Classification change page, where the full scores /
# logits and the applied_to chain (post-processing provenance) are visible.
show_change_link = True
fields = (
"taxon",
"algorithm",
Expand All @@ -378,6 +408,9 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
class DetectionInline(admin.TabularInline):
model = Detection
extra = 0
# Link each row to its Detection change page, where the classifications
# inline shows which algorithms (including post-processing) were applied.
show_change_link = True
fields = (
"detection_algorithm",
"source_image",
Expand All @@ -395,7 +428,7 @@ class DetectionInline(admin.TabularInline):


@admin.register(Detection)
class DetectionAdmin(admin.ModelAdmin[Detection]):
class DetectionAdmin(IdSearchAdminMixin, admin.ModelAdmin[Detection]):
"""Admin panel example for ``Detection`` model."""

list_display = (
Expand All @@ -409,11 +442,32 @@ class DetectionAdmin(admin.ModelAdmin[Detection]):
)

autocomplete_fields = ("source_image", "occurrence")
# A digit term jumps to that detection by id (IdSearchAdminMixin); text searches path.
search_fields = ("source_image__path",)
# Skip the extra unfiltered COUNT(*) the changelist runs for its total; on a large
# table that count is as expensive as the page query it accompanies.
show_full_result_count = False

def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
qs = super().get_queryset(request)
return qs.select_related("source_image", "occurrence").annotate(
classifications_count=models.Count("classifications"),
from django.db.models.functions import Coalesce

qs = super().get_queryset(request).select_related("source_image", "occurrence")
# Correlated subquery instead of Count("classifications") + GROUP BY. The
# grouped aggregate over the whole detection x classification join must run
# before ORDER BY ... LIMIT can take a page, which on a large table is slow
# enough to exhaust work_mem and error out. The subquery runs only for the
# rows on the page. Coalesce maps "no classifications" to 0.
classifications_count = (
Classification.objects.filter(detection=models.OuterRef("pk"))
.order_by()
.values("detection")
.annotate(c=models.Count("*"))
.values("c")
)
return qs.annotate(
classifications_count=Coalesce(
models.Subquery(classifications_count, output_field=models.IntegerField()), 0
)
)

@admin.display(
Expand All @@ -423,13 +477,15 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
def classifications_count(self, obj) -> int:
return obj.classifications_count

ordering = ("-created_at",)
# Order by -id (indexed PK) rather than -created_at, which has no index and
# forces a full sort of the table to find the newest page.
ordering = ("-id",)

inlines = [ClassificationInline]


@admin.register(Occurrence)
class OccurrenceAdmin(admin.ModelAdmin[Occurrence]):
class OccurrenceAdmin(IdSearchAdminMixin, admin.ModelAdmin[Occurrence]):
"""Admin panel example for ``Occurrence`` model."""

list_display = (
Expand All @@ -450,19 +506,33 @@ class OccurrenceAdmin(admin.ModelAdmin[Occurrence]):
"determination__rank",
"created_at",
)
# A digit term jumps to that occurrence by id (IdSearchAdminMixin); text searches names.
search_fields = ("determination__name", "determination__search_names")
# Skip the extra unfiltered COUNT(*) the changelist runs for its total; on a large
# table that count is as expensive as the page query it accompanies.
show_full_result_count = False

def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
from django.db.models.functions import Coalesce

qs = super().get_queryset(request)
qs = qs.select_related("determination", "project", "deployment", "event")
# Add detections count to queryset
qs = qs.annotate(detections_count=models.Count("detections"))
# Add min, max and avg detection__classifications counts to queryset
# qs = qs.annotate(
# min_detection_classifications=models.Min("detections__classifications"),
# max_detection_classifications=models.Max("detections__classifications"),
# avg_detection_classifications=models.Avg("detections__classifications"),
# )
# Count detections with a correlated subquery instead of a JOIN + GROUP BY.
# A grouped count must aggregate the whole occurrence x detection join before
# the changelist's ORDER BY ... LIMIT can take a page, so it scans every row to
# show 25 (~15s on a 1.3M-row table). The subquery runs only for the rows that
# survive the limit. Coalesce maps "no detections" to 0 (a bare subquery is NULL,
# where the old JOIN count returned 0).
detections_count = (
Detection.objects.filter(occurrence=models.OuterRef("pk"))
.order_by()
.values("occurrence")
.annotate(c=models.Count("*"))
.values("c")
)
qs = qs.annotate(
detections_count=Coalesce(models.Subquery(detections_count, output_field=models.IntegerField()), 0)
)
return qs

@admin.display(
Expand All @@ -472,14 +542,45 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
def detections_count(self, obj) -> int:
return obj.detections_count

ordering = ("-created_at",)
# Per-occurrence post-processing trigger. Same factory as the capture-set
# action on SourceImageCollectionAdmin, scoped to one occurrence — the fast
# spot/dev path for iterating on a filter without running a whole collection.
# New per-occurrence tasks add their own action here the same way.
run_small_size_filter = make_post_processing_action(
SmallSizeFilterTask,
SmallSizeFilterActionForm,
scope_resolver=lambda occurrence: {"occurrence_id": occurrence.pk},
name_resolver=lambda task_cls, occurrence: (f"Post-processing: {task_cls.name} on Occurrence {occurrence.pk}"),
)

@admin.action(description="Recompute determination from current classifications and identifications")
def recompute_determination(self, request: HttpRequest, queryset: QuerySet[Any]) -> None:
    """Refresh the determination of every selected occurrence.

    Each occurrence in the selection is saved with ``update_determination=True``
    and the number of occurrences processed is reported back to the admin user.

    Hand-editing an occurrence's classifications does not recompute its
    determination — only Occurrence and Identification saves do — so this action
    is the way to refresh it after manual changes.
    """
    recomputed = 0
    # enumerate from 1 so the last index seen is the number of rows saved;
    # an empty selection leaves the tally at 0.
    for recomputed, selected in enumerate(queryset, start=1):
        selected.save(update_determination=True)
    self.message_user(request, f"Recomputed determination for {recomputed} occurrence(s).")

actions = [run_small_size_filter, recompute_determination]

# Order by -id (the indexed primary key) rather than -created_at, which has no
# index and would force a full sort of the table to find the newest page. id
# increases with insertion time, so newest-first is preserved.
ordering = ("-id",)

# Show the occurrence's detections as an inline
inlines = [DetectionInline]


@admin.register(Classification)
class ClassificationAdmin(admin.ModelAdmin[Classification]):
class ClassificationAdmin(IdSearchAdminMixin, admin.ModelAdmin[Classification]):
list_display = (
"__str__",
"taxon",
Expand All @@ -499,25 +600,52 @@ class ClassificationAdmin(admin.ModelAdmin[Classification]):
"detection__source_image__project",
"taxon__rank",
)
# FK fields render as AJAX autocompletes instead of <select>s preloaded with
# every taxon / detection / classification — the latter makes the change page
# unusable on a large database.
autocomplete_fields = ("detection", "taxon", "algorithm", "category_map", "applied_to")
# A digit term jumps to that classification by id (IdSearchAdminMixin); text searches taxon name.
search_fields = ("taxon__name",)
# Order by -id (indexed PK) rather than the model's -created_at (no index).
ordering = ("-id",)
# Skip the extra unfiltered COUNT(*) the changelist runs for its total; on a large
# table that count is as expensive as the page query it accompanies.
show_full_result_count = False

def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
from django.db.models import Func
from django.db.models.functions import Coalesce

qs = super().get_queryset(request)
return qs.select_related(
"taxon", "detection", "detection__source_image", "detection__source_image__project"
).annotate(
detection_date=models.F("detection__timestamp"),
# Count the scores / logits arrays in SQL (cardinality) and defer the arrays
# themselves, so the changelist does not transfer thousands of floats per row
# just to display their length.
return (
qs.select_related("taxon", "detection", "detection__source_image", "detection__source_image__project")
.defer("scores", "logits")
.annotate(
detection_date=models.F("detection__timestamp"),
scores_count=Coalesce(
Func(models.F("scores"), function="cardinality", output_field=models.IntegerField()), 0
),
logits_count=Coalesce(
Func(models.F("logits"), function="cardinality", output_field=models.IntegerField()), 0
),
)
)

@admin.display()
def detection_date(self, obj: Classification) -> str:
# This property comes from the annotation in get_queryset, not the model
return obj.detection_date # type: ignore

@admin.display(description="num scores")
def num_scores(self, obj: Classification) -> int:
return len(obj.scores) if obj.scores else 0
return obj.scores_count # type: ignore[attr-defined]

@admin.display(description="num logits")
def num_logits(self, obj: Classification) -> int:
return len(obj.logits) if obj.logits else 0
return obj.logits_count # type: ignore[attr-defined]


class TaxonParentFilter(admin.SimpleListFilter):
Expand Down Expand Up @@ -710,25 +838,18 @@ def populate_collection_async(self, request: HttpRequest, queryset: QuerySet[Sou
f"Populating {len(queued_tasks)} capture set(s) background tasks: {queued_tasks}.",
)

@admin.action(description="Run Small Size Filter post-processing task (async)")
def run_small_size_filter(self, request: HttpRequest, queryset: QuerySet[SourceImageCollection]) -> None:
jobs = []
for collection in queryset:
job = Job.objects.create(
name=f"Post-processing: SmallSizeFilter on Capture Set {collection.pk}",
project=collection.project,
job_type_key="post_processing",
params={
"task": "small_size_filter",
"config": {
"source_image_collection_id": collection.pk,
},
},
)
job.enqueue()
jobs.append(job.pk)

self.message_user(request, f"Queued Small Size Filter for {queryset.count()} capture set(s). Jobs: {jobs}")
# Built from the shared post-processing action factory: renders an intermediate
# confirmation page with the task's knob form, validates each selection against
# SmallSizeFilterConfig, then enqueues one Job per capture set. New post-processing
# tasks declare their own trigger the same way (task class + form + scope_resolver).
run_small_size_filter = make_post_processing_action(
SmallSizeFilterTask,
SmallSizeFilterActionForm,
scope_resolver=lambda collection: {"source_image_collection_id": collection.pk},
name_resolver=lambda task_cls, collection: (
f"Post-processing: {task_cls.name} on Capture Set {collection.pk}"
),
)

actions = [
populate_collection,
Expand Down
Loading
Loading