RolnickLab · mihow · Jun 26, 2026 · May 1, 2026 · May 1, 2026 · May 1, 2026
diff --git a/ami/main/admin.py b/ami/main/admin.py
@@ -13,6 +13,9 @@
 from ami import tasks
 from ami.jobs.models import Job
 from ami.ml.models.project_pipeline_config import ProjectPipelineConfig
+from ami.ml.post_processing.admin.actions import make_post_processing_action
+from ami.ml.post_processing.admin.small_size_filter_form import SmallSizeFilterActionForm
+from ami.ml.post_processing.small_size_filter import SmallSizeFilterTask
 from ami.ml.tasks import remove_duplicate_classifications
 
 from .models import (
@@ -34,6 +37,30 @@
     Taxon,
 )
 
+# PostgreSQL ``bigint`` upper bound. The primary keys on these models are
+# BigAutoField, so an all-digit search term longer than this cannot be a valid id.
+_BIGINT_MAX = 9223372036854775807
+
+
+class IdSearchAdminMixin:
+    """Treat an all-digit admin search term as an exact primary-key lookup.
+
+    The ids on these models are numeric and their text search fields (taxon and
+    determination names, image paths) never are, so a bare number is unambiguous and
+    jumps straight to that row. Anything else falls through to the normal
+    ``search_fields`` search. A number too large to be a valid id returns no results
+    rather than raising a database ``DataError``.
+    """
+
+    def get_search_results(self, request: HttpRequest, queryset: QuerySet[Any], search_term: str):
+        term = search_term.strip()
+        if term.isdigit():
+            pk = int(term)
+            if pk > _BIGINT_MAX:
+                return queryset.none(), False
+            return queryset.filter(pk=pk), False
+        return super().get_search_results(request, queryset, search_term)  # type: ignore[misc]
+
 
 class ProjectPipelineConfigInline(admin.TabularInline):
     model = ProjectPipelineConfig
@@ -355,6 +382,9 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
 class ClassificationInline(admin.TabularInline):
     model = Classification
     extra = 0
+    # Link each row to its Classification change page, where the full scores /
+    # logits and the applied_to chain (post-processing provenance) are visible.
+    show_change_link = True
     fields = (
         "taxon",
         "algorithm",
@@ -378,6 +408,9 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
 class DetectionInline(admin.TabularInline):
     model = Detection
     extra = 0
+    # Link each row to its Detection change page, where the classifications
+    # inline shows which algorithms (including post-processing) were applied.
+    show_change_link = True
     fields = (
         "detection_algorithm",
         "source_image",
@@ -395,7 +428,7 @@ class DetectionInline(admin.TabularInline):
 
 
 @admin.register(Detection)
-class DetectionAdmin(admin.ModelAdmin[Detection]):
+class DetectionAdmin(IdSearchAdminMixin, admin.ModelAdmin[Detection]):
     """Admin panel example for ``Detection`` model."""
 
     list_display = (
@@ -409,11 +442,32 @@ class DetectionAdmin(admin.ModelAdmin[Detection]):
     )
 
     autocomplete_fields = ("source_image", "occurrence")
+    # A digit term jumps to that detection by id (IdSearchAdminMixin); text searches path.
+    search_fields = ("source_image__path",)
+    # Skip the extra unfiltered COUNT(*) the changelist runs for its total; on a large
+    # table that count is as expensive as the page query it accompanies.
+    show_full_result_count = False
 
     def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
-        qs = super().get_queryset(request)
-        return qs.select_related("source_image", "occurrence").annotate(
-            classifications_count=models.Count("classifications"),
+        from django.db.models.functions import Coalesce
+
+        qs = super().get_queryset(request).select_related("source_image", "occurrence")
+        # Correlated subquery instead of Count("classifications") + GROUP BY. The
+        # grouped aggregate over the whole detection x classification join must run
+        # before ORDER BY ... LIMIT can take a page, which on a large table is slow
+        # enough to exhaust work_mem and error out. The subquery runs only for the
+        # rows on the page. Coalesce maps "no classifications" to 0.
+        classifications_count = (
+            Classification.objects.filter(detection=models.OuterRef("pk"))
+            .order_by()
+            .values("detection")
+            .annotate(c=models.Count("*"))
+            .values("c")
+        )
+        return qs.annotate(
+            classifications_count=Coalesce(
+                models.Subquery(classifications_count, output_field=models.IntegerField()), 0
+            )
         )
 
     @admin.display(
@@ -423,13 +477,15 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
     def classifications_count(self, obj) -> int:
         return obj.classifications_count
 
-    ordering = ("-created_at",)
+    # Order by -id (indexed PK) rather than -created_at, which has no index and
+    # forces a full sort of the table to find the newest page.
+    ordering = ("-id",)
 
     inlines = [ClassificationInline]
 
 
 @admin.register(Occurrence)
-class OccurrenceAdmin(admin.ModelAdmin[Occurrence]):
+class OccurrenceAdmin(IdSearchAdminMixin, admin.ModelAdmin[Occurrence]):
     """Admin panel example for ``Occurrence`` model."""
 
     list_display = (
@@ -450,19 +506,33 @@ class OccurrenceAdmin(admin.ModelAdmin[Occurrence]):
         "determination__rank",
         "created_at",
     )
+    # A digit term jumps to that occurrence by id (IdSearchAdminMixin); text searches names.
     search_fields = ("determination__name", "determination__search_names")
+    # Skip the extra unfiltered COUNT(*) the changelist runs for its total; on a large
+    # table that count is as expensive as the page query it accompanies.
+    show_full_result_count = False
 
     def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
+        from django.db.models.functions import Coalesce
+
         qs = super().get_queryset(request)
         qs = qs.select_related("determination", "project", "deployment", "event")
-        # Add detections count to queryset
-        qs = qs.annotate(detections_count=models.Count("detections"))
-        # Add min, max and avg detection__classifications counts to queryset
-        # qs = qs.annotate(
-        #     min_detection_classifications=models.Min("detections__classifications"),
-        #     max_detection_classifications=models.Max("detections__classifications"),
-        #     avg_detection_classifications=models.Avg("detections__classifications"),
-        # )
+        # Count detections with a correlated subquery instead of a JOIN + GROUP BY.
+        # A grouped count must aggregate the whole occurrence x detection join before
+        # the changelist's ORDER BY ... LIMIT can take a page, so it scans every row to
+        # show 25 (~15s on a 1.3M-row table). The subquery runs only for the rows that
+        # survive the limit. Coalesce maps "no detections" to 0 (a bare subquery is NULL,
+        # where the old JOIN count returned 0).
+        detections_count = (
+            Detection.objects.filter(occurrence=models.OuterRef("pk"))
+            .order_by()
+            .values("occurrence")
+            .annotate(c=models.Count("*"))
+            .values("c")
+        )
+        qs = qs.annotate(
+            detections_count=Coalesce(models.Subquery(detections_count, output_field=models.IntegerField()), 0)
+        )
         return qs
 
     @admin.display(
@@ -472,14 +542,45 @@ def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
     def detections_count(self, obj) -> int:
         return obj.detections_count
 
-    ordering = ("-created_at",)
+    # Per-occurrence post-processing trigger. Same factory as the capture-set
+    # action on SourceImageCollectionAdmin, scoped to one occurrence — the fast
+    # spot/dev path for iterating on a filter without running a whole collection.
+    # New per-occurrence tasks add their own action here the same way.
+    run_small_size_filter = make_post_processing_action(
+        SmallSizeFilterTask,
+        SmallSizeFilterActionForm,
+        scope_resolver=lambda occurrence: {"occurrence_id": occurrence.pk},
+        name_resolver=lambda task_cls, occurrence: (f"Post-processing: {task_cls.name} on Occurrence {occurrence.pk}"),
+    )
+
+    @admin.action(description="Recompute determination from current classifications and identifications")
+    def recompute_determination(self, request: HttpRequest, queryset: QuerySet[Any]) -> None:
+        """Re-derive each selected occurrence's determination from its current
+        predictions and human identifications.
+
+        Editing an occurrence's classifications by hand does not recompute its
+        determination — only Occurrence and Identification saves do — so this action
+        is the way to refresh it after manual changes.
+        """
+        count = 0
+        for occurrence in queryset:
+            occurrence.save(update_determination=True)
+            count += 1
+        self.message_user(request, f"Recomputed determination for {count} occurrence(s).")
+
+    actions = [run_small_size_filter, recompute_determination]
+
+    # Order by -id (the indexed primary key) rather than -created_at, which has no
+    # index and would force a full sort of the table to find the newest page. id
+    # increases with insertion time, so newest-first is preserved.
+    ordering = ("-id",)
 
     # Add classifications as inline
     inlines = [DetectionInline]
 
 
 @admin.register(Classification)
-class ClassificationAdmin(admin.ModelAdmin[Classification]):
+class ClassificationAdmin(IdSearchAdminMixin, admin.ModelAdmin[Classification]):
     list_display = (
         "__str__",
         "taxon",
@@ -499,25 +600,52 @@ class ClassificationAdmin(admin.ModelAdmin[Classification]):
         "detection__source_image__project",
         "taxon__rank",
     )
+    # FK fields render as AJAX autocompletes instead of <select>s preloaded with
+    # every taxon / detection / classification — the latter makes the change page
+    # unusable on a large database.
+    autocomplete_fields = ("detection", "taxon", "algorithm", "category_map", "applied_to")
+    # A digit term jumps to that classification by id (IdSearchAdminMixin); text searches taxon name.
+    search_fields = ("taxon__name",)
+    # Order by -id (indexed PK) rather than the model's -created_at (no index).
+    ordering = ("-id",)
+    # Skip the extra unfiltered COUNT(*) the changelist runs for its total; on a large
+    # table that count is as expensive as the page query it accompanies.
+    show_full_result_count = False
 
     def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
+        from django.db.models import Func
+        from django.db.models.functions import Coalesce
+
         qs = super().get_queryset(request)
-        return qs.select_related(
-            "taxon", "detection", "detection__source_image", "detection__source_image__project"
-        ).annotate(
-            detection_date=models.F("detection__timestamp"),
+        # Count the scores / logits arrays in SQL (cardinality) and defer the arrays
+        # themselves, so the changelist does not transfer thousands of floats per row
+        # just to display their length.
+        return (
+            qs.select_related("taxon", "detection", "detection__source_image", "detection__source_image__project")
+            .defer("scores", "logits")
+            .annotate(
+                detection_date=models.F("detection__timestamp"),
+                scores_count=Coalesce(
+                    Func(models.F("scores"), function="cardinality", output_field=models.IntegerField()), 0
+                ),
+                logits_count=Coalesce(
+                    Func(models.F("logits"), function="cardinality", output_field=models.IntegerField()), 0
+                ),
+            )
         )
 
     @admin.display()
     def detection_date(self, obj: Classification) -> str:
         # This property comes from the annotation in get_queryset, not the model
         return obj.detection_date  # type: ignore
 
+    @admin.display(description="num scores")
     def num_scores(self, obj: Classification) -> int:
-        return len(obj.scores) if obj.scores else 0
+        return obj.scores_count  # type: ignore[attr-defined]
 
+    @admin.display(description="num logits")
     def num_logits(self, obj: Classification) -> int:
-        return len(obj.logits) if obj.logits else 0
+        return obj.logits_count  # type: ignore[attr-defined]
 
 
 class TaxonParentFilter(admin.SimpleListFilter):
@@ -710,25 +838,18 @@ def populate_collection_async(self, request: HttpRequest, queryset: QuerySet[Sou
             f"Populating {len(queued_tasks)} capture set(s) background tasks: {queued_tasks}.",
         )
 
-    @admin.action(description="Run Small Size Filter post-processing task (async)")
-    def run_small_size_filter(self, request: HttpRequest, queryset: QuerySet[SourceImageCollection]) -> None:
-        jobs = []
-        for collection in queryset:
-            job = Job.objects.create(
-                name=f"Post-processing: SmallSizeFilter on Capture Set {collection.pk}",
-                project=collection.project,
-                job_type_key="post_processing",
-                params={
-                    "task": "small_size_filter",
-                    "config": {
-                        "source_image_collection_id": collection.pk,
-                    },
-                },
-            )
-            job.enqueue()
-            jobs.append(job.pk)
-
-        self.message_user(request, f"Queued Small Size Filter for {queryset.count()} capture set(s). Jobs: {jobs}")
+    # Built from the shared post-processing action factory: renders an intermediate
+    # confirmation page with the task's knob form, validates each selection against
+    # SmallSizeFilterConfig, then enqueues one Job per capture set. New post-processing
+    # tasks declare their own trigger the same way (task class + form + scope_resolver).
+    run_small_size_filter = make_post_processing_action(
+        SmallSizeFilterTask,
+        SmallSizeFilterActionForm,
+        scope_resolver=lambda collection: {"source_image_collection_id": collection.pk},
+        name_resolver=lambda task_cls, collection: (
+            f"Post-processing: {task_cls.name} on Capture Set {collection.pk}"
+        ),
+    )
 
     actions = [
         populate_collection,