Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 179 additions & 5 deletions ami/base/pagination.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,191 @@
from rest_framework.pagination import LimitOffsetPagination
from django.core.exceptions import ValidationError
from django.forms import BooleanField
from rest_framework.pagination import LimitOffsetPagination, remove_query_param, replace_query_param
from rest_framework.response import Response

from .permissions import add_collection_level_permissions

# Query parameter name used to opt out of the total count in paginated list responses.
# Pass ``?with_counts=false`` to skip the COUNT(*) query entirely on large tables.
WITH_TOTAL_COUNT_PARAM = "with_counts"


class LimitOffsetPaginationWithPermissions(LimitOffsetPagination):
"""
LimitOffsetPagination with a precision cap on the total count.

The total ``count`` is exact for result sets up to
``COUNT_PRECISION_THRESHOLD`` rows. Beyond that, counting the full set means
scanning a large slice of a big table on every page load, so the count is
capped: the response returns the threshold value with
``count_is_exact: false``, which the UI renders as e.g. "10,000+". That
number is a lower bound, not the true total, so ``next`` / ``previous`` links
are computed from a one-extra-row probe rather than from ``count``.

Callers that don't need a total at all can pass ``?with_counts=false`` to
skip the count query entirely and receive ``count: null`` (with
``count_is_exact: null``). ``next`` / ``previous`` still work via the probe.

Response fields:
- ``count``: the exact total, the precision cap (a lower bound), or null.
- ``count_is_exact``: true when ``count`` is exact, false when it is the
capped lower bound, null when counting was skipped.
"""

# Sentinel returned by ``_get_capped_count`` when the result set is larger
# than the precision threshold, so the exact total is deliberately not run.
# ``paginate_queryset`` recognises it with an identity (``is``) comparison, so
# it can never be confused with a real integer count.
_OVER_CAP = object()

# Largest result set we count exactly. Past this the count query would scan
# an unbounded slice of a large table, so we cap precision instead.
# This same value is what ``paginate_queryset`` reports as ``count`` when the
# cap is exceeded.
COUNT_PRECISION_THRESHOLD = 10_000

# Per-request flag; the default is overwritten in ``paginate_queryset``.
# It takes three values there: True (exact count), False (count is the capped
# lower bound) and None (count skipped at the caller's request).
count_is_exact = True

def paginate_queryset(self, queryset, request, view=None):
    """
    Slice ``queryset`` for the requested page and record how the total was obtained.

    After this call ``self.count`` / ``self.count_is_exact`` are in one of
    three states:

    - exact: ``count`` is the true total and ``count_is_exact`` is True;
    - capped: ``count`` is ``COUNT_PRECISION_THRESHOLD`` (a lower bound) and
      ``count_is_exact`` is False;
    - skipped: both are None, because ``_should_skip_count`` returned True.

    In the capped and skipped states ``self._has_next`` is also set, for
    ``get_next_link`` to consume.

    Returns the list of rows for the page, or None when ``get_limit`` yields
    no limit.
    """
    self.request = request
    self.limit = self.get_limit(request)
    if self.limit is None:
        return None
    self.offset = self.get_offset(request)

    if self._should_skip_count(request):
        # Opt-out: no count query at all.
        self.count = None
        self.count_is_exact = None
        return self._paginate_with_probe(queryset)

    capped = self._get_capped_count(queryset)
    if capped is self._OVER_CAP:
        # Over the precision cap: report the threshold as an approximate
        # lower bound. It must not drive next/previous (the true total is
        # higher), so fall back to the probe-based links.
        self.count = self.COUNT_PRECISION_THRESHOLD
        self.count_is_exact = False
        return self._paginate_with_probe(queryset)

    # Exact count.
    self.count = capped
    self.count_is_exact = True
    if self.count > self.limit and self.template is not None:
        self.display_page_controls = True
    if self.count == 0 or self.offset > self.count:
        return []
    return list(queryset[self.offset : self.offset + self.limit])

def _paginate_with_probe(self, queryset):
    """
    Fetch the current page plus one look-ahead row.

    The extra row is never returned; its presence is stored in
    ``self._has_next`` so the next link can be built without a total count.
    Requires ``self.offset`` and ``self.limit`` to be set already.
    """
    window = list(queryset[self.offset : self.offset + self.limit + 1])
    self._has_next = len(window) > self.limit
    return window[: self.limit]

def get_next_link(self):
    """
    Return the URL of the next page, or None when there is none.

    With an exact count the inherited implementation is used. Otherwise
    (count skipped or capped) the total cannot say whether another page
    exists, so the decision comes from the look-ahead flag ``_has_next``.
    """
    if self.count_is_exact:
        return super().get_next_link()

    if not self._has_next:
        return None

    next_offset = self.offset + self.limit
    base_url = replace_query_param(
        self.request.build_absolute_uri(), self.limit_query_param, self.limit
    )
    return replace_query_param(base_url, self.offset_query_param, next_offset)

def get_previous_link(self):
    """
    Return the URL of the previous page, or None on the first page.

    With an exact count the inherited implementation is used. In the
    non-exact modes the link is built here from ``offset`` and ``limit``
    alone; the total count is not consulted.
    """
    if self.count_is_exact:
        return super().get_previous_link()

    if self.offset <= 0:
        return None

    base_url = replace_query_param(
        self.request.build_absolute_uri(), self.limit_query_param, self.limit
    )
    previous_offset = self.offset - self.limit
    if previous_offset <= 0:
        # Stepping back lands on the first page: drop the offset parameter
        # rather than emitting ``offset=0``.
        return remove_query_param(base_url, self.offset_query_param)
    return replace_query_param(base_url, self.offset_query_param, previous_offset)

def get_paginated_response(self, data):
    """
    Build the paginated list response.

    The body carries ``count``, ``count_is_exact``, ``next``, ``previous``
    and ``results`` (the already-serialized page in ``data``). ``count`` and
    ``count_is_exact`` may be None when the count was skipped. The body is
    then passed through ``add_collection_level_permissions`` for the current
    user, model and project, and whatever that helper returns becomes the
    final response data.
    """
    model = self._get_current_model()
    project = self._get_project()
    # The response is assembled here rather than via the parent class so that
    # ``count_is_exact`` sits alongside ``count`` in the payload.
    response = Response(
        {
            "count": self.count,
            "count_is_exact": self.count_is_exact,
            "next": self.get_next_link(),
            "previous": self.get_previous_link(),
            "results": data,
        }
    )
    response.data = add_collection_level_permissions(
        user=self.request.user, response_data=response.data, model=model, project=project
    )
    return response

def get_paginated_response_schema(self, schema):
    """
    Return the inherited paginated-response schema, adjusted for this paginator.

    ``count`` is marked nullable (it is null when the caller passed
    with_counts=false) and a nullable boolean ``count_is_exact`` property is
    added.
    """
    count_is_exact_schema = {
        "type": "boolean",
        "nullable": True,
        "description": (
            "True when `count` is exact; false when it is the precision cap "
            '(a lower bound, render as e.g. "10,000+"); null when the count '
            "was skipped via with_counts=false."
        ),
    }

    response_schema = super().get_paginated_response_schema(schema)
    properties = response_schema["properties"]
    properties["count"]["nullable"] = True
    properties["count_is_exact"] = count_is_exact_schema
    return response_schema
Comment on lines +122 to +136

def _count_queryset(self, queryset):
    """
    Reduce ``queryset`` to the cheapest form that still counts the same rows.

    Two reductions are applied: the ordering is cleared and the projection
    is narrowed to the primary key.

    Both matter because the capped count wraps the query in a ``LIMIT``:

    - An ``ORDER BY`` that no index serves forces a top-N sort of the whole
      filtered set before the ``LIMIT`` can cut it short, which defeats the
      early exit.
    - List orderings annotate correlated subqueries (e.g. ``last_processed``
      on captures). A plain ``COUNT(*)`` drops them, but a sliced query
      would re-project them and evaluate the subquery for every scanned
      row. Selecting only ``pk`` keeps the scan to the bare primary key.

    The reductions are meant to affect only the cost of the count, not its
    value.

    Subclasses that need to count differently override this method; it
    replaces the earlier per-view ``get_count`` override.
    """
    without_ordering = queryset.order_by()
    return without_ordering.values("pk")

def _get_capped_count(self, queryset):
    """
    Count ``queryset`` with an upper bound on the rows scanned.

    Returns the exact row count when it does not exceed
    ``COUNT_PRECISION_THRESHOLD``; otherwise returns the ``_OVER_CAP``
    sentinel so the caller can report an approximate lower bound.

    Django turns ``queryset...[:N].count()`` into::

        SELECT COUNT(*) FROM (SELECT pk … LIMIT N) sub

    so the scan stops after at most N matching rows whatever the table size.
    ``_count_queryset`` removes ordering and annotations beforehand so the
    ``LIMIT`` can actually stop the scan early.
    """
    threshold = self.COUNT_PRECISION_THRESHOLD
    # Look one row past the threshold: that is what separates "exactly
    # threshold rows" (still exact) from "more than threshold rows".
    bounded = self._count_queryset(queryset)[: threshold + 1]
    row_count = bounded.count()
    if row_count > threshold:
        return self._OVER_CAP
    return row_count

def _should_skip_count(self, request) -> bool:
    """
    Tell whether the caller opted out of the total count for this request.

    Reads the ``WITH_TOTAL_COUNT_PARAM`` query parameter. When it is absent
    the count is kept (returns False). Otherwise the raw value is cleaned
    with a non-required Django form ``BooleanField`` and the result is
    negated. A value the field rejects also keeps the count.
    """
    raw_flag = request.query_params.get(WITH_TOTAL_COUNT_PARAM)
    if raw_flag is None:
        return False

    try:
        wants_count = BooleanField(required=False).clean(raw_flag)
    except ValidationError:
        return False
    return not wants_count

def _get_current_model(self):
"""
Expand Down
9 changes: 3 additions & 6 deletions ami/main/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,9 @@ class DefaultReadOnlyViewSet(DefaultViewSetMixin, viewsets.ReadOnlyModelViewSet)

class ProjectPagination(LimitOffsetPaginationWithPermissions):
    """Paginator with a default page size of 40; counting is left to the base class."""

    default_limit = 40
    # The recent-activity orderings annotate correlated subqueries onto the
    # queryset; the base paginator's _count_queryset strips them (and ordering)
    # before counting, so no get_count override is needed here.


class ProjectViewSet(DefaultViewSet, ProjectMixin):
Expand Down
128 changes: 128 additions & 0 deletions ami/main/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6667,3 +6667,131 @@ def test_scores_and_logits_counted_in_sql_including_empty(self):
row = next(c for c in self.admin.get_queryset(self._request()) if c.pk == clf.pk)
self.assertEqual(row.scores_count, 3)
self.assertEqual(row.logits_count, 0)


class TestPaginationWithCounts(APITestCase):
    """
    Exercise the three count modes of the list-endpoint paginator.

    - Default: ``count`` is the exact total and ``count_is_exact`` is true.
    - Result set larger than ``COUNT_PRECISION_THRESHOLD``: ``count`` is
      capped to the threshold (a lower bound) and ``count_is_exact`` is
      false, so the UI can render "N+".
    - ``with_counts=false``: ``count`` and ``count_is_exact`` are both null.

    In the two non-exact modes ``next`` / ``previous`` are derived from a
    one-extra-row probe instead of the total.
    """

    def setUp(self) -> None:
        self.project, deployment = setup_test_project()
        create_captures(deployment=deployment, num_nights=2, images_per_night=5)
        self.user = User.objects.create_user(  # type: ignore
            email="pagination_test@insectai.org",
            is_staff=True,
            is_superuser=True,
        )
        self.client.force_authenticate(user=self.user)
        super().setUp()

    # -- helpers ---------------------------------------------------------

    def _captures_url(self, **params):
        """Build the captures list URL for the test project plus extra query params."""
        from urllib.parse import urlencode

        url = f"/api/v2/captures/?project_id={self.project.pk}"
        if not params:
            return url
        return url + "&" + urlencode(params)

    def _get_page(self, **params):
        """GET the captures list, require HTTP 200, and return the decoded JSON body."""
        response = self.client.get(self._captures_url(**params))
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        return response.json()

    def _total_captures(self):
        """Number of captures belonging to the test project, straight from the ORM."""
        return SourceImage.objects.filter(deployment__project=self.project).count()

    # -- exact counts ----------------------------------------------------

    def test_default_response_includes_exact_integer_count(self):
        """By default a small result set returns an exact integer count."""
        body = self._get_page(limit=5)
        self.assertIsInstance(body["count"], int)
        self.assertGreater(body["count"], 0)
        self.assertTrue(body["count_is_exact"])

    def test_with_counts_true_returns_exact_integer_count(self):
        """Explicit with_counts=true on a small result set is also exact."""
        body = self._get_page(with_counts="true", limit=5)
        self.assertIsInstance(body["count"], int)
        self.assertGreater(body["count"], 0)
        self.assertTrue(body["count_is_exact"])

    # -- opt-out (with_counts=false) --------------------------------------

    def test_with_counts_false_returns_null_count(self):
        """with_counts=false skips the count and returns count/count_is_exact null."""
        body = self._get_page(with_counts="false", limit=5)
        self.assertIn("count", body)
        self.assertIsNone(body["count"])
        self.assertIsNone(body["count_is_exact"])
        self.assertIn("results", body)

    def test_with_counts_false_next_link_present_when_more_results(self):
        """next link is returned even without count when more results exist."""
        page_size = max(1, self._total_captures() - 1)
        body = self._get_page(with_counts="false", limit=page_size)
        self.assertIsNone(body["count"])
        self.assertIsNotNone(body["next"])

    def test_with_counts_false_next_link_absent_on_last_page(self):
        """next is None when the current page is the last page."""
        body = self._get_page(with_counts="false", limit=self._total_captures())
        self.assertIsNone(body["count"])
        self.assertIsNone(body["next"])

    def test_with_counts_false_previous_link_present_with_nonzero_offset(self):
        """previous link is returned correctly without count."""
        body = self._get_page(with_counts="false", limit=2, offset=2)
        self.assertIsNone(body["count"])
        self.assertIsNotNone(body["previous"])

    # -- precision cap ---------------------------------------------------

    def test_count_is_capped_and_marked_inexact_over_threshold(self):
        """
        When the result set exceeds COUNT_PRECISION_THRESHOLD the count is
        capped to the threshold (a lower bound) and flagged inexact, while
        next/previous still work via the probe-based path.
        """
        from unittest.mock import patch

        from ami.base.pagination import LimitOffsetPaginationWithPermissions

        # A threshold of 1 means a second row is already enough to trip the cap.
        with patch.object(LimitOffsetPaginationWithPermissions, "COUNT_PRECISION_THRESHOLD", 1):
            self.assertGreater(self._total_captures(), 1, "Need at least 2 captures for this test")

            body = self._get_page(limit=1)
            self.assertEqual(body["count"], 1, "count is capped to the threshold as a lower bound")
            self.assertFalse(body["count_is_exact"], "count_is_exact must be false above the cap")
            self.assertIsNotNone(body["next"], "next link must still be present")
            self.assertIsNone(body["previous"])

    def test_count_exact_at_threshold_boundary(self):
        """A result set exactly at the threshold is still reported exactly."""
        from unittest.mock import patch

        from ami.base.pagination import LimitOffsetPaginationWithPermissions

        expected_total = self._total_captures()
        self.assertGreater(expected_total, 1, "Need at least 2 captures for this test")

        # With threshold == total the result set is within the cap, so it stays exact.
        with patch.object(LimitOffsetPaginationWithPermissions, "COUNT_PRECISION_THRESHOLD", expected_total):
            body = self._get_page(limit=1)
            self.assertEqual(body["count"], expected_total)
            self.assertTrue(body["count_is_exact"])
3 changes: 3 additions & 0 deletions ui/src/data-services/hooks/captures/useCaptures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export const useCaptures = (
captures?: Capture[]
userPermissions?: UserPermission[]
total: number
totalIsExact: boolean
isLoading: boolean
isFetching: boolean
error?: unknown
Expand All @@ -24,6 +25,7 @@ export const useCaptures = (
results: ServerCapture[]
user_permissions?: UserPermission[]
count: number
count_is_exact?: boolean
}>({
queryKey: [API_ROUTES.CAPTURES, params],
url: fetchUrl,
Expand All @@ -36,6 +38,7 @@ export const useCaptures = (
captures,
userPermissions: data?.user_permissions,
total: data?.count ?? 0,
totalIsExact: data?.count_is_exact ?? true,
isLoading,
isFetching,
error,
Expand Down
3 changes: 3 additions & 0 deletions ui/src/data-services/hooks/occurrences/useOccurrences.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export const useOccurrences = (
): {
occurrences?: Occurrence[]
total: number
totalIsExact: boolean
isLoading: boolean
isFetching: boolean
error?: unknown
Expand All @@ -21,6 +22,7 @@ export const useOccurrences = (
const { data, isLoading, isFetching, error } = useAuthorizedQuery<{
results: ServerOccurrence[]
count: number
count_is_exact?: boolean
}>({
queryKey: [API_ROUTES.OCCURRENCES, params],
url: fetchUrl,
Expand All @@ -34,6 +36,7 @@ export const useOccurrences = (
return {
occurrences,
total: data?.count ?? 0,
totalIsExact: data?.count_is_exact ?? true,
isLoading,
isFetching,
error,
Expand Down
Loading
Loading