Skip to content

Commit 4487d49

Browse files
authored
fix: BROS-499: Ground truth queue for onboarding mode skipped when “Desired agreement threshold” is enabled (#8586)
1 parent cdbebcd commit 4487d49

File tree

7 files changed

+176
-33
lines changed

7 files changed

+176
-33
lines changed

label_studio/core/settings/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,7 @@
617617
TASK_MIXIN = 'tasks.mixins.TaskMixin'
618618
LSE_PROJECT = None
619619
GET_TASKS_AGREEMENT_QUERYSET = None
620+
SHOULD_ATTEMPT_GROUND_TRUTH_FIRST = None
620621
ANNOTATION_MIXIN = 'tasks.mixins.AnnotationMixin'
621622
ORGANIZATION_MIXIN = 'organizations.mixins.OrganizationMixin'
622623
USER_MIXIN = 'users.mixins.UserMixin'

label_studio/data_manager/managers.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,7 @@ def annotate_completed_at(queryset: TaskQuerySet) -> TaskQuerySet:
548548
is_lse_project = bool(LseProject)
549549
has_custom_agreement_queryset = bool(get_tasks_agreement_queryset)
550550

551-
if (
552-
is_lse_project
553-
and has_custom_agreement_queryset
554-
and flag_set('fflag_feat_optic_161_project_settings_for_low_agreement_threshold_score_short', user='auto')
555-
):
551+
if is_lse_project and has_custom_agreement_queryset:
556552
return annotated_completed_at_considering_agreement_threshold(queryset)
557553

558554
return base_annotate_completed_at(queryset)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Label Stream: Next Task Selection (LSE)
2+
3+
This doc summarizes how Label Studio Enterprise selects the next task for labeling, based on the current code in `label_studio/projects/functions/next_task.py` and LSE-specific logic from `label_studio_enterprise/lse_projects/functions.py`.
4+
5+
Notes
6+
- Queue labels shown in the UI (e.g. "Ground truth queue", "Show overlap first") indicate which strategies were attempted, not always the final source of the selected task.
7+
- Feature flags impact ordering and inclusion at several steps (see Legend below).
8+
9+
## High-level flow
10+
11+
```mermaid
12+
flowchart TD
13+
A["Input: prepared_tasks, dm_queue, assigned_flag, user, project"] --> B
14+
15+
subgraph Build candidate pool not_solved_tasks
16+
B[Start from prepared_tasks] --> B1[Exclude tasks annotated by this user]
17+
B1 --> B2[Exclude user's postponed drafts]
18+
B2 --> B3{assigned_flag?}
19+
B3 -- yes --> B5[Skip agreement logic] --> B7
20+
21+
B3 -- no --> B4{"LSE low-agreement path?<br/>lse_project exists<br/>agreement_threshold set<br/>user is annotator"}
22+
B4 -- yes --> B6["Filter by agreement threshold<br/>and annotator capacity"] --> B7[Optionally prioritize by low agreement]
23+
24+
B4 -- no --> B8{"Evaluation mode?<br/>show_ground_truth_first"}
25+
B8 -- yes --> B7
26+
B8 -- no --> B9[Filter: is_labeled=false] --> B7
27+
end
28+
29+
B7 --> C{dm_queue?}
30+
C -- yes --> DM["Data manager queue<br/>not_solved_tasks.first()"] --> K
31+
C -- no --> D
32+
33+
subgraph No DM queue path
34+
D{assigned_flag?} -- yes --> M["Manually assigned queue<br/>first() from not_solved_tasks"] --> K
35+
D -- no --> E["Check existing lock for user<br/>if exists: Task lock"] --> G
36+
37+
F{"no next_task yet AND<br/>prioritized_low_agreement?"} -- yes --> LAL["Low agreement queue<br/>first unlocked"] --> K
38+
F -- no --> H
39+
40+
G{"GT-first gating?<br/>should_attempt_ground_truth_first(user, project)"} -- yes --> GT["Ground truth queue<br/>_try_ground_truth()"] --> F
41+
G -- no --> F
42+
43+
H{project.maximum_annotations > 1?} -- yes --> BF["Breadth first queue<br/>_try_breadth_first()"] --> I
44+
H -- no --> I
45+
46+
I{"FF overlap-after?<br/>fflag FIX-BACK-LSDV-4523 AND show_overlap_first<br/>AND no next_task"}
47+
I -- yes --> OF["Filter to overlap>1<br/>Show overlap first"] --> S
48+
I -- no --> S
49+
50+
S{next_task selected?}
51+
S -- yes --> P[Check post-queues]
52+
S -- no --> T{project.sampling}
53+
T -- Sequence --> SQ["Sequence queue<br/>first unlocked"] --> P
54+
T -- Uncertainty --> AL["Active learning or random queue"] --> P
55+
T -- Uniform --> UR["Uniform random queue<br/>random unlocked"] --> P
56+
end
57+
58+
subgraph Post queues user-specific
59+
P --> PD["Postponed draft queue<br/>user drafts: was_postponed=true, is_labeled=false"] --> SK
60+
SK["Skipped queue (REQUEUE_FOR_ME)<br/>user annotations: was_cancelled=true, is_labeled=false"] --> K
61+
end
62+
63+
K["Finalize<br/>- Set task lock if required<br/>- add_stream_history()<br/>- return next_task + queue_info"]
64+
```
65+
66+
## Legend and flags
67+
68+
- fflag FIX-BACK-LSDV-4523 (Overlap First Ordering): applies the "Show overlap first" filtering after GT/low-agreement/breadth-first attempts; otherwise, it is applied earlier while building the candidate pool.
69+
70+
### GT-first gating
71+
- `should_attempt_ground_truth_first(user, project)` returns true when:
72+
- `show_ground_truth_first=True` and either no `lse_project` or `annotator_evaluation_minimum_tasks` is not set, or
73+
- the user's completed GT-equipped tasks < `annotator_evaluation_minimum_tasks`, or
74+
- minimum tasks reached but the user's GT agreement score is missing or below `annotator_evaluation_minimum_score` (percent).
75+
- Otherwise returns false (GT-first disabled; proceed via low-agreement/overlap/sampling).
76+
77+
## Queue labels appended to response
78+
79+
The `queue_info` string aggregates labels as specific stages are attempted:
80+
- "Manually assigned queue" when `assigned_flag` path is used.
81+
- "Task lock" when returning a task already locked by the user.
82+
- "Low agreement queue" when the prioritized low-agreement branch returns a task.
83+
- "Ground truth queue" when the GT-first attempt returns a task (the label is not added if the attempt finds nothing and selection falls through).
84+
- "Breadth first queue" for in-progress tasks (when `maximum_annotations > 1`).
85+
- "Show overlap first" when overlap filtering is applied.
86+
- Sampling labels:
87+
- "Sequence queue"
88+
- "Active learning or random queue" (uncertainty)
89+
- "Uniform random queue"
90+
- Post queues:
91+
- "Postponed draft queue"
92+
- "Skipped queue"
93+
94+
## References
95+
- Core selection: `label_studio/projects/functions/next_task.py`
96+
- LSE agreement & counters: `label_studio_enterprise/lse_projects/functions.py`
97+
98+

label_studio/projects/functions/next_task.py

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,17 @@
1515

1616
logger = logging.getLogger(__name__)
1717

18+
19+
# Hook for GT-first gating (Enterprise can override via settings)
20+
def _oss_should_attempt_gt_first(user: User, project: Project) -> bool:
21+
# Open-source default: if project enables GT-first, allow it without onboarding gates
22+
return bool(project.show_ground_truth_first)
23+
24+
1825
get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET)
26+
should_attempt_ground_truth_first = (
27+
load_func(settings.SHOULD_ATTEMPT_GROUND_TRUTH_FIRST) or _oss_should_attempt_gt_first
28+
)
1929

2030

2131
def get_next_task_logging_level(user: User) -> int:
@@ -158,33 +168,41 @@ def get_not_solved_tasks_qs(
158168
prioritized_on_agreement = False
159169
# if an annotator is assigned to tasks, they must solve them regardless of is_labeled=True
160170
if not assigned_flag:
161-
# include tasks that have been completed if their agreement is not at threshold if threshold setting is set
171+
# low agreement strategy for auto-assigned annotators:
172+
# Include tasks that have been completed if their agreement is not at threshold if threshold setting is set
162173
lse_project = getattr(project, 'lse_project', None)
163174
if (
164175
lse_project
165-
and flag_set('fflag_feat_optic_161_project_settings_for_low_agreement_threshold_score_short', user='auto')
166176
and lse_project.agreement_threshold is not None
167177
and get_tasks_agreement_queryset
168178
and user.is_project_annotator(project)
169179
):
170-
not_solved_tasks = (
171-
get_tasks_agreement_queryset(not_solved_tasks)
172-
# include tasks that are not labeled or are labeled but fall below the agreement threshold
173-
.filter(
174-
Q(_agreement__lt=lse_project.agreement_threshold, is_labeled=True) | Q(is_labeled=False)
175-
).annotate(annotators=Count('annotations__completed_by', distinct=True))
176-
# skip tasks that have been annotated by the maximum additional number of annotators
177-
.filter(annotators__lt=F('overlap') + lse_project.max_additional_annotators_assignable)
180+
# Onboarding mode (GT-first) should keep GT tasks eligible regardless of is_labeled/agreement
181+
qs = get_tasks_agreement_queryset(not_solved_tasks)
182+
qs = qs.annotate(annotators=Count('annotations__completed_by', distinct=True))
183+
184+
low_agreement_pred = Q(_agreement__lt=lse_project.agreement_threshold, is_labeled=True) | Q(
185+
is_labeled=False
178186
)
187+
capacity_pred = Q(annotators__lt=F('overlap') + (lse_project.max_additional_annotators_assignable or 0))
188+
189+
if project.show_ground_truth_first:
190+
gt_subq = Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True)
191+
qs = qs.annotate(has_ground_truths=Exists(gt_subq))
192+
# Keep all GT tasks + apply low-agreement+capacity to the rest. Alternatively, we could:
193+
# - if user.solved_tasks_array.count < lse_project.annotator_evaluation_minimum_tasks
194+
# - else, apply low-agreement+capacity to the rest (maybe performance will be better)
195+
# It is an open question which approach is better; this version is at least simpler from the code perspective.
196+
not_solved_tasks = qs.filter(Q(has_ground_truths=True) | (low_agreement_pred & capacity_pred))
197+
else:
198+
not_solved_tasks = qs.filter(low_agreement_pred & capacity_pred)
199+
179200
prioritized_on_agreement, not_solved_tasks = _prioritize_low_agreement_tasks(not_solved_tasks, lse_project)
180201

181202
# otherwise, filtering out completed tasks is sufficient
182203
else:
183204
# ignore tasks that are already labeled for onboarding mode
184-
if not (
185-
flag_set('fflag_feat_all_leap_1825_annotator_evaluation_short', user='auto')
186-
and project.show_ground_truth_first
187-
):
205+
if not project.show_ground_truth_first:
188206
not_solved_tasks = not_solved_tasks.filter(is_labeled=False)
189207

190208
if not flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'):
@@ -220,33 +238,39 @@ def get_next_task_without_dm_queue(
220238
use_task_lock = True
221239
queue_info = ''
222240

223-
# ordered by data manager
241+
# Manually assigned tasks
224242
if assigned_flag:
225243
logger.debug(f'User={user} try to get task from assigned')
226244
next_task = not_solved_tasks.first()
227245
use_task_lock = False
228246
queue_info += (' & ' if queue_info else '') + 'Manually assigned queue'
229247

230-
# If current user has already lock one task - return it (without setting the lock again)
248+
# Task lock: if current user already has a locked task, return it (without setting the lock again)
231249
if not next_task:
232250
next_task = Task.get_locked_by(user, tasks=not_solved_tasks)
233251
if next_task:
234252
logger.debug(f'User={user} got already locked for them {next_task}')
235253
use_task_lock = False
236254
queue_info += (' & ' if queue_info else '') + 'Task lock'
237255

256+
# Ground truth: label GT first only during onboarding window for user (gated by min tasks and min score)
257+
allow_gt_first = should_attempt_ground_truth_first(user, project)
258+
if not next_task and allow_gt_first:
259+
logger.debug(f'User={user} tries ground truth from prepared tasks')
260+
next_task = _try_ground_truth(not_solved_tasks, project, user)
261+
if next_task:
262+
queue_info += (' & ' if queue_info else '') + 'Ground truth queue'
263+
264+
# Low agreement strategy: reassign this annotator to low agreement tasks
238265
if not next_task and prioritized_low_agreement:
239266
logger.debug(f'User={user} tries low agreement from prepared tasks')
240267
next_task = _get_first_unlocked(not_solved_tasks, user)
241-
queue_info += (' & ' if queue_info else '') + 'Low agreement queue'
242-
243-
if not next_task and project.show_ground_truth_first:
244-
logger.debug(f'User={user} tries ground truth from prepared tasks')
245-
next_task = _try_ground_truth(not_solved_tasks, project, user)
246-
queue_info += (' & ' if queue_info else '') + 'Ground truth queue'
268+
if next_task:
269+
queue_info += (' & ' if queue_info else '') + 'Low agreement queue'
247270

271+
# Breadth first: label in-progress tasks first;
248272
if not next_task and project.maximum_annotations > 1:
249-
# if there are any tasks in progress (with maximum number of annotations), randomly sampling from them
273+
# if there are already labeled tasks whose task.overlap is still < project.maximum_annotations, randomly sample from them
250274
logger.debug(f'User={user} tries depth first from prepared tasks')
251275
next_task = _try_breadth_first(not_solved_tasks, user)
252276
if next_task:
@@ -358,7 +382,7 @@ def get_next_task(
358382
# don't output anything - just filter tasks with overlap
359383
logger.debug(f'User={user} tries overlap first from prepared tasks')
360384
_, tasks_with_overlap = _try_tasks_with_overlap(not_solved_tasks)
361-
queue_info += 'Show overlap first'
385+
queue_info += (' & ' if queue_info else '') + 'Show overlap first'
362386
next_task, queue_info = get_task_from_qs_with_sampling(
363387
tasks_with_overlap, user_solved_tasks_array, prepared_tasks, user, project, queue_info
364388
)
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 5.1.12 on 2025-10-03 12:10
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("projects", "0030_project_search_vector_index"),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name="project",
15+
name="show_ground_truth_first",
16+
field=models.BooleanField(
17+
default=False,
18+
help_text="Onboarding mode (true): show ground truth tasks first in the labeling stream",
19+
verbose_name="show ground truth first",
20+
),
21+
),
22+
]

label_studio/projects/models.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,11 @@ class SkipQueue(models.TextChoices):
266266
skip_queue = models.CharField(
267267
max_length=100, choices=SkipQueue.choices, null=True, default=SkipQueue.REQUEUE_FOR_OTHERS
268268
)
269-
show_ground_truth_first = models.BooleanField(_('show ground truth first'), default=False)
269+
show_ground_truth_first = models.BooleanField(
270+
_('show ground truth first'),
271+
default=False,
272+
help_text='Onboarding mode (true): show ground truth tasks first in the labeling stream',
273+
)
270274
show_overlap_first = models.BooleanField(_('show overlap first'), default=False)
271275
overlap_cohort_percentage = models.IntegerField(_('overlap_cohort_percentage'), default=100)
272276

label_studio/tasks/models.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -276,9 +276,7 @@ def has_lock(self, user=None):
276276
"""
277277
from projects.functions.next_task import get_next_task_logging_level
278278

279-
if self.project.show_ground_truth_first and flag_set(
280-
'fflag_feat_all_leap_1825_annotator_evaluation_short', user='auto'
281-
):
279+
if self.project.show_ground_truth_first:
282280
# in show_ground_truth_first mode(onboarding)
283281
# we ignore overlap setting for ground_truth tasks
284282
# https://humansignal.atlassian.net/browse/LEAP-1963

0 commit comments

Comments
 (0)