|
15 | 15 |
|
16 | 16 | logger = logging.getLogger(__name__) |
17 | 17 |
|
| 18 | + |
| 19 | +# Hook for GT-first gating (Enterprise can override via settings) |
| 20 | +def _oss_should_attempt_gt_first(user: User, project: Project) -> bool: |
| 21 | + # Open-source default: if project enables GT-first, allow it without onboarding gates |
| 22 | + return bool(project.show_ground_truth_first) |
| 23 | + |
| 24 | + |
18 | 25 | get_tasks_agreement_queryset = load_func(settings.GET_TASKS_AGREEMENT_QUERYSET) |
| 26 | +should_attempt_ground_truth_first = ( |
| 27 | + load_func(settings.SHOULD_ATTEMPT_GROUND_TRUTH_FIRST) or _oss_should_attempt_gt_first |
| 28 | +) |
19 | 29 |
|
20 | 30 |
|
21 | 31 | def get_next_task_logging_level(user: User) -> int: |
@@ -158,33 +168,41 @@ def get_not_solved_tasks_qs( |
158 | 168 | prioritized_on_agreement = False |
159 | 169 | # if the annotator is assigned to tasks, they must solve them regardless of is_labeled=True |
160 | 170 | if not assigned_flag: |
161 | | - # include tasks that have been completed if their agreement is not at threshold if threshold setting is set |
| 171 | + # low agreement strategy for auto-assigned annotators: |
| 172 | + # Include completed tasks whose agreement is below the threshold, when the threshold setting is set |
162 | 173 | lse_project = getattr(project, 'lse_project', None) |
163 | 174 | if ( |
164 | 175 | lse_project |
165 | | - and flag_set('fflag_feat_optic_161_project_settings_for_low_agreement_threshold_score_short', user='auto') |
166 | 176 | and lse_project.agreement_threshold is not None |
167 | 177 | and get_tasks_agreement_queryset |
168 | 178 | and user.is_project_annotator(project) |
169 | 179 | ): |
170 | | - not_solved_tasks = ( |
171 | | - get_tasks_agreement_queryset(not_solved_tasks) |
172 | | - # include tasks that are not labeled or are labeled but fall below the agreement threshold |
173 | | - .filter( |
174 | | - Q(_agreement__lt=lse_project.agreement_threshold, is_labeled=True) | Q(is_labeled=False) |
175 | | - ).annotate(annotators=Count('annotations__completed_by', distinct=True)) |
176 | | - # skip tasks that have been annotated by the maximum additional number of annotators |
177 | | - .filter(annotators__lt=F('overlap') + lse_project.max_additional_annotators_assignable) |
| 180 | + # Onboarding mode (GT-first) should keep GT tasks eligible regardless of is_labeled/agreement |
| 181 | + qs = get_tasks_agreement_queryset(not_solved_tasks) |
| 182 | + qs = qs.annotate(annotators=Count('annotations__completed_by', distinct=True)) |
| 183 | + |
| 184 | + low_agreement_pred = Q(_agreement__lt=lse_project.agreement_threshold, is_labeled=True) | Q( |
| 185 | + is_labeled=False |
178 | 186 | ) |
| 187 | + capacity_pred = Q(annotators__lt=F('overlap') + (lse_project.max_additional_annotators_assignable or 0)) |
| 188 | + |
| 189 | + if project.show_ground_truth_first: |
| 190 | + gt_subq = Annotation.objects.filter(task=OuterRef('pk'), ground_truth=True) |
| 191 | + qs = qs.annotate(has_ground_truths=Exists(gt_subq)) |
| 192 | + # Keep all GT tasks and apply low-agreement + capacity to the rest. Alternatively, we could: |
| 193 | + # - if user.solved_tasks_array.count < lse_project.annotator_evaluation_minimum_tasks |
| 194 | + # - else, apply low-agreement + capacity to the rest (performance might be better) |
| 195 | + # It is an open question which is better; this version is at least simpler from the code perspective. |
| 196 | + not_solved_tasks = qs.filter(Q(has_ground_truths=True) | (low_agreement_pred & capacity_pred)) |
| 197 | + else: |
| 198 | + not_solved_tasks = qs.filter(low_agreement_pred & capacity_pred) |
| 199 | + |
179 | 200 | prioritized_on_agreement, not_solved_tasks = _prioritize_low_agreement_tasks(not_solved_tasks, lse_project) |
180 | 201 |
|
181 | 202 | # otherwise, filtering out completed tasks is sufficient |
182 | 203 | else: |
183 | 204 | # filter out already labeled tasks, except in onboarding (GT-first) mode, where they stay eligible |
184 | | - if not ( |
185 | | - flag_set('fflag_feat_all_leap_1825_annotator_evaluation_short', user='auto') |
186 | | - and project.show_ground_truth_first |
187 | | - ): |
| 205 | + if not project.show_ground_truth_first: |
188 | 206 | not_solved_tasks = not_solved_tasks.filter(is_labeled=False) |
189 | 207 |
|
190 | 208 | if not flag_set('fflag_fix_back_lsdv_4523_show_overlap_first_order_27022023_short'): |
@@ -220,33 +238,39 @@ def get_next_task_without_dm_queue( |
220 | 238 | use_task_lock = True |
221 | 239 | queue_info = '' |
222 | 240 |
|
223 | | - # ordered by data manager |
| 241 | + # Manually assigned tasks |
224 | 242 | if assigned_flag: |
225 | 243 | logger.debug(f'User={user} try to get task from assigned') |
226 | 244 | next_task = not_solved_tasks.first() |
227 | 245 | use_task_lock = False |
228 | 246 | queue_info += (' & ' if queue_info else '') + 'Manually assigned queue' |
229 | 247 |
|
230 | | - # If current user has already lock one task - return it (without setting the lock again) |
| 248 | + # Task lock: if current user already has a locked task, return it (without setting the lock again) |
231 | 249 | if not next_task: |
232 | 250 | next_task = Task.get_locked_by(user, tasks=not_solved_tasks) |
233 | 251 | if next_task: |
234 | 252 | logger.debug(f'User={user} got already locked for them {next_task}') |
235 | 253 | use_task_lock = False |
236 | 254 | queue_info += (' & ' if queue_info else '') + 'Task lock' |
237 | 255 |
|
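| 256 | + # Ground truth: label GT first only when the gating hook allows it for this user. The open-source default allows it whenever the project enables GT-first; an override (e.g. Enterprise) may gate by min tasks and min score |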
| 257 | + allow_gt_first = should_attempt_ground_truth_first(user, project) |
| 258 | + if not next_task and allow_gt_first: |
| 259 | + logger.debug(f'User={user} tries ground truth from prepared tasks') |
| 260 | + next_task = _try_ground_truth(not_solved_tasks, project, user) |
| 261 | + if next_task: |
| 262 | + queue_info += (' & ' if queue_info else '') + 'Ground truth queue' |
| 263 | + |
| 264 | + # Low agreement strategy: reassign this annotator to low agreement tasks |
238 | 265 | if not next_task and prioritized_low_agreement: |
239 | 266 | logger.debug(f'User={user} tries low agreement from prepared tasks') |
240 | 267 | next_task = _get_first_unlocked(not_solved_tasks, user) |
241 | | - queue_info += (' & ' if queue_info else '') + 'Low agreement queue' |
242 | | - |
243 | | - if not next_task and project.show_ground_truth_first: |
244 | | - logger.debug(f'User={user} tries ground truth from prepared tasks') |
245 | | - next_task = _try_ground_truth(not_solved_tasks, project, user) |
246 | | - queue_info += (' & ' if queue_info else '') + 'Ground truth queue' |
| 268 | + if next_task: |
| 269 | + queue_info += (' & ' if queue_info else '') + 'Low agreement queue' |
247 | 270 |
|
| 271 | + # Breadth first: label in-progress tasks first |
248 | 272 | if not next_task and project.maximum_annotations > 1: |
249 | | - # if there are any tasks in progress (with maximum number of annotations), randomly sampling from them |
| 273 | + # if there are tasks that are already labeled but whose task.overlap is still < project.maximum_annotations, randomly sample from them |
250 | 274 | logger.debug(f'User={user} tries depth first from prepared tasks') |
251 | 275 | next_task = _try_breadth_first(not_solved_tasks, user) |
252 | 276 | if next_task: |
@@ -358,7 +382,7 @@ def get_next_task( |
358 | 382 | # don't output anything - just filter tasks with overlap |
359 | 383 | logger.debug(f'User={user} tries overlap first from prepared tasks') |
360 | 384 | _, tasks_with_overlap = _try_tasks_with_overlap(not_solved_tasks) |
361 | | - queue_info += 'Show overlap first' |
| 385 | + queue_info += (' & ' if queue_info else '') + 'Show overlap first' |
362 | 386 | next_task, queue_info = get_task_from_qs_with_sampling( |
363 | 387 | tasks_with_overlap, user_solved_tasks_array, prepared_tasks, user, project, queue_info |
364 | 388 | ) |
|
0 commit comments