-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscripted_baselines.py
More file actions
164 lines (154 loc) · 6.09 KB
/
Copy pathscripted_baselines.py
File metadata and controls
164 lines (154 loc) · 6.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""Deterministic gold trajectories for reproducible baseline scores (no LLM)."""
from __future__ import annotations
try:
from supportdesk_env.models import SupportDeskAction
from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
except ImportError: # pragma: no cover - source-tree fallback
from models import SupportDeskAction
from server.supportdesk_environment import SupportDeskEnvironment
def run_easy_gold(env: SupportDeskEnvironment) -> float:
    """Replay the scripted trajectory for ``task_easy_password_reset``.

    The environment is reset, a fixed sequence of actions is sent one step at
    a time, and the episode is closed with a ``submit`` action.

    Args:
        env: Environment to drive. It is reset before the first action.

    Returns:
        The ``score`` attribute of the value returned by the final ``submit``
        step, converted to ``float``.
    """
    env.reset()

    internal_note = (
        "Customer needs a fresh reset link. Older email links are invalid when a newer email "
        "has already been sent."
    )
    customer_reply = (
        "Please use the fresh reset link from the newest email within 30 minutes and ignore "
        "older email reset links."
    )

    # Keyword arguments for every pre-submit action, in the order they are sent.
    script = (
        {"operation": "select_task", "task_id": "task_easy_password_reset"},
        {"operation": "search_docs", "query": "password reset access"},
        {"operation": "open_resource", "resource_id": "doc_reset_policy"},
        {"operation": "open_resource", "resource_id": "doc_account_access_routing"},
        {"operation": "set_queue", "queue": "account_access"},
        {"operation": "set_priority", "priority": "normal"},
        {"operation": "set_tags", "tags": ["password_reset", "login_issue"]},
        {"operation": "set_resolution_code", "resolution_code": "send_reset_link"},
        {"operation": "save_internal_note", "text": internal_note},
        {"operation": "save_reply", "text": customer_reply},
    )
    for action_kwargs in script:
        # Each action object is built right before it is stepped.
        env.step(SupportDeskAction(**action_kwargs))

    final_obs = env.step(SupportDeskAction(operation="submit"))
    return float(final_obs.score)
def run_medium_gold(env: SupportDeskEnvironment) -> float:
    """Replay the scripted trajectory for ``task_medium_duplicate_charge``.

    The environment is reset, a fixed sequence of actions is sent one step at
    a time, and the episode is closed with a ``submit`` action.

    Args:
        env: Environment to drive. It is reset before the first action.

    Returns:
        The ``score`` attribute of the value returned by the final ``submit``
        step, converted to ``float``.
    """
    env.reset()

    internal_note = (
        "Settled invoice INV-1048 is posted; INV-1049 is still showing as an authorization "
        "hold rather than a settled charge."
    )
    customer_reply = (
        "The authorization hold on INV-1049 is separate from your settled charge INV-1048. "
        "Authorization holds typically clear within 3 to 5 business days while the card "
        "network processes activity."
    )

    # Keyword arguments for every pre-submit action, in the order they are sent.
    script = (
        {"operation": "select_task", "task_id": "task_medium_duplicate_charge"},
        {
            "operation": "search_docs",
            "query": "billing duplicate charge upgrade authorization hold invoice",
        },
        {"operation": "open_resource", "resource_id": "doc_billing_upgrade_holds"},
        {"operation": "open_resource", "resource_id": "record_invoice_ledger"},
        {"operation": "open_resource", "resource_id": "doc_billing_queue_matrix"},
        {"operation": "set_queue", "queue": "billing"},
        {"operation": "set_priority", "priority": "high"},
        {"operation": "set_tags", "tags": ["duplicate_charge", "plan_upgrade"]},
        {
            "operation": "set_resolution_code",
            "resolution_code": "explain_authorization_hold",
        },
        {"operation": "save_internal_note", "text": internal_note},
        {"operation": "save_reply", "text": customer_reply},
    )
    for action_kwargs in script:
        # Each action object is built right before it is stepped.
        env.step(SupportDeskAction(**action_kwargs))

    final_obs = env.step(SupportDeskAction(operation="submit"))
    return float(final_obs.score)
def run_hard_gold(env: SupportDeskEnvironment) -> float:
    """Replay the scripted trajectory for ``task_hard_security_incident``.

    The environment is reset, a fixed sequence of actions is sent one step at
    a time, and the episode is closed with a ``submit`` action.

    Args:
        env: Environment to drive. It is reset before the first action.

    Returns:
        The ``score`` attribute of the value returned by the final ``submit``
        step, converted to ``float``.
    """
    env.reset()

    internal_note = (
        "Triage: suspicious token tok_prod_3921; seat count moved from 42 to 137; "
        "billing review freeze requested while security investigates."
    )
    customer_reply = (
        "We revoked the suspicious API token and opened a security escalation to "
        "account security under our one hour enterprise SLA, with a billing review freeze "
        "during the investigation. We will not request credentials in email."
    )

    # Keyword arguments for every pre-submit action, in the order they are sent.
    script = (
        {"operation": "select_task", "task_id": "task_hard_security_incident"},
        {
            "operation": "search_docs",
            "query": "security billing token enterprise audit freeze sla revoke seat",
        },
        {"operation": "open_resource", "resource_id": "doc_security_runbook"},
        {"operation": "open_resource", "resource_id": "doc_enterprise_sla"},
        {"operation": "open_resource", "resource_id": "doc_billing_review_freeze"},
        {"operation": "open_resource", "resource_id": "record_enterprise_audit_log"},
        {"operation": "set_queue", "queue": "account_security"},
        {"operation": "set_priority", "priority": "urgent"},
        {
            "operation": "set_tags",
            "tags": ["security_incident", "billing_anomaly", "enterprise_sla"],
        },
        {"operation": "set_resolution_code", "resolution_code": "security_escalation"},
        {"operation": "save_internal_note", "text": internal_note},
        {"operation": "save_reply", "text": customer_reply},
    )
    for action_kwargs in script:
        # Each action object is built right before it is stepped.
        env.step(SupportDeskAction(**action_kwargs))

    final_obs = env.step(SupportDeskAction(operation="submit"))
    return float(final_obs.score)
def run_all_scripted() -> dict[str, float]:
    """Run the easy, medium and hard scripted trajectories in-process.

    A single ``SupportDeskEnvironment`` is created and handed to each runner
    in turn; every runner resets it before use.

    Returns:
        Mapping of task id to the float score returned by that task's runner,
        in easy, medium, hard order.
    """
    shared_env = SupportDeskEnvironment()
    # (task id, runner) pairs, listed in execution order.
    runners = (
        ("task_easy_password_reset", run_easy_gold),
        ("task_medium_duplicate_charge", run_medium_gold),
        ("task_hard_security_incident", run_hard_gold),
    )
    return {task_id: runner(shared_env) for task_id, runner in runners}