基类:AppAgentProcessor
用于执行任务并保存结果的 ExecuteFlow 类。
初始化任务的执行流。
参数 |
-
task_file_name (str ) –
-
context (Context ) –
-
environment (WindowsAppEnv ) –
|
源代码位于 execution/workflow/execute_flow.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def __init__(
    self, task_file_name: str, context: Context, environment: WindowsAppEnv
) -> None:
    """
    Set up the execution flow for a single task.
    :param task_file_name: Name of the task file being processed.
    :param context: Context object for the current session.
    :param environment: Environment object for the application being processed.
    """
    super().__init__(agent=ExecuteAgent, context=context)

    # Durations are filled in by execute(); nothing has been measured yet.
    self.execution_time = self.eval_time = None

    self._app_env = environment
    self._task_file_name = task_file_name
    self._app_name = self._app_env.app_name

    # The log location is derived from the configured path template,
    # with the task file name substituted in.
    self._initialize_logs(
        _configs["EXECUTE_LOG_PATH"].format(task=task_file_name)
    )

    self.application_window = self._app_env.find_matching_window(task_file_name)
    self.app_agent = self._get_or_create_execute_agent()
    self.eval_agent = self._get_or_create_evaluation_agent()

    # Text of the control matched for the current step; none matched so far.
    self._matched_control = None
|
execute(request, instantiated_plan)
运行执行流:执行任务并保存结果。
参数 |
-
request (str ) –
-
instantiated_plan (List[Dict[str, Any]] ) –
|
返回 |
-
Tuple[List[Dict[str, Any]], Dict[str, str]] –
|
源代码位于 execution/workflow/execute_flow.py
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def execute(
    self, request: str, instantiated_plan: List[Dict[str, Any]]
) -> Tuple[List[Dict[str, Any]], Dict[str, str]]:
    """
    Run the execute flow: execute the plan, then evaluate the outcome.
    :param request: Original request to be executed.
    :param instantiated_plan: Instantiated plan containing steps to execute.
    :return: Tuple of the executed plan and the evaluation result.
    :raises RuntimeError: If plan execution or evaluation fails; the
        underlying exception is attached as the cause.
    """
    start_time = time.time()
    try:
        executed_plan = self.execute_plan(instantiated_plan)
    except Exception as error:
        # Chain explicitly so the original traceback is kept as __cause__.
        raise RuntimeError(f"Execution failed. {error}") from error
    finally:
        # The duration is recorded whether or not execution succeeded.
        self.execution_time = round(time.time() - start_time, 3)

    start_time = time.time()
    try:
        # Only the first element of the evaluation output is used here.
        result, _ = self.eval_agent.evaluate(
            request=request, log_path=self.log_path
        )
        utils.print_with_color(f"Result: {result}", "green")
    except Exception as error:
        raise RuntimeError(f"Evaluation failed. {error}") from error
    finally:
        self.eval_time = round(time.time() - start_time, 3)

    return executed_plan, result
|
execute_action()
执行动作。
源代码位于 execution/workflow/execute_flow.py
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
def execute_action(self) -> None:
    """
    Locate the target control for the current step and run the parsed
    operation on it.
    :raises RuntimeError: If no matching control is found.
    """
    # Re-resolve the application window before looking for the control.
    self.application_window = self._app_env.find_matching_window(
        self._task_file_name
    )

    if self.control_text == "":
        # An empty control text targets the application window itself.
        target = self.application_window
    else:
        self._control_label, target = self._app_env.find_matching_controller(
            self.filtered_annotation_dict, self.control_text
        )

    # Guard clause: nothing to act on without a control.
    if not target:
        raise RuntimeError(f"Control with text '{self.control_text}' not found.")
    self._matched_control = target.window_text()

    try:
        if _ufo_configs.get("SHOW_VISUAL_OUTLINE_ON_SCREEN", True):
            target.draw_outline(colour="red", thickness=3)
            time.sleep(_ufo_configs.get("RECTANGLE_TIME", 0))

        # Control rectangle adjusted against the application window rectangle.
        coords = PhotographerDecorator.coordinate_adjusted(
            self.application_window.rectangle(), target.rectangle()
        )
        info = target.element_info
        self._control_log = {
            "control_class": info.class_name,
            "control_type": info.control_type,
            "control_automation_id": info.automation_id,
            "control_friendly_class_name": target.friendly_class_name(),
            "control_coordinates": {
                "left": coords[0],
                "top": coords[1],
                "right": coords[2],
                "bottom": coords[3],
            },
        }

        puppeteer = self.app_agent.Puppeteer
        puppeteer.receiver_manager.create_ui_control_receiver(
            target, self.application_window
        )

        # Save the screenshot of the selected control before acting on it.
        self.capture_control_screenshot(target)

        self._results = self.app_agent.Puppeteer.execute_command(
            self._operation, self._args
        )
        self.control_reannotate = None

        # Results that cannot be serialized to JSON are replaced by "".
        if not utils.is_json_serializable(self._results):
            self._results = ""
    except Exception:
        # Any failure while acting on the control goes to the general handler.
        self.general_error_handler()
|
execute_plan(instantiated_plan)
从执行代理获取执行结果。
参数 |
-
instantiated_plan (List[Dict[str, Any]] ) –
|
源代码位于 execution/workflow/execute_flow.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def execute_plan(
    self, instantiated_plan: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """
    Execute every step of the instantiated plan and record per-step outcomes.

    Each step dict is updated in place: "Success" and "MatchedControlText"
    are reset to None up front, then filled in (together with
    "ControlLabel") as steps are processed.
    :param instantiated_plan: Plan containing steps to execute.
    :return: The same plan list, annotated with execution results.
    :raises RuntimeError: If initialization fails, a step fails, or the
        configured maximum number of steps is exceeded. The underlying
        exception is attached as the cause.
    """
    # Reset the step counter; the initial screenshot is taken as step 0.
    self.session_step = 0
    try:
        time.sleep(1)
        # Initialize the API receiver.
        self.app_agent.Puppeteer.receiver_manager.create_api_receiver(
            self.app_agent._app_root_name, self.app_agent._process_name
        )
        # Use the most recently registered receiver as the control receiver.
        current_receiver = self.app_agent.Puppeteer.receiver_manager.receiver_list[
            -1
        ]
        if current_receiver is not None:
            self.application_window = self._app_env.find_matching_window(
                self._task_file_name
            )
            current_receiver.com_object = (
                current_receiver.get_object_from_process_name()
            )
        self.init_and_final_capture_screenshot()
    except Exception as error:
        raise RuntimeError(f"Execution initialization failed. {error}") from error

    # Mark every step as "not yet run" before executing anything.
    for step_plan in instantiated_plan:
        step_plan["Success"] = None
        step_plan["MatchedControlText"] = None

    for step_plan in instantiated_plan:
        try:
            self.session_step += 1
            # Check if the maximum steps have been exceeded.
            if self.session_step > _configs["MAX_STEPS"]:
                raise RuntimeError("Maximum steps exceeded.")
            self._parse_step_plan(step_plan)
            try:
                self.process()
            except Exception:
                # Record the failure on the step, then let the outer handler
                # wrap the original exception.
                step_plan["Success"] = False
                raise
            step_plan["Success"] = True
            step_plan["ControlLabel"] = self._control_label
            step_plan["MatchedControlText"] = self._matched_control
        except Exception as error:
            raise RuntimeError(
                f"Step {self.session_step} execution failed. {error}"
            ) from error

    # Capture the final screenshot under its own step number.
    self.session_step += 1
    time.sleep(1)
    self.init_and_final_capture_screenshot()

    # Save the final state of the app through the most recently registered
    # COM receiver that has a live client, if there is one.
    win_com_receiver = next(
        (
            receiver
            for receiver in reversed(
                self.app_agent.Puppeteer.receiver_manager.receiver_list
            )
            if isinstance(receiver, WinCOMReceiverBasic)
            and receiver.client is not None
        ),
        None,
    )
    if win_com_receiver is not None:
        win_com_receiver.save()
        time.sleep(1)
        win_com_receiver.client.Quit()

    print("Execution complete.")
    return instantiated_plan
|
general_error_handler()
处理一般错误。
源代码位于 execution/workflow/execute_flow.py
def general_error_handler(self) -> None:
    """
    Hook for handling general errors; currently a no-op.
    """
    return None
|
init_and_final_capture_screenshot()
捕获屏幕截图。
源代码位于 execution/workflow/execute_flow.py
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
def init_and_final_capture_screenshot(self) -> None:
    """
    Save a screenshot of the application window for the current session step.

    The screenshot path is also recorded in the memory data under
    "CleanScreenshot", and a control screenshot is captured for the
    environment's application window.
    """
    # The file name is appended directly to the log path.
    log_prefix = self.log_path
    target_path = log_prefix + f"action_step{self.session_step}.png"

    self._memory_data.add_values_from_dict({"CleanScreenshot": target_path})
    self.photographer.capture_app_window_screenshot(
        self.application_window, save_path=target_path
    )

    # The control screenshot uses the environment's application window.
    self.capture_control_screenshot(self._app_env.app_window)
|
log_save()
记录当前步骤的执行信息(步骤、子任务、控件、动作、结果、耗时等)并写入日志。
源代码位于 execution/workflow/execute_flow.py
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def log_save(self) -> None:
    """
    Record the current step's details in the memory data, then log the
    accumulated memory as a dictionary.
    """
    self._memory_data.add_values_from_dict(
        {
            "Step": self.session_step,
            "Subtask": self.subtask,
            "ControlLabel": self._control_label,
            "ControlText": self.control_text,
            "Action": self.action,
            # Command type as reported by the puppeteer for this operation.
            "ActionType": self.app_agent.Puppeteer.get_command_types(
                self._operation
            ),
            "Results": self._results,
            "Application": self.app_agent._app_root_name,
            "TimeCost": self.time_cost,
        }
    )
    snapshot = self._memory_data.to_dict()
    self.log(snapshot)
|
print_step_info()
打印步骤信息。
源代码位于 execution/workflow/execute_flow.py
231
232
233
234
235
236
237
238
239
240
241
def print_step_info(self) -> None:
    """
    Print the current step number and subtask in magenta.
    """
    message = "Step {step}: {subtask}".format(
        step=self.session_step,
        subtask=self.subtask,
    )
    utils.print_with_color(message, "magenta")
|
process()
处理当前步骤。
源代码位于 execution/workflow/execute_flow.py
219
220
221
222
223
224
225
226
227
228
def process(self) -> None:
    """
    Run one step: print its info, capture a screenshot, execute the action,
    then record the elapsed time and save the log entry.
    """
    began = time.time()

    self.print_step_info()
    self.capture_screenshot()
    self.execute_action()

    # The time cost is set before logging, which reads it.
    elapsed = time.time() - began
    self.time_cost = round(elapsed, 3)
    self.log_save()
|