Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ plugins = pydantic.mypy,sqlalchemy.ext.mypy.plugin
exclude = (?x)(
^src/askui/models/ui_tars_ep/ui_tars_api\.py$
| ^src/askui/tools/askui/askui_ui_controller_grpc/.*$
| ^venv/.*$
| ^\.venv/.*$
)
mypy_path = src:tests
explicit_package_bases = true
Expand Down
5 changes: 4 additions & 1 deletion src/askui/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""AskUI Python SDK"""

__version__ = "0.32.1"
__version__ = "0.33.0"

import logging
import os
Expand Down Expand Up @@ -45,6 +45,7 @@
from .models.types.response_schemas import ResponseSchema, ResponseSchemaBase
from .retry import ConfigurableRetry, Retry
from .tools import ModifierKey, PcKey
from .tools.askui import LocalAgentOsServer, RemoteAgentOsServer
from .utils.image_utils import ImageSource
from .utils.source_utils import InputSource

Expand All @@ -69,6 +70,8 @@
logging.getLogger(__name__).addHandler(logging.NullHandler())

__all__ = [
"RemoteAgentOsServer",
"LocalAgentOsServer",
"Agent",
"AutomationError",
"ComputerAgent",
Expand Down
84 changes: 78 additions & 6 deletions src/askui/computer_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
create_computer_agent_prompt,
)
from askui.tools.computer import (
ComputerGetActiveAgentOsServerTool,
ComputerGetMousePositionTool,
ComputerGetSystemInfoTool,
ComputerKeyboardPressedTool,
ComputerKeyboardReleaseTool,
ComputerKeyboardTapTool,
ComputerListAgentOsServersTool,
ComputerListDisplaysTool,
ComputerMouseClickTool,
ComputerMouseHoldDownTool,
Expand All @@ -31,14 +33,15 @@
ComputerRetrieveActiveDisplayTool,
ComputerScreenshotTool,
ComputerSetActiveDisplayTool,
ComputerSwitchAgentOsServerTool,
ComputerTypeTool,
)
from askui.tools.exception_tool import ExceptionTool

from .reporting import CompositeReporter, Reporter
from .retry import Retry
from .tools import AgentToolbox, ComputerAgentOsFacade, ModifierKey, PcKey
from .tools.askui import AskUiControllerClient
from .tools.askui import AgentOsServer, AskUiControllerClient

logger = logging.getLogger(__name__)

Expand All @@ -50,15 +53,36 @@ class ComputerAgent(Agent):
This agent can perform various UI interactions like clicking, typing, scrolling, and more.
It uses computer vision models to locate UI elements and execute actions on them.

A single `ComputerAgent` can drive **one or more machines** through the
`agent_os_servers` argument. Each entry is an Agent OS server (local
subprocess or remote gRPC endpoint) identified by a stable `computer_id`.
At any moment one server is *active* and receives all explicit calls
(`click`, `type`, `keyboard`, ...). The active server can be changed at
runtime via `agent.tools.os.switch_agent_os_server(computer_id)` or
scoped to a block using `agent.tools.os.temporary_select(computer_id)`.
The `act()` model is also given list/switch/get-active tools so it can
orchestrate work across machines on its own (e.g. read something on one
computer and re-enter it on another).

Args:
display (int, optional): The display number to use for screen interactions. Defaults to `1`.
display (int, optional): The display number to use for screen interactions on the default local server. Ignored when `agent_os_servers` is provided. Defaults to `1`.
reporters (list[Reporter] | None, optional): List of reporter instances for logging and reporting. If `None`, an empty list is used.
tools (AgentToolbox | None, optional): Custom toolbox instance. If `None`, a default one will be created with `AskUiControllerClient`.
agent_os_servers (list[AgentOsServer] | None, optional):
Agent OS servers the agent can route actions to. May mix one
`LocalAgentOsServer` (managing a controller subprocess on this
machine) with any number of `RemoteAgentOsServer`s pointing at
controllers already running on other machines. Constraints: at
least one server, at most one local, and remote `address`es plus
all `computer_id`s must be unique. The first entry becomes the
initial active server. Defaults to a single local server bound to
`display`.
settings (AgentSettings | None, optional): Provider-based model settings. If `None`, uses the default AskUI model stack.
retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for `locate()` method.
act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method.

Example:
Single local machine (the default):

```python
from askui import ComputerAgent

Expand All @@ -67,35 +91,80 @@ class ComputerAgent(Agent):
agent.type("Hello World")
agent.act("Open settings menu")
```

Example:
Research on one machine and write up the findings on another. The
first server in the list is the active one; `temporary_select`
re-routes a block of explicit calls and restores the previous
active server on exit.

```python
from askui import ComputerAgent
from askui.tools.askui import LocalAgentOsServer, RemoteAgentOsServer

with ComputerAgent(
agent_os_servers=[
LocalAgentOsServer(computer_id="research-box"),
RemoteAgentOsServer(
address="192.168.1.42:26000",
description="Writer box with a text editor open",
computer_id="writer-box",
),
],
) as agent:
agent.act(
"On research-box, open a browser, google 'askui', and read "
"the top results to gather key facts about what AskUI is, "
"what it does, and notable features. Then switch to "
"writer-box and write a Markdown document titled "
"'AskUI Findings' summarizing those facts as a bulleted "
"list in the open text editor."
)
```

Example:
Register a remote machine at runtime:

```python
from askui import ComputerAgent

with ComputerAgent() as agent:
agent.tools.os.add_remote_agent_os_server(
address="10.0.0.5:26000",
description="Build server",
)
agent.act("Kick off a release build on the build server")
```
"""

@telemetry.record_call(
exclude={
"reporters",
"tools",
"settings",
"act_tools",
"callbacks",
"truncation_strategy",
"agent_os_servers",
}
)
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def __init__(
self,
display: Annotated[int, Field(ge=1)] = 1,
reporters: list[Reporter] | None = None,
tools: AgentToolbox | None = None,
agent_os_servers: list[AgentOsServer] | None = None,
settings: AgentSettings | None = None,
retry: Retry | None = None,
act_tools: list[Tool] | None = None,
callbacks: list[ConversationCallback] | None = None,
truncation_strategy: TruncationStrategy | None = None,
) -> None:
reporter = CompositeReporter(reporters=reporters)
self.tools = tools or AgentToolbox(
self.tools = AgentToolbox(
agent_os=AskUiControllerClient(
display=display,
reporter=reporter,
agent_os_servers=agent_os_servers,
)
)
super().__init__(
Expand Down Expand Up @@ -519,6 +588,9 @@ def get_default_tools() -> list[Tool]:
ComputerListDisplaysTool(),
ComputerRetrieveActiveDisplayTool(),
ComputerSetActiveDisplayTool(),
ComputerListAgentOsServersTool(),
ComputerSwitchAgentOsServerTool(),
ComputerGetActiveAgentOsServerTool(),
]


Expand Down
19 changes: 15 additions & 4 deletions src/askui/models/shared/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,12 +534,23 @@ def reset_tools(self, tools: list[Tool] | None = None) -> None:
"""Reset the tools in the collection with new tools."""
self._tools = tools or []

def get_agent_os_by_tags(
    self, required_tags: list[str]
) -> AgentOs | AndroidAgentOs:
    """
    Find the first registered agent OS whose tags are a superset of
    `required_tags`.

    Every tag in `required_tags` must appear in the agent OS's tags; the
    agent OS may declare additional tags beyond those. Candidates are
    checked in the order of `self._agent_os_list`, so an empty
    `required_tags` matches the first entry of that list.

    Args:
        required_tags (list[str]): Tags the agent OS must declare.

    Returns:
        AgentOs | AndroidAgentOs: The first agent OS that declares every
            required tag.

    Raises:
        ValueError: when no registered agent OS satisfies the required tags
            (including when no agent OS is registered at all).
    """
    for agent_os in self._agent_os_list:
        # Superset check: each required tag must be declared by the candidate.
        if all(required in agent_os.tags for required in required_tags):
            return agent_os
    msg = f"No agent OS satisfies required tags [{', '.join(required_tags)}]"
    raise ValueError(msg)

def _initialize_tools(self) -> None:
Expand Down
67 changes: 67 additions & 0 deletions src/askui/tools/agent_os.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Literal

from PIL import Image
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Self

from askui.models.shared.tool_tags import ToolTags

if TYPE_CHECKING:
from askui.tools.askui.agent_os_server import (
AgentOsServer,
RemoteAgentOsServer,
)
from askui.tools.askui.askui_ui_controller_grpc.generated import (
Controller_V1_pb2 as controller_v1_pbs,
)
Expand Down Expand Up @@ -676,3 +682,64 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None:
window_id (int): The ID of the window to set as active.
"""
raise NotImplementedError

# --- Agent-OS-server management -----------------------------------------------
# These methods only do something meaningful for backends that talk to multiple
# Agent OS servers (`AskUiControllerClient`). Other `AgentOs` implementations
# (Playwright, Android, ...) inherit the default implementations, which raise
# `NotImplementedError`.

def add_agent_os_server(self, server: "AgentOsServer") -> "AgentOsServer":
    """
    Register an additional Agent OS server. Auto-connects if connected.

    This default implementation is a placeholder that always raises
    `NotImplementedError`; a backend that manages Agent OS servers has to
    override it.

    Args:
        server (AgentOsServer): The Agent OS server to register.

    Returns:
        AgentOsServer: Presumably the registered server; the default
            implementation never returns, so confirm against the overriding
            backend.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    raise NotImplementedError

def add_remote_agent_os_server(
    self,
    address: str,
    description: str,
) -> "RemoteAgentOsServer":
    """
    Register an additional remote Agent OS server.

    This default implementation is a placeholder that always raises
    `NotImplementedError`; a backend that manages Agent OS servers has to
    override it.

    Args:
        address (str): Address of the remote Agent OS server. Presumably a
            `host:port` string such as `"10.0.0.5:26000"`; confirm the
            accepted format against the overriding backend.
        description (str): Free-text description of the remote machine, e.g.
            `"Build server"`.

    Returns:
        RemoteAgentOsServer: Presumably the newly registered remote server;
            the default implementation never returns.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    raise NotImplementedError

def reset_agent_os_servers(
    self,
    agent_os_servers: "list[AgentOsServer] | None" = None,
) -> None:
    """
    Disconnect (if connected) and replace the Agent-OS-server list.

    This default implementation is a placeholder that always raises
    `NotImplementedError`; a backend that manages Agent OS servers has to
    override it.

    Args:
        agent_os_servers (list[AgentOsServer] | None, optional): The servers
            that replace the current list. What `None` resolves to is decided
            by the overriding backend and is not visible here. Defaults to
            `None`.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    raise NotImplementedError

def list_agent_os_servers(self) -> "list[AgentOsServer]":
    """
    Return all registered Agent OS servers.

    This default implementation is a placeholder that always raises
    `NotImplementedError`; a backend that manages Agent OS servers has to
    override it.

    Returns:
        list[AgentOsServer]: The registered Agent OS servers (as provided by
            the overriding backend; the default implementation never returns).

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    raise NotImplementedError

def get_active_agent_os_server(self, report: bool = True) -> "AgentOsServer":
    """
    Return the currently active Agent OS server.

    This default implementation is a placeholder that always raises
    `NotImplementedError`; a backend that manages Agent OS servers has to
    override it.

    Args:
        report (bool, optional): Presumably controls whether the lookup is
            reported (e.g. through the backend's reporter). The flag is not
            used in this default implementation, so confirm against the
            overriding backend. Defaults to `True`.

    Returns:
        AgentOsServer: The active Agent OS server (as provided by the
            overriding backend; the default implementation never returns).

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    raise NotImplementedError

def switch_agent_os_server(self, computer_id: str) -> "AgentOsServer":
    """
    Switch the active Agent OS server by its `computer_id`.

    This default implementation is a placeholder that always raises
    `NotImplementedError`; a backend that manages Agent OS servers has to
    override it.

    Args:
        computer_id (str): Computer id of the server to make active.

    Returns:
        AgentOsServer: Presumably the server that is active after the switch;
            the default implementation never returns.

    Raises:
        NotImplementedError: Always, in this default implementation.
    """
    raise NotImplementedError

def temporary_select(self, computer_id: str) -> AbstractContextManager[Self]:
    """
    Temporarily switch the active Agent OS server for the duration of a `with`
    block, then restore the previously-active server on exit (even if the block
    raises).

    This default implementation is a placeholder that always raises
    `NotImplementedError`; the behavior described here is the contract an
    overriding backend is expected to fulfil.

    Args:
        computer_id (str): Computer id of the server to activate inside the
            block.

    Returns:
        AbstractContextManager[Self]: Context manager that yields this
            `AgentOs` with the selected server active.

    Raises:
        NotImplementedError: Always, in this default implementation.

    Example:
        ```python
        with agent_os.temporary_select('Remote-Machine') as remote_machine:
            img = remote_machine.screenshot()
            img.save("remote_machine.png")
        # previous active server restored here
        ```
    """
    raise NotImplementedError
25 changes: 25 additions & 0 deletions src/askui/tools/android/agent_os.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from typing import List, Literal

from PIL import Image
from typing_extensions import Self

from askui.tools.android.uiautomator_hierarchy import UIElementCollection

Expand Down Expand Up @@ -502,3 +504,26 @@ def get_ui_elements(self) -> UIElementCollection:
Gets the UI elements.
"""
raise NotImplementedError

def temporary_select(self, device_sn: str) -> AbstractContextManager[Self]:
    """
    Temporarily switch the active device for the duration of a `with` block,
    then restore the previously-active device on exit (even if the block
    raises).

    This default implementation is a placeholder that always raises
    `NotImplementedError`; the behavior described here is the contract an
    overriding implementation is expected to fulfil.

    Args:
        device_sn (str): Serial number of the device to activate inside the
            block.

    Returns:
        AbstractContextManager[Self]: Context manager that yields this
            `AndroidAgentOs` with `device_sn` active.

    Raises:
        NotImplementedError: Always, in this default implementation.

    Example:
        ```python
        with android_agent_os.temporary_select('table_phone') as table_phone:
            table_phone.tap(100, 200)
        # previous active device restored here
        ```
    """
    raise NotImplementedError
12 changes: 12 additions & 0 deletions src/askui/tools/android/agent_os_facade.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from collections.abc import Iterator
from contextlib import contextmanager
from typing import List, Optional, Tuple

from PIL import Image
from typing_extensions import Self

from askui.models.shared.tool_tags import ToolTags
from askui.tools.android.agent_os import ANDROID_KEY, AndroidAgentOs, AndroidDisplay
Expand Down Expand Up @@ -112,6 +115,15 @@ def set_device_by_serial_number(self, device_sn: str) -> None:
self._agent_os.set_device_by_serial_number(device_sn)
self._real_screen_resolution = None

@contextmanager
def temporary_select(self, device_sn: str) -> Iterator[Self]:
    """
    Temporarily route this facade to the device `device_sn`.

    The switch itself is delegated to the wrapped agent OS's
    `temporary_select` context manager. The cached real screen resolution is
    cleared once after that context is entered and once more when the block
    is left (also when it raises), before the wrapped context exits.
    Presumably this is because the cached value belongs to whichever device
    was active when it was computed.

    Args:
        device_sn (str): Serial number of the device to activate inside the
            block.

    Yields:
        Self: This facade, for use inside the `with` block.
    """

    def _forget_resolution() -> None:
        # Clearing the cache makes the next reader see no stored resolution.
        self._real_screen_resolution = None

    with self._agent_os.temporary_select(device_sn):
        _forget_resolution()
        try:
            yield self
        finally:
            _forget_resolution()

def get_connected_devices_serial_numbers(self) -> list[str]:
return self._agent_os.get_connected_devices_serial_numbers()

Expand Down
Loading
Loading