Skip to content
2 changes: 2 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ plugins = pydantic.mypy,sqlalchemy.ext.mypy.plugin
exclude = (?x)(
^src/askui/models/ui_tars_ep/ui_tars_api\.py$
| ^src/askui/tools/askui/askui_ui_controller_grpc/.*$
| ^venv/.*$
| ^\.venv/.*$
)
mypy_path = src:tests
explicit_package_bases = true
Expand Down
5 changes: 4 additions & 1 deletion src/askui/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""AskUI Python SDK"""

__version__ = "0.32.1"
__version__ = "0.33.0"

import logging
import os
Expand Down Expand Up @@ -45,6 +45,7 @@
from .models.types.response_schemas import ResponseSchema, ResponseSchemaBase
from .retry import ConfigurableRetry, Retry
from .tools import ModifierKey, PcKey
from .tools.askui import LocalAgentOsServer, RemoteAgentOsServer
from .utils.image_utils import ImageSource
from .utils.source_utils import InputSource

Expand All @@ -69,6 +70,8 @@
logging.getLogger(__name__).addHandler(logging.NullHandler())

__all__ = [
"RemoteAgentOsServer",
"LocalAgentOsServer",
"Agent",
"AutomationError",
"ComputerAgent",
Expand Down
20 changes: 15 additions & 5 deletions src/askui/computer_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
create_computer_agent_prompt,
)
from askui.tools.computer import (
ComputerGetActiveAgentOsServerTool,
ComputerGetMousePositionTool,
ComputerGetSystemInfoTool,
ComputerKeyboardPressedTool,
ComputerKeyboardReleaseTool,
ComputerKeyboardTapTool,
ComputerListAgentOsServersTool,
ComputerListDisplaysTool,
ComputerMouseClickTool,
ComputerMouseHoldDownTool,
Expand All @@ -31,14 +33,15 @@
ComputerRetrieveActiveDisplayTool,
ComputerScreenshotTool,
ComputerSetActiveDisplayTool,
ComputerSwitchAgentOsServerTool,
ComputerTypeTool,
)
from askui.tools.exception_tool import ExceptionTool

from .reporting import CompositeReporter, Reporter
from .retry import Retry
from .tools import AgentToolbox, ComputerAgentOsFacade, ModifierKey, PcKey
from .tools.askui import AskUiControllerClient
from .tools.askui import AgentOsServer, AskUiControllerClient

logger = logging.getLogger(__name__)

Expand All @@ -53,7 +56,10 @@ class ComputerAgent(Agent):
Args:
display (int, optional): The display number to use for screen interactions. Defaults to `1`.
reporters (list[Reporter] | None, optional): List of reporter instances for logging and reporting. If `None`, an empty list is used.
tools (AgentToolbox | None, optional): Custom toolbox instance. If `None`, a default one will be created with `AskUiControllerClient`.
agent_os_servers (list[AgentOsServer] | None, optional):
Agent OS servers used by the default `AskUiControllerClient`. Must contain
at least one server, at most one local, and remote addresses must be unique.
Defaults to a single local Agent OS server.
settings (AgentSettings | None, optional): Provider-based model settings. If `None`, uses the default AskUI model stack.
retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for `locate()` method.
act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method.
Expand All @@ -72,30 +78,31 @@ class ComputerAgent(Agent):
@telemetry.record_call(
exclude={
"reporters",
"tools",
"settings",
"act_tools",
"callbacks",
"truncation_strategy",
"agent_os_servers",
}
)
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def __init__(
self,
display: Annotated[int, Field(ge=1)] = 1,
reporters: list[Reporter] | None = None,
tools: AgentToolbox | None = None,
agent_os_servers: list[AgentOsServer] | None = None,
settings: AgentSettings | None = None,
retry: Retry | None = None,
act_tools: list[Tool] | None = None,
callbacks: list[ConversationCallback] | None = None,
truncation_strategy: TruncationStrategy | None = None,
) -> None:
reporter = CompositeReporter(reporters=reporters)
self.tools = tools or AgentToolbox(
self.tools = AgentToolbox(
agent_os=AskUiControllerClient(
display=display,
reporter=reporter,
agent_os_servers=agent_os_servers,
)
)
super().__init__(
Expand Down Expand Up @@ -519,6 +526,9 @@ def get_default_tools() -> list[Tool]:
ComputerListDisplaysTool(),
ComputerRetrieveActiveDisplayTool(),
ComputerSetActiveDisplayTool(),
ComputerListAgentOsServersTool(),
ComputerSwitchAgentOsServerTool(),
ComputerGetActiveAgentOsServerTool(),
]


Expand Down
19 changes: 15 additions & 4 deletions src/askui/models/shared/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,12 +534,23 @@ def reset_tools(self, tools: list[Tool] | None = None) -> None:
"""Reset the tools in the collection with new tools."""
self._tools = tools or []

def get_agent_os_by_tags(
    self, required_tags: list[str]
) -> AgentOs | AndroidAgentOs:
    """
    Find the first registered agent OS whose tags are a superset of
    `required_tags`.

    Every tag in `required_tags` must appear in the agent OS's tags; the
    agent OS may declare additional tags beyond those. Agent OS instances
    are checked in the order they appear in `self._agent_os_list`.

    Note that an empty `required_tags` list is satisfied by any agent OS,
    so the first registered one is returned in that case.

    Args:
        required_tags (list[str]): Tags the agent OS must declare.

    Returns:
        AgentOs | AndroidAgentOs: The first registered agent OS that
            declares all required tags.

    Raises:
        ValueError: when no registered agent OS satisfies the required tags
            (including when no agent OS is registered at all).
    """
    for agent_os in self._agent_os_list:
        # Subset check: every required tag must be declared by the agent OS.
        if all(required in agent_os.tags for required in required_tags):
            return agent_os
    msg = f"No agent OS satisfies required tags [{', '.join(required_tags)}]"
    raise ValueError(msg)

def _initialize_tools(self) -> None:
Expand Down
67 changes: 67 additions & 0 deletions src/askui/tools/agent_os.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Literal

from PIL import Image
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Self

from askui.models.shared.tool_tags import ToolTags

if TYPE_CHECKING:
from askui.tools.askui.agent_os_server import (
AgentOsServer,
RemoteAgentOsServer,
)
from askui.tools.askui.askui_ui_controller_grpc.generated import (
Controller_V1_pb2 as controller_v1_pbs,
)
Expand Down Expand Up @@ -676,3 +682,64 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None:
window_id (int): The ID of the window to set as active.
"""
raise NotImplementedError

# --- Agent-OS-server management -----------------------------------------------
# These methods only do something meaningful for backends that talk to multiple
# Agent OS servers (`AskUiControllerClient`). Other `AgentOs` implementations
# (Playwright, Android, ...) inherit the default implementations, which raise
# `NotImplementedError`.

def add_agent_os_server(self, server: "AgentOsServer") -> "AgentOsServer":
    """
    Register an additional Agent OS server. Auto-connects if connected.

    Args:
        server (AgentOsServer): The server to register.

    Returns:
        AgentOsServer: The registered server. NOTE(review): presumably the
            instance that was passed in; confirm against the overriding
            implementation.

    Raises:
        NotImplementedError: Always in this default implementation; only
            backends that manage Agent OS servers override it.
    """
    raise NotImplementedError

def add_remote_agent_os_server(
    self,
    address: str,
    description: str,
) -> "RemoteAgentOsServer":
    """
    Register an additional remote Agent OS server.

    Args:
        address (str): Address of the remote server. NOTE(review): the
            expected format is not visible here; confirm against
            `RemoteAgentOsServer`.
        description (str): Description of the remote server.

    Returns:
        RemoteAgentOsServer: The registered remote server.

    Raises:
        NotImplementedError: Always in this default implementation; only
            backends that manage Agent OS servers override it.
    """
    raise NotImplementedError

def reset_agent_os_servers(
    self,
    agent_os_servers: "list[AgentOsServer] | None" = None,
) -> None:
    """
    Disconnect (if connected) and replace the Agent-OS-server list.

    Args:
        agent_os_servers (list[AgentOsServer] | None, optional): The servers
            that replace the current list. Defaults to `None`.
            NOTE(review): what `None` resolves to is decided by the
            overriding implementation; confirm there.

    Raises:
        NotImplementedError: Always in this default implementation; only
            backends that manage Agent OS servers override it.
    """
    raise NotImplementedError

def list_agent_os_servers(self) -> "list[AgentOsServer]":
    """
    Return all registered Agent OS servers.

    Returns:
        list[AgentOsServer]: The registered Agent OS servers.

    Raises:
        NotImplementedError: Always in this default implementation; only
            backends that manage Agent OS servers override it.
    """
    raise NotImplementedError

def get_active_agent_os_server(self, report: bool = True) -> "AgentOsServer":
    """
    Return the currently active Agent OS server.

    Args:
        report (bool, optional): Defaults to `True`. NOTE(review): presumably
            controls whether the call is reported via the reporter; confirm
            against the overriding implementation.

    Returns:
        AgentOsServer: The currently active Agent OS server.

    Raises:
        NotImplementedError: Always in this default implementation; only
            backends that manage Agent OS servers override it.
    """
    raise NotImplementedError

def switch_agent_os_server(self, computer_id: str) -> "AgentOsServer":
    """
    Switch the active Agent OS server by its `computer_id`.

    Args:
        computer_id (str): Computer id of the server to make active.

    Returns:
        AgentOsServer: An Agent OS server. NOTE(review): presumably the
            server that is active after the switch; confirm against the
            overriding implementation.

    Raises:
        NotImplementedError: Always in this default implementation; only
            backends that manage Agent OS servers override it.
    """
    raise NotImplementedError

def temporary_select(self, computer_id: str) -> AbstractContextManager[Self]:
    """
    Temporarily switch the active Agent OS server for the duration of a `with`
    block, then restore the previously-active server on exit (even if the block
    raises).

    Args:
        computer_id (str): Computer id of the server to activate inside the
            block.

    Returns:
        AbstractContextManager[Self]: Context manager that yields this
            `AgentOs` with the selected server active.

    Raises:
        NotImplementedError: Always in this default implementation, at call
            time (before any `with` block is entered); only backends that
            manage Agent OS servers override it.

    Example:
        ```python
        with agent_os.temporary_select('Remote-Machine') as remote_machine:
            img = remote_machine.screenshot()
            img.save("remote_machine.png")
        # previous active server restored here
        ```
    """
    raise NotImplementedError
25 changes: 25 additions & 0 deletions src/askui/tools/android/agent_os.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from typing import List, Literal

from PIL import Image
from typing_extensions import Self

from askui.tools.android.uiautomator_hierarchy import UIElementCollection

Expand Down Expand Up @@ -502,3 +504,26 @@ def get_ui_elements(self) -> UIElementCollection:
Gets the UI elements.
"""
raise NotImplementedError

def temporary_select(self, device_sn: str) -> AbstractContextManager[Self]:
    """
    Temporarily switch the active device for the duration of a `with` block,
    then restore the previously-active device on exit (even if the block
    raises).

    Args:
        device_sn (str): Serial number of the device to activate inside the
            block.

    Returns:
        AbstractContextManager[Self]: Context manager that yields this
            `AndroidAgentOs` with `device_sn` active.

    Raises:
        NotImplementedError: Always in this default implementation, at call
            time (before any `with` block is entered); implementations
            that support device selection override it.

    Example:
        ```python
        with android_agent_os.temporary_select('table_phone') as table_phone:
            table_phone.tap(100, 200)
        # previous active device restored here
        ```
    """
    raise NotImplementedError
12 changes: 12 additions & 0 deletions src/askui/tools/android/agent_os_facade.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from collections.abc import Iterator
from contextlib import contextmanager
from typing import List, Optional, Tuple

from PIL import Image
from typing_extensions import Self

from askui.models.shared.tool_tags import ToolTags
from askui.tools.android.agent_os import ANDROID_KEY, AndroidAgentOs, AndroidDisplay
Expand Down Expand Up @@ -112,6 +115,15 @@ def set_device_by_serial_number(self, device_sn: str) -> None:
self._agent_os.set_device_by_serial_number(device_sn)
self._real_screen_resolution = None

@contextmanager
def temporary_select(self, device_sn: str) -> Iterator[Self]:
    """
    Temporarily select the device `device_sn` on the wrapped agent OS for the
    duration of a `with` block.

    Delegates the device switch to `self._agent_os.temporary_select()` and
    clears `_real_screen_resolution` both after the switch and when the block
    ends, mirroring the reset done by `set_device_by_serial_number()`.

    Args:
        device_sn (str): Serial number of the device to select inside the
            block.

    Yields:
        Self: This facade, with the wrapped agent OS's temporary selection
            in effect.
    """
    with self._agent_os.temporary_select(device_sn):
        # The selected device changed, so drop the stored resolution.
        # NOTE(review): presumably a per-device cached value; confirm.
        self._real_screen_resolution = None
        try:
            yield self
        finally:
            # Runs even if the block raises, and before the wrapped context
            # manager exits.
            self._real_screen_resolution = None

def get_connected_devices_serial_numbers(self) -> list[str]:
return self._agent_os.get_connected_devices_serial_numbers()

Expand Down
21 changes: 21 additions & 0 deletions src/askui/tools/android/ppadb_agent_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
import re
import shlex
import string
from collections.abc import Iterator
from contextlib import contextmanager
from pathlib import Path
from typing import List, Optional, get_args

from PIL import Image
from ppadb.client import Client as AdbClient
from ppadb.device import Device as AndroidDevice
from typing_extensions import Self

from askui.reporting import NULL_REPORTER, Reporter
from askui.tools.android.agent_os import (
Expand Down Expand Up @@ -202,6 +205,24 @@ def set_device_by_serial_number(self, device_sn: str) -> None:
msg = f"Device name {device_sn} not found"
raise AndroidAgentOsError(msg)

@contextmanager
def temporary_select(self, device_sn: str) -> Iterator[Self]:
previous_sn = self._device.serial if self._device is not None else None
self._reporter.add_message(
self._REPORTER_ROLE_NAME,
f"temporary_select({device_sn!r}) [previous={previous_sn!r}]",
)
self.set_device_by_serial_number(device_sn)
try:
yield self
finally:
if previous_sn is not None and previous_sn != device_sn:
self.set_device_by_serial_number(previous_sn)
self._reporter.add_message(
self._REPORTER_ROLE_NAME,
f"temporary_select({device_sn!r}) -> restored",
)

def _screenshot_without_reporting(self) -> Image.Image:
device: AndroidDevice = self._get_selected_device()
self._check_if_display_is_selected()
Expand Down
15 changes: 13 additions & 2 deletions src/askui/tools/askui/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
from .askui_controller import AskUiControllerClient, AskUiControllerServer
from .agent_os_server import (
AgentOsServer,
LocalAgentOsServer,
RemoteAgentOsServer,
)
from .agent_os_server_manager import (
AgentOsServerManager,
)
from .askui_controller import AskUiControllerClient

__all__ = [
"AgentOsServer",
"AgentOsServerManager",
"AskUiControllerClient",
"AskUiControllerServer",
"LocalAgentOsServer",
"RemoteAgentOsServer",
]
Loading
Loading