diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f762fbee..0015d7c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,7 +20,7 @@ repos: name: Style Guide Enforcement (flake8) args: - '--max-line-length=120' - - --ignore=D100,D203,D405,W503,E203,E501,F841,E126,E712,E123,E131,F821,E121,W605,E402 + - --ignore=D100,D203,D405,W503,E203,E501,F841,E126,E712,E123,E131,F821,E121,W605,E402,E704 - repo: 'https://github.com/asottile/pyupgrade' rev: v3.21.2 hooks: @@ -62,7 +62,9 @@ repos: # args: # - '--disable=R0903,C0111,C0301,W0703,R0914,R0801,R0913,E0401,W0511,C0413,R0902,C0103,W0201,C0209,W1203,W0707,C0415,W0611' # - repo: 'https://github.com/asottile/dead' -# rev: v1.3.0 +# rev: v2.1.0 # hooks: # - id: dead +# args: [--exclude, docs/source/conf.py|src/superannotate/lib/app/interface/sdk_interface.py|src/superannotate/lib/app/interface/cli_interface.py] + exclude: src/superannotate/lib/app/analytics|src/superannotate/lib/app/input_converters diff --git a/docs/source/conf.py b/docs/source/conf.py index 852b95ab..09f0ad72 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -6,7 +6,7 @@ # -- Project information ----------------------------------------------------- project = "SuperAnnotate Python SDK" -copyright = "2021, SuperAnnotate AI" +copyright = "2026, SuperAnnotate AI" author = "SuperAnnotate AI" # The full version, including alpha/beta/rc tags diff --git a/src/superannotate/__init__.py b/src/superannotate/__init__.py index c10018bb..ebed1f89 100644 --- a/src/superannotate/__init__.py +++ b/src/superannotate/__init__.py @@ -2,7 +2,7 @@ import os import sys -__version__ = "4.5.4dev1" +__version__ = "4.5.5dev2" os.environ.update({"sa_version": __version__}) diff --git a/src/superannotate/lib/app/analytics/aggregators.py b/src/superannotate/lib/app/analytics/aggregators.py index b3ef289f..1bd84209 100644 --- a/src/superannotate/lib/app/analytics/aggregators.py +++ b/src/superannotate/lib/app/analytics/aggregators.py @@ -384,9 +384,7 @@ def 
aggregate_image_annotations_as_df(self, annotations_paths: list[str]): annotation_json = None with open(annotation_path) as fp: annotation_json = json.load(fp) - parts = Path(annotation_path).name.split(self._annotation_suffix) row_data = self.__fill_image_metadata(row_data, annotation_json["metadata"]) - annotation_instance_id = 0 # include comments for annotation in annotation_json["comments"]: @@ -433,10 +431,8 @@ def aggregate_image_annotations_as_df(self, annotations_paths: list[str]): if Path(annotation_path).parent != Path(self.project_root): folder_name = Path(annotation_path).parent.name instance_row.folderName = folder_name - num_added = 0 if not attributes: rows.append(instance_row) - num_added = 1 else: for attribute in attributes: attribute_row = copy.copy(instance_row) @@ -469,10 +465,6 @@ def aggregate_image_annotations_as_df(self, annotations_paths: list[str]): attribute_row.attributeName = attribute_name rows.append(attribute_row) - num_added += 1 - - if num_added > 0: - annotation_instance_id += 1 df = pd.DataFrame([row.__dict__ for row in rows], dtype=object) df = df.astype({"probability": float}) diff --git a/src/superannotate/lib/app/analytics/common.py b/src/superannotate/lib/app/analytics/common.py index 85222d98..dc944bc2 100644 --- a/src/superannotate/lib/app/analytics/common.py +++ b/src/superannotate/lib/app/analytics/common.py @@ -3,7 +3,6 @@ from pathlib import Path import pandas as pd -import plotly.express as px from lib.core.exceptions import AppException logger = logging.getLogger("sa") @@ -558,50 +557,3 @@ def consensus(df, item_name, annot_type): instance_id += 1 return image_data - - -def consensus_plot(consensus_df, *_, **__): - plot_data = consensus_df.copy() - - # annotator-wise boxplot - annot_box_fig = px.box( - plot_data, - x="creatorEmail", - y="score", - points="all", - color="creatorEmail", - color_discrete_sequence=px.colors.qualitative.Dark24, - ) - annot_box_fig.show() - - # project-wise boxplot - project_box_fig = 
px.box( - plot_data, - x="folderName", - y="score", - points="all", - color="folderName", - color_discrete_sequence=px.colors.qualitative.Dark24, - ) - project_box_fig.show() - - # scatter plot of score vs area - fig = px.scatter( - plot_data, - x="area", - y="score", - color="className", - symbol="creatorEmail", - facet_col="folderName", - color_discrete_sequence=px.colors.qualitative.Dark24, - hover_data={ - "className": False, - "itemName": True, - "folderName": False, - "area": False, - "score": False, - }, - ) - fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1])) - fig.for_each_trace(lambda t: t.update(name=t.name.split("=")[-1])) - fig.show() diff --git a/src/superannotate/lib/app/input_converters/conversion.py b/src/superannotate/lib/app/input_converters/conversion.py index 57f28855..dd486361 100644 --- a/src/superannotate/lib/app/input_converters/conversion.py +++ b/src/superannotate/lib/app/input_converters/conversion.py @@ -94,16 +94,6 @@ def _passes_type_sanity(params_info): ) -def _passes_list_members_type_sanity(lists_info): - for _list in lists_info: - for _list_member in _list[0]: - if not isinstance(_list_member, _list[2]): - raise AppException( - "'%s' should be list of '%s', but contains '%s'" - % (_list[1], _list[2], type(_list_member)) - ) - - def _passes_value_sanity(values_info): for value in values_info: if value[0] not in value[2]: diff --git a/src/superannotate/lib/app/input_converters/converters/coco_converters/coco_to_sa_pixel.py b/src/superannotate/lib/app/input_converters/converters/coco_converters/coco_to_sa_pixel.py deleted file mode 100644 index cd213c0b..00000000 --- a/src/superannotate/lib/app/input_converters/converters/coco_converters/coco_to_sa_pixel.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -COCO to SA conversion method -""" - -import logging - -from .coco_api import _maskfrRLE -from .coco_api import decode - -logger = logging.getLogger("sa") - - -def annot_to_bitmask(annot): - if isinstance(annot["counts"], 
list): - bitmask = _maskfrRLE(annot) - elif isinstance(annot["counts"], str): - bitmask = decode(annot) - - return bitmask diff --git a/src/superannotate/lib/app/input_converters/converters/coco_converters/sa_pixel_to_coco.py b/src/superannotate/lib/app/input_converters/converters/coco_converters/sa_pixel_to_coco.py deleted file mode 100644 index 92af1638..00000000 --- a/src/superannotate/lib/app/input_converters/converters/coco_converters/sa_pixel_to_coco.py +++ /dev/null @@ -1,31 +0,0 @@ -import cv2 as cv -import numpy as np - -from .coco_api import _area -from .coco_api import _toBbox - - -def __instance_object_commons_per_instance(instance, id_generator, flat_mask): - if "parts" not in instance: - return None - - anno_id = next(id_generator) - parts = [int(part["color"][1:], 16) for part in instance["parts"]] - category_id = instance["classId"] - - instance_bitmask = np.isin(flat_mask, parts) - - databytes = instance_bitmask * np.uint8(255) - contours, _ = cv.findContours(databytes, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE) - bbox = list(_toBbox(instance_bitmask)) - area = int(_area(instance_bitmask.astype(np.uint8))) - return (bbox, area, contours, category_id, anno_id) - - -def instance_object_commons(instances, id_generator, flat_mask): - commons_lst = [ - __instance_object_commons_per_instance(x, id_generator, flat_mask) - for x in instances - ] - commons_lst = [x for x in commons_lst if x is not None] - return commons_lst diff --git a/src/superannotate/lib/app/input_converters/converters/supervisely_converters/supervisely_strategies.py b/src/superannotate/lib/app/input_converters/converters/supervisely_converters/supervisely_strategies.py index 82288a59..428f09f5 100644 --- a/src/superannotate/lib/app/input_converters/converters/supervisely_converters/supervisely_strategies.py +++ b/src/superannotate/lib/app/input_converters/converters/supervisely_converters/supervisely_strategies.py @@ -24,11 +24,11 @@ def to_sa_format(self): == 
"supervisely_keypoint_detection_to_sa_vector" ): meta_json = json.load(open(self.export_root / "meta.json")) - sa_jsons = self.conversion_algorithm( + self.conversion_algorithm( json_files, classes_id_map, meta_json, self.output_dir ) else: - sa_jsons = self.conversion_algorithm( + self.conversion_algorithm( json_files, classes_id_map, self.task, self.output_dir ) (self.output_dir / "classes").mkdir(exist_ok=True) diff --git a/src/superannotate/lib/app/input_converters/converters/voc_converters/voc_helper.py b/src/superannotate/lib/app/input_converters/converters/voc_converters/voc_helper.py index 7c8795ff..24d73b9e 100644 --- a/src/superannotate/lib/app/input_converters/converters/voc_converters/voc_helper.py +++ b/src/superannotate/lib/app/input_converters/converters/voc_converters/voc_helper.py @@ -38,17 +38,6 @@ def _iou(bbox1, bbox2): ) -def _get_image_shape_from_xml(file_path): - with open(os.path.splitext(file_path)[0] + ".xml") as f: - tree = ET.parse(f) - - size = tree.find("size") - width = int(size.find("width").text) - height = int(size.find("height").text) - - return height, width - - def _get_image_metadata(file_path): with open(os.path.splitext(file_path)[0] + ".xml") as f: tree = ET.parse(f) diff --git a/src/superannotate/lib/app/input_converters/sa_conversion.py b/src/superannotate/lib/app/input_converters/sa_conversion.py deleted file mode 100644 index de2e0244..00000000 --- a/src/superannotate/lib/app/input_converters/sa_conversion.py +++ /dev/null @@ -1,8 +0,0 @@ -import logging -import shutil - -logger = logging.getLogger("sa") - - -def copy_file(src_path, dst_path): - shutil.copy(src_path, dst_path) diff --git a/src/superannotate/lib/app/interface/responses.py b/src/superannotate/lib/app/interface/responses.py new file mode 100644 index 00000000..a4bc1501 --- /dev/null +++ b/src/superannotate/lib/app/interface/responses.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +from collections.abc import Callable +from collections.abc import 
Iterator +from typing import Generic +from typing import overload +from typing import TypeVar + +T = TypeVar("T") + + +class BaseResult(list, Generic[T]): + """A generic list-like wrapper for results with lazy loading support. + + Inherits from ``list`` for full backward compatibility with code that + expects a real list (``isinstance(x, list)``, JSON serializers, etc.). + Data is fetched lazily on first access. + """ + + def __init__(self, data_fetcher: Callable[[], list[T]]) -> None: + super().__init__() + self._data_fetcher = data_fetcher + self._loaded = False + + def _ensure_data(self) -> None: + """Lazily fetch data if not already loaded.""" + if not self._loaded: + list.extend(self, self._data_fetcher()) + self._loaded = True + + def data(self) -> list[T]: + self._ensure_data() + return list(self) + + def __iter__(self) -> Iterator[T]: + self._ensure_data() + return list.__iter__(self) + + def __len__(self) -> int: + self._ensure_data() + return list.__len__(self) + + @overload + def __getitem__(self, index: int) -> T: ... + + @overload + def __getitem__(self, index: slice) -> list[T]: ... + + def __getitem__(self, index: int | slice) -> T | list[T]: + self._ensure_data() + return list.__getitem__(self, index) + + def __repr__(self) -> str: + self._ensure_data() + return list.__repr__(self) + + def __bool__(self) -> bool: + self._ensure_data() + return list.__len__(self) > 0 + + def __contains__(self, item: object) -> bool: + self._ensure_data() + return list.__contains__(self, item) + + def __eq__(self, other: object) -> bool: + self._ensure_data() + return list.__eq__(self, other) + + __hash__ = None # type: ignore[assignment] + + +class QueryResult(BaseResult[dict]): + """A list-like wrapper for query results that supports .count() method. + + This class wraps a list of query results while maintaining full backward + compatibility with list-like operations (iteration, indexing, len()). + Data is fetched lazily - only when accessed. 
Calling .count() does not + trigger data fetching. + """ + + def __init__( + self, + data_fetcher: Callable[[], list[dict]], + count_fetcher: Callable[[], int], + ) -> None: + super().__init__(data_fetcher) + self._count_fetcher = count_fetcher + + def count(self) -> int: + """Return the count of items matching the query from the server. + + This method does not trigger data fetching - it makes a separate + lightweight API call to get only the count. + """ + return self._count_fetcher() diff --git a/src/superannotate/lib/app/interface/sdk_interface.py b/src/superannotate/lib/app/interface/sdk_interface.py index dae44d8a..bb2ca159 100644 --- a/src/superannotate/lib/app/interface/sdk_interface.py +++ b/src/superannotate/lib/app/interface/sdk_interface.py @@ -11,6 +11,7 @@ import warnings from collections.abc import Callable from collections.abc import Iterable +from functools import partial from pathlib import Path from typing import Annotated from typing import Any @@ -80,6 +81,8 @@ from lib.infrastructure.query_builder import QueryBuilderChain from lib.infrastructure.query_builder import FieldValidationHandler +from lib.app.interface.responses import QueryResult + logger = logging.getLogger("sa") NotEmptyStr = Annotated[str, StringConstraints(strict=True, min_length=1)] @@ -101,8 +104,6 @@ ANNOTATION_TYPE = Literal["bbox", "polygon", "point", "tag"] -ANNOTATOR_ROLE = Literal["Admin", "Annotator", "QA"] - FOLDER_STATUS = Literal["NotStarted", "InProgress", "Completed", "OnHold"] @@ -152,6 +153,7 @@ def __init__( self._annotation_adapter: BaseMultimodalAnnotationAdapter | None = None self._overwrite = overwrite self._annotation: dict | None = None + self._set_component_called = False def _set_small_annotation_adapter(self, annotation: dict | None = None): self._annotation_adapter = MultimodalSmallAnnotationAdapter( @@ -213,7 +215,8 @@ def __exit__(self, exc_type, exc_val, exc_tb): if exc_type: return False - self.save() + if self._set_component_called: + self.save() 
return True def save(self): @@ -222,6 +225,7 @@ def save(self): else: self._set_small_annotation_adapter(self.annotation) self._annotation_adapter.save() + self._set_component_called = False def get_metadata(self): """ @@ -281,6 +285,7 @@ def set_component_value(self, component_id: str, value: Any): """ self.annotation_adapter.set_component_value(component_id, value) + self._set_component_called = True return self @@ -4267,10 +4272,12 @@ def query( project: NotEmptyStr | int | tuple[int, int] | tuple[str, str], query: NotEmptyStr | None = None, subset: NotEmptyStr | None = None, - ): + ) -> QueryResult: """Return items that satisfy the given query. Query syntax should be in SuperAnnotate query language(https://doc.superannotate.com/docs/explore-overview). + The returned QueryResult behaves like a list of dicts, and additionally exposes a .count() method. + :param project: Accepts a project as a string ("project" or "project/folder") or as a tuple (project_id, folder_id), where the folder is optional.” :type project: Union[str, int, Tuple[int, int], Tuple[str, str]] @@ -4282,14 +4289,54 @@ def query( :type subset: str :return: queried items' metadata list - :rtype: list of dicts + :rtype: QueryResult (list of dicts with .count() method) + + Request Example: + :: + + sa_client = SAClient() + + queried_items = sa_client.query( + project="Image Project", + query="metadata(lastAction.email = test@superannotate.com)" + ) + for item in queried_items: + print(item["name"]) + + .. py:method:: query.count() -> int + + Returns the total number of items matching the query. 
+ + :return: total number of matching items + :rtype: int + + Request Example: + :: + + sa_client = SAClient() + + total = sa_client.query( + project="Image Project", + query="metadata(lastAction.email = test@superannotate.com)" + ).count() + print(f"Total matching items: {total}") """ project, folder = self.controller.get_project_folder(project) - items = self.controller.query_entities(project, folder, query, subset) - exclude = { - "meta", - } - return BaseSerializer.serialize_iterable(items, exclude=exclude) + fetch_entities = partial( + self.controller.query_entities, project, folder, query, subset + ) + return QueryResult( + data_fetcher=lambda: BaseSerializer.serialize_iterable( + fetch_entities(), exclude={"meta"} + ), + count_fetcher=partial( + self.controller.query_items_count, + project=project, + folder=folder, + query=query, + subset=subset, + ), + ) def get_item_metadata( self, diff --git a/src/superannotate/lib/core/__init__.py b/src/superannotate/lib/core/__init__.py index 4bfb7278..c7d0964b 100644 --- a/src/superannotate/lib/core/__init__.py +++ b/src/superannotate/lib/core/__init__.py @@ -28,8 +28,6 @@ LOG_FILE_LOCATION = f"{HOME_PATH}/logs" DEFAULT_LOGGING_LEVEL = "INFO" -_loggers = {} - def setup_logging(level=DEFAULT_LOGGING_LEVEL, file_path=LOG_FILE_LOCATION): logger = logging.getLogger("sa") @@ -85,10 +83,6 @@ def setup_logging(level=DEFAULT_LOGGING_LEVEL, file_path=LOG_FILE_LOCATION): MAX_IMAGE_SIZE = 100 * 1024 * 1024 # 100 MB limit TOKEN_UUID = "token" -ALREADY_EXISTING_FILES_WARNING = ( - "{} already existing file(s) found that won't be uploaded." -) - ATTACHING_FILES_MESSAGE = "Attaching {} file(s) to project {}." ATTACHING_UPLOAD_STATE_ERROR = "You cannot attach URLs in this type of project. Please attach it in an external storage project." 
diff --git a/src/superannotate/lib/core/entities/base.py b/src/superannotate/lib/core/entities/base.py index a1beb230..008f6ea6 100644 --- a/src/superannotate/lib/core/entities/base.py +++ b/src/superannotate/lib/core/entities/base.py @@ -12,11 +12,6 @@ from pydantic import PlainSerializer from pydantic_extra_types.color import Color -DATE_TIME_FORMAT_ERROR_MESSAGE = ( - "does not match expected format YYYY-MM-DDTHH:MM:SS.fffZ" -) -DATE_REGEX = r"\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d(?:\.\d{3})Z" - _missing = object() diff --git a/src/superannotate/lib/core/entities/classes.py b/src/superannotate/lib/core/entities/classes.py index 8075589c..ff9c440c 100644 --- a/src/superannotate/lib/core/entities/classes.py +++ b/src/superannotate/lib/core/entities/classes.py @@ -11,11 +11,6 @@ from pydantic import StrictInt from pydantic import StrictStr -DATE_REGEX = r"\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d(?:\.\d{3})Z" -DATE_TIME_FORMAT_ERROR_MESSAGE = ( - "does not match expected format YYYY-MM-DDTHH:MM:SS.fffZ" -) - class GroupTypeEnum(str, Enum): RADIO = "radio" diff --git a/src/superannotate/lib/core/entities/multimodal_form.py b/src/superannotate/lib/core/entities/multimodal_form.py index 72d79732..0ae732fd 100644 --- a/src/superannotate/lib/core/entities/multimodal_form.py +++ b/src/superannotate/lib/core/entities/multimodal_form.py @@ -253,13 +253,6 @@ class FormModel(BaseModel): code: str | list | None = "" environments: list[Any] = [] - @property - def code_as_string(self) -> str: - """Convert code to string if it's a list""" - if isinstance(self.code, list): - return "\n".join(str(item) for item in self.code) - return self.code or "" - def _extract_all_components( self, components: list[dict[str, Any]] ) -> list[dict[str, Any]]: diff --git a/src/superannotate/lib/core/entities/work_managament.py b/src/superannotate/lib/core/entities/work_managament.py index a66c1fe0..2e3c8853 100644 --- a/src/superannotate/lib/core/entities/work_managament.py +++ 
b/src/superannotate/lib/core/entities/work_managament.py @@ -1,6 +1,5 @@ from __future__ import annotations -import datetime from enum import auto from enum import Enum from typing import Any @@ -51,13 +50,6 @@ def __repr__(self): return self._name_ -def _validate_string_date_wm(v: datetime.datetime) -> str: - """Convert datetime to string format for WM entities.""" - if isinstance(v, str): - return v - return v.strftime("%Y-%m-%dT%H:%M:%S+00:00") - - class WMProjectEntity(TimedBaseModel): model_config = ConfigDict(extra="ignore") diff --git a/src/superannotate/lib/core/service_types.py b/src/superannotate/lib/core/service_types.py index b5f95a93..78aaaf55 100644 --- a/src/superannotate/lib/core/service_types.py +++ b/src/superannotate/lib/core/service_types.py @@ -7,7 +7,6 @@ from lib.core.entities.work_managament import WMProjectEntity from lib.core.entities.work_managament import WMScoreEntity from lib.core.entities.work_managament import WMUserEntity -from lib.core.enums import ProjectType from lib.core.exceptions import AppException from pydantic import BaseModel from pydantic import ConfigDict @@ -166,10 +165,6 @@ class UserResponse(ServiceResponse): res_data: entities.UserEntity = None -class ModelListResponse(ServiceResponse): - res_data: list[entities.AnnotationClassEntity] = None - - class _IntegrationResponse(ServiceResponse): integrations: list[entities.IntegrationEntity] = Field(default_factory=list) @@ -190,10 +185,6 @@ class SubsetListResponse(ServiceResponse): res_data: list[entities.SubSetEntity] = None -class SubsetResponse(ServiceResponse): - res_data: entities.SubSetEntity = None - - class UploadAnnotationsResponse(ServiceResponse): res_data: UploadAnnotations | None = None @@ -210,10 +201,6 @@ class UserLimitsResponse(ServiceResponse): res_data: UserLimits = None -class ItemListResponse(ServiceResponse): - res_data: list[entities.BaseItemEntity] = None - - class FolderResponse(ServiceResponse): res_data: entities.FolderEntity = None @@ -268,15 
+255,3 @@ class WMScoreListResponse(ServiceResponse): class TelemetryScoreListResponse(ServiceResponse): res_data: list[TelemetryScoreEntity] = None - - -PROJECT_TYPE_RESPONSE_MAP = { - ProjectType.VECTOR: ImageResponse, - ProjectType.OTHER: ClassificationResponse, - ProjectType.VIDEO: VideoResponse, - ProjectType.TILED: TiledResponse, - ProjectType.PIXEL: ImageResponse, - ProjectType.DOCUMENT: DocumentResponse, - ProjectType.POINT_CLOUD: PointCloudResponse, - ProjectType.MULTIMODAL: ImageResponse, -} diff --git a/src/superannotate/lib/core/serviceproviders.py b/src/superannotate/lib/core/serviceproviders.py index 6a28e04e..d92de4b9 100644 --- a/src/superannotate/lib/core/serviceproviders.py +++ b/src/superannotate/lib/core/serviceproviders.py @@ -750,7 +750,9 @@ def saqul_query( def query_item_count( self, project: entities.ProjectEntity, + folder: entities.FolderEntity = None, query: str = None, + subset_id: int = None, ) -> ServiceResponse: raise NotImplementedError diff --git a/src/superannotate/lib/core/usecases/annotations.py b/src/superannotate/lib/core/usecases/annotations.py index 4fea9b7e..85801320 100644 --- a/src/superannotate/lib/core/usecases/annotations.py +++ b/src/superannotate/lib/core/usecases/annotations.py @@ -41,7 +41,6 @@ from lib.core.response import Response from lib.core.service_types import UploadAnnotationAuthData from lib.core.serviceproviders import BaseServiceProvider -from lib.core.serviceproviders import ServiceResponse from lib.core.serviceproviders import UploadAnnotationsResponse from lib.core.types import PriorityScoreEntity from lib.core.usecases.base import BaseReportableUseCase @@ -71,13 +70,6 @@ class Report: missing_attrs: list -def get_or_raise(response: ServiceResponse): - if response.ok: - return response.data - else: - raise AppException(response.error) - - def log_report( report: Report, ): @@ -245,7 +237,6 @@ async def _upload_big_annotation(item_data: ItemToUpload) -> tuple[str, bool]: class 
UploadAnnotationsUseCase(BaseReportableUseCase): CHUNK_SIZE = 500 - CHUNK_SIZE_MB = 10 * 1024 * 1024 URI_THRESHOLD = 4 * 1024 - 120 def __init__( @@ -452,10 +443,6 @@ class UploadAnnotationsFromFolderUseCase(BaseReportableUseCase): MAX_WORKERS = 16 CHUNK_SIZE = 100 CHUNK_SIZE_PATHS = 500 - CHUNK_SIZE_MB = 10 * 1024 * 1024 - STATUS_CHANGE_CHUNK_SIZE = 100 - AUTH_DATA_CHUNK_SIZE = 500 - THREADS_COUNT = 4 URI_THRESHOLD = 4 * 1024 - 120 def __init__( @@ -569,13 +556,6 @@ def prepare_annotation(self, annotation: dict, size) -> dict: ) return annotation - @staticmethod - def get_mask_path(path: str) -> str: - - replacement = ".json" - parts = path.rsplit(replacement, 1) - return constants.ANNOTATION_MASK_POSTFIX.join(parts) - async def get_annotation( self, path: str ) -> (tuple[io.StringIO] | None, io.BytesIO | None): @@ -1756,7 +1736,6 @@ def execute(self): class UploadMultiModalAnnotationsUseCase(BaseReportableUseCase): CHUNK_SIZE = 500 - CHUNK_SIZE_MB = 10 * 1024 * 1024 URI_THRESHOLD = 4 * 1024 - 120 def __init__( diff --git a/src/superannotate/lib/core/usecases/items.py b/src/superannotate/lib/core/usecases/items.py index 5a1f2177..b4e44121 100644 --- a/src/superannotate/lib/core/usecases/items.py +++ b/src/superannotate/lib/core/usecases/items.py @@ -170,13 +170,17 @@ def __init__( self, reporter: Reporter, project: ProjectEntity, + folder: FolderEntity, service_provider: BaseServiceProvider, query: str, + subset: str = None, ): super().__init__(reporter) self._project = project + self._folder = folder self._service_provider = service_provider self._query = query + self._subset = subset def validate_arguments(self): if self._query: @@ -197,9 +201,40 @@ def validate_arguments(self): if not response.ok: raise AppException(response.error) + if not any([self._query, self._subset]): + raise AppException( + "The query and subset params cannot have the value None at the same time." 
+ ) + if self._subset and not self._folder.is_root: + raise AppException( + "The folder name should be specified in the query string." + ) + def execute(self) -> Response: if self.is_valid(): - query_kwargs = {"query": self._query} + query_kwargs = {} + if self._subset: + response = self._service_provider.explore.list_subsets(self._project) + if response.ok: + subset = next( + (_sub for _sub in response.data if _sub.name == self._subset), + None, + ) + else: + self._response.errors = response.error + return self._response + if not subset: + self._response.errors = AppException( + "Subset not found. Use the superannotate." + "get_subsets() function to get a list of the available subsets." + ) + return self._response + query_kwargs["subset_id"] = subset.id + if self._query: + query_kwargs["query"] = self._query + query_kwargs["folder"] = ( + None if self._folder.name == "root" else self._folder + ) service_response = self._service_provider.explore.query_item_count( self._project, **query_kwargs, @@ -470,198 +505,6 @@ def execute(self) -> Response: return self._response -class CopyItems(BaseReportableUseCase): - """ - Copy items in bulk between folders in a project. - Return skipped item names. 
- """ - - CHUNK_SIZE = 500 - - def __init__( - self, - reporter: Reporter, - project: ProjectEntity, - from_folder: FolderEntity, - to_folder: FolderEntity, - item_names: list[str], - service_provider: BaseServiceProvider, - include_annotations: bool, - ): - super().__init__(reporter) - self._project = project - self._from_folder = from_folder - self._to_folder = to_folder - self._item_names = item_names - self._service_provider = service_provider - self._include_annotations = include_annotations - - def _validate_limitations(self, items_count): - response = self._service_provider.get_limitations( - project=self._project, - folder=self._to_folder, - ) - if not response.ok: - raise AppValidationException(response.error) - if items_count > response.data.folder_limit.remaining_image_count: - raise AppValidationException(constants.COPY_FOLDER_LIMIT_ERROR_MESSAGE) - if items_count > response.data.project_limit.remaining_image_count: - raise AppValidationException(constants.COPY_PROJECT_LIMIT_ERROR_MESSAGE) - - def validate_item_names(self): - if self._item_names: - self._item_names = list(set(self._item_names)) - - def execute(self): - if self.is_valid(): - if self._item_names: - items = self._item_names - else: - data = self._service_provider.item_service.list( - self._project.id, self._from_folder.id, EmptyQuery() - ) - items = [i.name for i in data] - existing_items = [] - for i in range(0, len(items), self.CHUNK_SIZE): - query = Filter( - "name", items[i : i + self.CHUNK_SIZE], OperatorEnum.IN - ) # noqa - data = self._service_provider.item_service.list( - self._project.id, self._to_folder.id, query - ) - if not data: - continue - existing_items += data - duplications = [item.name for item in existing_items] - items_to_copy = list(set(items) - set(duplications)) - skipped_items = duplications - try: - self._validate_limitations(len(items_to_copy)) - except AppValidationException as e: - self._response.errors = e - return self._response - if items_to_copy: - for i in 
range(0, len(items_to_copy), self.CHUNK_SIZE): - chunk_to_copy = items_to_copy[i : i + self.CHUNK_SIZE] # noqa: E203 - response = self._service_provider.items.copy_multiple( - project=self._project, - from_folder=self._from_folder, - to_folder=self._to_folder, - item_names=chunk_to_copy, - include_annotations=self._include_annotations, - ) - if not response.ok or not response.data.get("poll_id"): - skipped_items.extend(chunk_to_copy) - continue - try: - self._service_provider.items.await_copy( - project=self._project, - poll_id=response.data["poll_id"], - items_count=len(chunk_to_copy), - ) - except BackendError as e: - self._response.errors = AppException(e) - return self._response - existing_items = [] - for i in range(0, len(items), self.CHUNK_SIZE): - data = self._service_provider.item_service.list( - self._project.id, - self._to_folder.id, - Filter( - "name", items[i : i + self.CHUNK_SIZE], OperatorEnum.IN - ), # noqa - ) - existing_items += data - - existing_item_names_set = {item.name for item in existing_items} - items_to_copy_names_set = set(items_to_copy) - copied_items = existing_item_names_set.intersection( - items_to_copy_names_set - ) - skipped_items.extend(list(items_to_copy_names_set - copied_items)) - self.reporter.log_info( - f"Copied {len(copied_items)}/{len(items)} item(s) from " - f"{self._project.name}{'' if self._from_folder.is_root else f'/{self._from_folder.name}'} to " - f"{self._project.name}{'' if self._to_folder.is_root else f'/{self._to_folder.name}'}" - ) - self._response.data = skipped_items - return self._response - - -class MoveItems(BaseReportableUseCase): - CHUNK_SIZE = 1000 - - def __init__( - self, - reporter: Reporter, - project: ProjectEntity, - from_folder: FolderEntity, - to_folder: FolderEntity, - item_names: list[str], - service_provider: BaseServiceProvider, - ): - super().__init__(reporter) - self._project = project - self._from_folder = from_folder - self._to_folder = to_folder - self._item_names = item_names - 
self._service_provider = service_provider - - def validate_item_names(self): - if self._item_names: - self._item_names = list(set(self._item_names)) - - def _validate_limitations(self, items_count): - response = self._service_provider.get_limitations( - project=self._project, - folder=self._to_folder, - ) - if not response.ok: - raise AppValidationException(response.error) - if items_count > response.data.folder_limit.remaining_image_count: - raise AppValidationException(constants.MOVE_FOLDER_LIMIT_ERROR_MESSAGE) - if items_count > response.data.project_limit.remaining_image_count: - raise AppValidationException(constants.MOVE_PROJECT_LIMIT_ERROR_MESSAGE) - - def execute(self): - if self.is_valid(): - if not self._item_names: - items = [ - i.name - for i in self._service_provider.item_service.list( - self._project.id, self._from_folder.id, EmptyQuery() - ) - ] - else: - items = self._item_names - try: - self._validate_limitations(len(items)) - except AppValidationException as e: - self._response.errors = e - return self._response - moved_images = [] - for i in range(0, len(items), self.CHUNK_SIZE): - response = self._service_provider.items.move_multiple( - project=self._project, - from_folder=self._from_folder, - to_folder=self._to_folder, - item_names=items[i : i + self.CHUNK_SIZE], # noqa: E203 - ) - if not response.ok: - raise AppException(response.error) - if response.ok and response.data.get("done"): - moved_images.extend(response.data["done"]) - - self.reporter.log_info( - f"Moved {len(moved_images)}/{len(items)} item(s) from " - f"{self._project.name}{'' if self._from_folder.is_root else f'/{self._from_folder.name}'} to " - f"{self._project.name}{'' if self._to_folder.is_root else f'/{self._to_folder.name}'}" - ) - - self._response.data = list(set(items) - set(moved_images)) - return self._response - - class CopyMoveItems(BaseReportableUseCase): """ Copy/Move items in bulk between folders in a project. 
@@ -862,7 +705,7 @@ def execute(self): item_names=self._item_names[i : i + self.CHUNK_SIZE], # noqa: E203, annotation_status=self._annotation_status_code, ) - if not status_changed: + if not status_changed.ok: self._response.errors = AppException(self.ERROR_MESSAGE) break return self._response diff --git a/src/superannotate/lib/infrastructure/controller.py b/src/superannotate/lib/infrastructure/controller.py index febb40bf..f88edd7b 100644 --- a/src/superannotate/lib/infrastructure/controller.py +++ b/src/superannotate/lib/infrastructure/controller.py @@ -943,26 +943,6 @@ def _determine_condition_and_key(keys: list[str]) -> tuple[OperatorEnum, str]: condition = OperatorEnum.EQ return condition, ".".join(keys) - def _build_query( - self, project: ProjectEntity, filters: dict, include: list[str] - ) -> Query: - """Build the query object based on filters and include fields.""" - filter_annotations = ItemFilters.__annotations__.keys() - query = EmptyQuery() - _include = set(include if include else []) - for key, val in filters.items(): - if key in filter_annotations: - _keys = key.split("__") - entity = PROJECT_ITEM_ENTITY_MAP.get(project.type, BaseItemEntity) - if _keys[0] not in entity.__fields__: - _include.add(_keys[0]) - val = self._handle_special_fields(project, _keys, val) - condition, _key = self._determine_condition_and_key(_keys) - query &= Filter(_key, val, condition) - for i in _include: - query &= Join(i) - return query - @staticmethod def process_response( service_provider, @@ -1652,11 +1632,6 @@ def s3_repo(self): class Controller(BaseController): DEFAULT = None - @classmethod - def set_default(cls, obj): - cls.DEFAULT = obj - return cls.DEFAULT - def get_folder_by_id(self, folder_id: int, project_id: int): response = self.folders.get_by_id( folder_id=folder_id, project_id=project_id, team_id=self.team_id @@ -2039,13 +2014,20 @@ def query_entities( self.service_provider, items, project, folder, map_fields=False ) - def query_items_count(self, 
project_name: str, query: str = None) -> int: - project = self.get_project(project_name) + def query_items_count( + self, + project: ProjectEntity, + folder: FolderEntity, + query: str = None, + subset: str = None, + ) -> int: use_case = usecases.QueryEntitiesCountUseCase( reporter=self.get_default_reporter(), project=project, + folder=folder, query=query, + subset=subset, service_provider=self.service_provider, ) response = use_case.execute() diff --git a/src/superannotate/lib/infrastructure/serviceprovider.py b/src/superannotate/lib/infrastructure/serviceprovider.py index ec2590aa..c39979b9 100644 --- a/src/superannotate/lib/infrastructure/serviceprovider.py +++ b/src/superannotate/lib/infrastructure/serviceprovider.py @@ -37,7 +37,6 @@ class ServiceProvider(BaseServiceProvider): URL_USER = "user/ME" URL_USERS = "users" URL_GET_EXPORT = "export/{}" - URL_PREDICTION = "images/prediction" URL_FOLDERS_IMAGES = "images-folders" URL_INVITE_CONTRIBUTORS = "api/v1/team/{}/inviteUsers" URL_ANNOTATION_UPLOAD_PATH_TOKEN = "images/getAnnotationsPathsAndTokens" diff --git a/src/superannotate/lib/infrastructure/services/annotation.py b/src/superannotate/lib/infrastructure/services/annotation.py index cc7ed0eb..5fed8fb6 100644 --- a/src/superannotate/lib/infrastructure/services/annotation.py +++ b/src/superannotate/lib/infrastructure/services/annotation.py @@ -28,7 +28,6 @@ class AnnotationService(BaseAnnotationService): ASSETS_PROVIDER_VERSION = "v4" - DEFAULT_CHUNK_SIZE = 5000 URL_GET_ANNOTATIONS = "items/annotations/download" URL_UPLOAD_ANNOTATIONS = "items/annotations/upload" @@ -85,7 +84,7 @@ async def _sync_large_annotation( headers=self.client.default_headers, raise_for_status=True, ) as session: - _response = await session.request("post", sync_url, params=sync_params) + await session.request("post", sync_url, params=sync_params) sync_params.pop("current_source") sync_params.pop("desired_source") diff --git a/src/superannotate/lib/infrastructure/services/explore.py 
b/src/superannotate/lib/infrastructure/services/explore.py index 4afbd544..341fae49 100644 --- a/src/superannotate/lib/infrastructure/services/explore.py +++ b/src/superannotate/lib/infrastructure/services/explore.py @@ -196,13 +196,19 @@ def saqul_query( def query_item_count( self, project: entities.ProjectEntity, + folder: entities.FolderEntity = None, query: str = None, + subset_id: int = None, ) -> ServiceResponse: params = { "project_id": project.id, "includeFolderNames": True, } + if folder: + params["folder_id"] = folder.id + if subset_id: + params["subset_id"] = subset_id data = {"query": query} response = self.client.request( urljoin(self.explore_service_url, self.URL_QUERY_COUNT), diff --git a/src/superannotate/lib/infrastructure/services/item_service.py b/src/superannotate/lib/infrastructure/services/item_service.py index e8d52936..0528bd2f 100644 --- a/src/superannotate/lib/infrastructure/services/item_service.py +++ b/src/superannotate/lib/infrastructure/services/item_service.py @@ -9,7 +9,6 @@ class ItemService(SuperannotateServiceProvider): - MAX_URI_LENGTH = 15_000 URL_LIST = "items/search" URL_GET = "items/{item_id}" diff --git a/src/superannotate/lib/infrastructure/stream_data_handler.py b/src/superannotate/lib/infrastructure/stream_data_handler.py index 9dcd90d4..27a25c1a 100644 --- a/src/superannotate/lib/infrastructure/stream_data_handler.py +++ b/src/superannotate/lib/infrastructure/stream_data_handler.py @@ -42,13 +42,6 @@ def __init__( self._items_downloaded = 0 self._active_sessions = set() - def get_json(self, data: bytes): - try: - return json.loads(data) - except json.decoder.JSONDecodeError as e: - self._reporter.log_error(f"Invalud chunk: {str(e)}") - return None - @async_retry_on_generator((BackendError,)) async def fetch( self, diff --git a/src/superannotate/lib/infrastructure/validators.py b/src/superannotate/lib/infrastructure/validators.py index a96b66f1..04d53582 100644 --- a/src/superannotate/lib/infrastructure/validators.py +++ 
b/src/superannotate/lib/infrastructure/validators.py @@ -40,26 +40,6 @@ def all_literal_values(type_: typing.Any) -> tuple[typing.Any, ...]: return () -def make_literal_validator( - type_: typing.Any, -) -> typing.Callable[[typing.Any], typing.Any]: - """ - Adding ability to input literal in the lower case. - """ - permitted_choices = all_literal_values(type_) - allowed_choices = { - v.lower() if isinstance(v, str) and v else v: v for v in permitted_choices - } - - def literal_validator(v: typing.Any) -> typing.Any: - try: - return allowed_choices[v.lower() if isinstance(v, str) else v] - except (KeyError, AttributeError): - raise WrongConstantError(given=v, permitted=permitted_choices) - - return literal_validator - - def get_tabulation() -> int: try: return int(os.get_terminal_size().columns / 2) diff --git a/tests/integration/items/test_item_context.py b/tests/integration/items/test_item_context.py index 454e4879..7bcd7ede 100644 --- a/tests/integration/items/test_item_context.py +++ b/tests/integration/items/test_item_context.py @@ -1,8 +1,12 @@ import json import os from pathlib import Path +from unittest import TestCase +from unittest.mock import MagicMock +from unittest.mock import patch from src.superannotate import FileChangedError +from src.superannotate import ItemContext from src.superannotate import SAClient from tests.integration.base import BaseTestCase @@ -135,3 +139,63 @@ def tearDown(self) -> None: sa.delete_project(self.PROJECT_NAME) except Exception: ... 
+ + +class TestItemContextSetComponentCalledFlag(TestCase): + def _make_context(self): + ic = ItemContext( + controller=MagicMock(), + project=MagicMock(), + folder=MagicMock(), + item=MagicMock(), + overwrite=True, + ) + ic._annotation_adapter = MagicMock() + ic._annotation_adapter.annotation = {"metadata": {}, "data": {}} + return ic + + def test_dirty_flag_initial_state(self): + ic = self._make_context() + self.assertFalse(ic._set_component_called) + + def test_set_component_value_marks_dirty(self): + ic = self._make_context() + ic.set_component_value("component_id", "value") + self.assertTrue(ic._set_component_called) + + def test_save_called_on_exit_after_set_component_value(self): + ic = self._make_context() + with patch.object(ItemContext, "save", autospec=True) as save_mock: + with ic: + ic.set_component_value("component_id", "value") + save_mock.assert_called_once_with(ic) + + def test_dirty_flag_reset_after_save(self): + ic = self._make_context() + with patch.object(ic, "_set_small_annotation_adapter"), patch.object( + ic, "_set_large_annotation_adapter" + ): + ic.set_component_value("component_id", "value") + self.assertTrue(ic._set_component_called) + ic.save() + self.assertFalse(ic._set_component_called) + + def test_no_double_save_on_exit_after_manual_save(self): + ic = self._make_context() + with patch.object(ic, "_set_small_annotation_adapter"), patch.object( + ic, "_set_large_annotation_adapter" + ): + with ic: + ic.set_component_value("component_id", "value") + ic.save() + self.assertEqual(ic._annotation_adapter.save.call_count, 1) + self.assertEqual(ic._annotation_adapter.save.call_count, 1) + + def test_save_not_called_when_exception_raised(self): + ic = self._make_context() + with patch.object(ItemContext, "save", autospec=True) as save_mock: + with self.assertRaises(RuntimeError): + with ic: + ic.set_component_value("component_id", "value") + raise RuntimeError("boom") + save_mock.assert_not_called() diff --git 
a/tests/integration/items/test_saqul_query.py b/tests/integration/items/test_saqul_query.py index 1e964c3b..4a2a2412 100644 --- a/tests/integration/items/test_saqul_query.py +++ b/tests/integration/items/test_saqul_query.py @@ -59,13 +59,51 @@ def test_query_on_100(self): sa.attach_items(self.PROJECT_NAME, os.path.join(DATA_SET_PATH, "100_urls.csv")) entities = sa.query(self.PROJECT_NAME, "metadata(status = NotStarted)") assert len(entities) == 100 - assert ( - sa.controller.query_items_count( - self.PROJECT_NAME, "metadata(status = NotStarted)" - ) - == 100 + assert entities.count() == len(entities) + + def test_query_result_list_like_behavior(self): + sa.attach_items(self.PROJECT_NAME, os.path.join(DATA_SET_PATH, "100_urls.csv")) + result = sa.query(self.PROJECT_NAME, "metadata(status = NotStarted)") + + self.assertEqual(len(result), 100) + self.assertIsInstance(result[0], dict) + self.assertIn("name", result[0]) + self.assertIsInstance(result[-1], dict) + self.assertEqual(len(result[0:5]), 5) + + items = [item for item in result] + self.assertEqual(len(items), 100) + self.assertIsInstance(list(result), list) + + def test_query_result_lazy_count(self): + sa.attach_items(self.PROJECT_NAME, os.path.join(DATA_SET_PATH, "100_urls.csv")) + result = sa.query(self.PROJECT_NAME, "metadata(status = NotStarted)") + + self.assertFalse(result._loaded) + self.assertEqual(result.count(), 100) + self.assertFalse(result._loaded) + + _ = result[0] + self.assertTrue(result._loaded) + + def test_query_result_count_respects_subset(self): + subset_name = "subset_a" + sa.attach_items(self.PROJECT_NAME, os.path.join(DATA_SET_PATH, "100_urls.csv")) + all_items = sa.query(self.PROJECT_NAME, "metadata(status = NotStarted)") + subset_items = [ + {"name": item["name"], "path": self.PROJECT_NAME} for item in all_items[:30] + ] + sa.add_items_to_subset(self.PROJECT_NAME, subset_name, subset_items) + + result = sa.query( + self.PROJECT_NAME, + "metadata(status = NotStarted)", + 
subset=subset_name, ) + self.assertEqual(result.count(), len(subset_items)) + self.assertEqual(result.count(), len(list(result))) + def test_validate_saqul_query(self): try: self.assertRaises(