From fe1d780a6254932e4fd714243f95971f05f08fba Mon Sep 17 00:00:00 2001 From: jdsika Date: Thu, 2 Apr 2026 17:22:31 +0200 Subject: [PATCH 01/15] fix(owlgen): warn on covering axiom edge cases for abstract classes Emit log messages for abstract class covering axiom edge cases: - Zero children: log an info message that no covering axiom will be generated - One child: warn that the covering axiom degenerates to an equivalence (Parent = Child), recommending --skip-abstract-class-as-unionof-subclasses Both axioms are still emitted when applicable (semantically correct per OWL 2), but the messages alert users who extend the ontology downstream. Tests verify the messages are logged, flag suppression works, the single-child covering axiom triple is correctly asserted, plus negative tests for multi-child and concrete class cases, and the mixin-only children edge case. Refs: linkml/linkml#3309, linkml/linkml#3219 Signed-off-by: jdsika Signed-off-by: Carlo van Driesten --- .../linkml/src/linkml/generators/owlgen.py | 30 +++- tests/linkml/test_generators/test_owlgen.py | 170 ++++++++++++++++++ 2 files changed, 198 insertions(+), 2 deletions(-) diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index 38f47823bf..5d149a43d8 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -208,7 +208,11 @@ class OwlSchemaGenerator(Generator): one direct ``is_a`` child, the generator adds ``AbstractClass rdfs:subClassOf (Child1 or Child2 or …)``, expressing the open-world covering constraint that every instance of the abstract class must also be an instance of one of its - direct subclasses.""" + direct subclasses. + + .. note:: An info message is emitted when an abstract class has no children (no axiom generated). + A warning is emitted when there is only one child (covering axiom degenerates to equivalence + Parent ≡ Child). 
Use this flag to suppress covering axioms entirely if equivalence is undesired.""" @staticmethod def _present(values: Iterable[_T | None]) -> list[_T]: @@ -504,6 +508,26 @@ def condition_to_bnode(expr: AnonymousClassExpression) -> OWL_EXPRESSION | None: # must be an instance of at least one of its direct subclasses. if cls.abstract and not self.skip_abstract_class_as_unionof_subclasses: children = sorted(sv.class_children(cls.name, imports=self.mergeimports, mixins=False, is_a=True)) + if not children: + logger.info( + "Abstract class '%s' has no children. No covering axiom will be generated.", + cls.name, + ) + elif len(children) == 1: + # Warn: with one child C, the covering axiom degenerates to + # Parent ⊑ C which, combined with C ⊑ Parent (from is_a), + # creates Parent ≡ C (equivalence). This is semantically + # correct per OWL 2 but may be surprising for extensible + # ontologies where more children are added later. + logger.warning( + "Abstract class '%s' has only 1 direct child ('%s'). " + "The covering axiom makes them equivalent (%s ≡ %s). " + "Use --skip-abstract-class-as-unionof-subclasses to suppress.", + cls.name, + children[0], + cls.name, + children[0], + ) if children: child_uris = [self._class_uri(child) for child in children] union_node = self._union_of(child_uris) @@ -1653,7 +1677,9 @@ def slot_owl_type(self, slot: SlotDefinition) -> URIRef: show_default=True, help=( "If true, suppress rdfs:subClassOf owl:unionOf(subclasses) covering axioms for abstract classes. " - "By default such axioms are emitted for every abstract class that has direct is_a children." + "By default such axioms are emitted for every abstract class that has direct is_a children. " + "Note: an info message is logged for abstract classes with zero children (no axiom); " + "a warning is emitted for one child (equivalence)." 
), ) @click.option( diff --git a/tests/linkml/test_generators/test_owlgen.py b/tests/linkml/test_generators/test_owlgen.py index ead3359ee2..062d4c31ac 100644 --- a/tests/linkml/test_generators/test_owlgen.py +++ b/tests/linkml/test_generators/test_owlgen.py @@ -1,3 +1,4 @@ +import logging from enum import Enum import pytest @@ -526,6 +527,175 @@ def test_abstract_class_without_subclasses_gets_no_union_of_axiom(): assert _union_members(g, EX.Orphan) is None +def test_abstract_class_with_no_children_emits_info(caplog): + """An abstract class with no children emits an info message about missing coverage. + + When an abstract class has zero subclasses, no covering axiom can be + generated. An info message alerts users that the class hierarchy is + incomplete — this is not a warning because abstract leaf classes are + a normal pattern in base schemas designed for downstream extension. + + See: mgskjaeveland's review on linkml/linkml#3309. + See: matentzn's review on linkml/linkml#3309. + """ + sb = SchemaBuilder() + sb.add_class("Orphan", abstract=True) + sb.add_defaults() + + with caplog.at_level(logging.INFO, logger="linkml.generators.owlgen"): + g = _owl_graph(sb) + + # No covering axiom emitted + assert _union_members(g, EX.Orphan) is None + + # An info message must be logged (not a warning) + assert any("has no children" in msg for msg in caplog.messages), ( + "Expected an info message about abstract class with no children" + ) + assert any("No covering axiom" in msg for msg in caplog.messages), ( + "Info message should mention that no covering axiom will be generated" + ) + + +def test_no_children_info_suppressed_by_skip_flag(caplog): + """When --skip-abstract-class-as-unionof-subclasses is set, no info for zero children.""" + sb = SchemaBuilder() + sb.add_class("Orphan", abstract=True) + sb.add_defaults() + + with caplog.at_level(logging.INFO, logger="linkml.generators.owlgen"): + _owl_graph(sb, skip_abstract_class_as_unionof_subclasses=True) + + assert not 
any("has no children" in msg for msg in caplog.messages) + + +def test_abstract_class_with_single_child_emits_warning(caplog): + """An abstract class with one child still gets a covering axiom but emits a warning. + + Per OWL 2 semantics, the covering axiom with a single child creates an + equivalence (Parent ≡ Child). This is logically correct but may surprise + users who plan to extend the ontology later. The generator should warn + and recommend ``--skip-abstract-class-as-unionof-subclasses``. + + See: W3C OWL 2 Primer §4.2 — bidirectional rdfs:subClassOf = equivalence. + See: mgskjaeveland's review on linkml/linkml#3309. + """ + sb = SchemaBuilder() + sb.add_class("GrandParent") + sb.add_class("Parent", is_a="GrandParent", abstract=True) + sb.add_class("Child", is_a="Parent") + sb.add_defaults() + + with caplog.at_level(logging.WARNING, logger="linkml.generators.owlgen"): + g = _owl_graph(sb) + + # Covering axiom IS still emitted (single child → equivalence is OWL-correct). + # With one child, _union_of returns the child URI directly (no owl:unionOf wrapper), + # so the covering axiom materialises as Parent rdfs:subClassOf Child. + # Combined with Child rdfs:subClassOf Parent (from is_a), this is the equivalence. + assert (EX.Parent, RDFS.subClassOf, EX.Child) in g, ( + "Covering axiom should produce Parent rdfs:subClassOf Child for single-child case" + ) + assert (EX.Child, RDFS.subClassOf, EX.Parent) in g + assert (EX.Parent, RDFS.subClassOf, EX.GrandParent) in g + + # But a warning must be logged + assert any("only 1 direct child" in msg for msg in caplog.messages), ( + "Expected a warning about single-child covering axiom creating equivalence" + ) + assert any("--skip-abstract-class-as-unionof-subclasses" in msg for msg in caplog.messages), ( + "Warning should recommend the skip flag" + ) + + +def test_single_child_warning_suppressed_by_skip_flag(caplog): + """When --skip-abstract-class-as-unionof-subclasses is set, no warning is emitted. 
+ + The skip flag suppresses covering axioms entirely, so the single-child + equivalence case never arises. + """ + sb = SchemaBuilder() + sb.add_class("Parent", abstract=True) + sb.add_class("Child", is_a="Parent") + sb.add_defaults() + + with caplog.at_level(logging.WARNING, logger="linkml.generators.owlgen"): + g = _owl_graph(sb, skip_abstract_class_as_unionof_subclasses=True) + + # No covering axiom emitted + assert (EX.Parent, RDFS.subClassOf, EX.Child) not in g + # No warning either + assert not any("only 1 direct child" in msg for msg in caplog.messages) + + +def test_multiple_children_no_warning(caplog): + """An abstract class with 2+ children must NOT emit a warning. + + The covering axiom is a proper union (not a degenerate equivalence), + so no warning is needed. + """ + sb = SchemaBuilder() + sb.add_class("Animal", abstract=True) + sb.add_class("Dog", is_a="Animal") + sb.add_class("Cat", is_a="Animal") + sb.add_defaults() + + with caplog.at_level(logging.WARNING, logger="linkml.generators.owlgen"): + g = _owl_graph(sb) + + # Covering axiom emitted (proper union) + members = _union_members(g, EX.Animal) + assert members == {EX.Dog, EX.Cat} + + # No warning about children count + assert not any("has no children" in msg for msg in caplog.messages) + assert not any("only 1 direct child" in msg for msg in caplog.messages) + + +def test_non_abstract_class_no_warning(caplog): + """A non-abstract class must NOT emit covering axiom warnings. + + Covering axioms only apply to abstract classes. Concrete classes + should be silently skipped regardless of child count. 
+ """ + sb = SchemaBuilder() + sb.add_class("Parent") # not abstract + sb.add_class("Child", is_a="Parent") + sb.add_defaults() + + with caplog.at_level(logging.WARNING, logger="linkml.generators.owlgen"): + g = _owl_graph(sb) + + # No covering axiom for non-abstract class + assert _union_members(g, EX.Parent) is None + assert (EX.Parent, RDFS.subClassOf, EX.Child) not in g + + # No warning either + assert not any("has no children" in msg for msg in caplog.messages) + assert not any("only 1 direct child" in msg for msg in caplog.messages) + + +def test_abstract_class_with_only_mixin_children_emits_info(caplog): + """An abstract class whose only children are via mixins (not is_a) gets the no-children info. + + The covering axiom only considers direct is_a children (not mixins). + If an abstract class has mixin children but no is_a children, it should + log an info message about having no children for covering axiom purposes. + """ + sb = SchemaBuilder() + sb.add_class("Base", abstract=True) + sb.add_class("MixinChild", mixins=["Base"]) + sb.add_defaults() + + with caplog.at_level(logging.INFO, logger="linkml.generators.owlgen"): + g = _owl_graph(sb) + + assert _union_members(g, EX.Base) is None + assert any("has no children" in msg for msg in caplog.messages), ( + "Abstract class with only mixin children should log info about no is_a children" + ) + + @pytest.mark.parametrize("skip", [False, True]) def test_union_of_axiom_only_covers_direct_children(skip: bool): """Union-of axiom lists only direct is_a children, not grandchildren. 
From 1a56d476d4dd5c6c9e76fe8e61bd8aa7f36fe2cc Mon Sep 17 00:00:00 2001 From: jdsika Date: Thu, 2 Apr 2026 17:21:36 +0200 Subject: [PATCH 02/15] feat(generators): add --normalize-prefixes flag for well-known prefix names Add an opt-in --normalize-prefixes flag to OWL, SHACL, and JSON-LD Context generators that normalises non-standard prefix aliases to well-known names from a cached prefix map built from the prefixmaps package (curated linked_data context layered over the merged context, which combines prefix.cc, bioregistry, and other sources, plus a small set of overrides for rdflib 7.x / W3C canonical names). Key design decisions: - Cached prefix map loaded from prefixmaps with frozen overrides (MappingProxyType) instead of a runtime Graph().namespaces() lookup, which eliminates the rdflib version dependency - Both http://schema.org/ and https://schema.org/ map to 'schema' - Shared normalize_graph_prefixes() helper used by OWL and SHACL - Two-phase graph normalisation: Phase 1 normalises schema-declared prefixes, Phase 2 cleans up runtime-injected bindings - Collision detection: skip with warning when standard prefix name is already user-declared for a different namespace - Phase 2 guard prevents overwriting HTTPS bindings with HTTP variants The flag defaults to off, preserving existing behaviour. Tests cover OWL, SHACL, and context generators with sdo->schema, dce->dc, http/https edge case, custom prefix preservation, flag-off backward compatibility, cross-generator consistency, prefix collision detection, schema1 regression prevention, Phase 2 HTTPS guard, empty schema edge case, and static map integrity. 
Signed-off-by: jdsika Signed-off-by: Carlo van Driesten --- packages/linkml/pyproject.toml | 9 +- .../src/linkml/generators/jsonldcontextgen.py | 82 ++- .../linkml/src/linkml/generators/jsonldgen.py | 2 + .../linkml/src/linkml/generators/owlgen.py | 6 +- .../linkml/src/linkml/generators/shaclgen.py | 6 +- packages/linkml/src/linkml/utils/generator.py | 170 +++++- .../test_generators/test_jsonldcontextgen.py | 115 ++++ .../test_normalize_prefixes.py | 545 ++++++++++++++++++ uv.lock | 15 +- 9 files changed, 932 insertions(+), 18 deletions(-) create mode 100644 tests/linkml/test_generators/test_normalize_prefixes.py diff --git a/packages/linkml/pyproject.toml b/packages/linkml/pyproject.toml index cd61d81bc3..52f58d32da 100644 --- a/packages/linkml/pyproject.toml +++ b/packages/linkml/pyproject.toml @@ -49,7 +49,10 @@ dependencies = [ # Specifier syntax: https://peps.python.org/pep-0631/ "openpyxl", "parse", "prefixcommons >= 0.1.7", - "prefixmaps >= 0.2.2", + # TODO(prefixmaps-0.2.8): Replace git pin with "prefixmaps >= 0.2.8" once released, + # then remove [tool.hatch.metadata] allow-direct-references and regenerate uv.lock. 
+ # Tracked in: https://github.com/linkml/prefixmaps/issues/82 + "prefixmaps @ git+https://github.com/linkml/prefixmaps@75435150a1b31760b9780af2b64a265943a9b263", "pydantic >= 2.0.0, < 3.0.0", "pyjsg >= 0.11.6", "pyshex >= 0.7.20", @@ -196,6 +199,10 @@ vcs = "git" style = "pep440" fallback-version = "0.0.0" +[tool.hatch.metadata] +# TODO(prefixmaps-0.2.8): Remove this section once the git pin is replaced with >= 0.2.8 +allow-direct-references = true + [tool.hatch.version] source = "uv-dynamic-versioning" diff --git a/packages/linkml/src/linkml/generators/jsonldcontextgen.py b/packages/linkml/src/linkml/generators/jsonldcontextgen.py index c30afc72a5..38dd938860 100644 --- a/packages/linkml/src/linkml/generators/jsonldcontextgen.py +++ b/packages/linkml/src/linkml/generators/jsonldcontextgen.py @@ -15,7 +15,7 @@ from linkml._version import __version__ from linkml.utils.deprecation import deprecated_fields -from linkml.utils.generator import Generator, shared_arguments +from linkml.utils.generator import Generator, shared_arguments, well_known_prefix_map from linkml_runtime.linkml_model.meta import ClassDefinition, EnumDefinition, SlotDefinition from linkml_runtime.linkml_model.types import SHEX from linkml_runtime.utils.formatutils import camelcase, underscore @@ -90,6 +90,9 @@ class ContextGenerator(Generator): frame_root: str | None = None def __post_init__(self) -> None: + # Must be set before super().__post_init__() because the parent triggers + # the visitor pattern (visit_schema), which accesses _prefix_remap. + self._prefix_remap: dict[str, str] = {} super().__post_init__() if self.namespaces is None: raise TypeError("Schema text must be supplied to context generator. 
Preparsed schema will not work") @@ -127,8 +130,14 @@ def _collect_external_elements(sv: SchemaView) -> tuple[set[str], set[str]]: external_slots.update(schema_def.slots.keys()) return external_classes, external_slots + def add_prefix(self, ncname: str) -> None: + """Add a prefix, applying well-known prefix normalisation when enabled.""" + super().add_prefix(self._prefix_remap.get(ncname, ncname)) + def visit_schema(self, base: str | Namespace | None = None, output: str | None = None, **_): - # Add any explicitly declared prefixes + # Add any explicitly declared prefixes. + # Direct .add() is safe here: the normalisation block below explicitly + # rewrites emit_prefixes entries for any renamed prefixes (Cases 1-3). for prefix in self.schema.prefixes.values(): self.emit_prefixes.add(prefix.prefix_prefix) @@ -136,6 +145,68 @@ def visit_schema(self, base: str | Namespace | None = None, output: str | None = for pfx in self.schema.emit_prefixes: self.add_prefix(pfx) + # Normalise well-known prefix names when --normalize-prefixes is set. + # If the schema declares a non-standard alias for a namespace that has + # a well-known standard name (e.g. ``sdo`` for + # ``https://schema.org/``), replace the alias with the standard name + # so that generated JSON-LD contexts use the conventional prefix. + # + # Three cases are handled: + # 1. Standard prefix is not yet bound → just rebind from old to new. + # 2. Standard prefix is bound to a *different* URI: + # a. User-declared (in schema.prefixes) → collision, skip with warning. + # b. Runtime default (e.g. linkml-runtime's ``schema: http://…``) + # → remove stale binding, then rebind. + # 3. Standard prefix is already bound to the *same* URI (duplicate) + # → just drop the non-standard alias. + # + # A remap dict is stored for ``_build_element_id`` because + # ``prefix_suffix()`` splits CURIEs on ``:`` without looking up the + # namespace dict. 
+ self._prefix_remap.clear() + if self.normalize_prefixes: + wk = well_known_prefix_map() + for old_pfx in list(self.namespaces): + url = str(self.namespaces[old_pfx]) + std_pfx = wk.get(url) + if not std_pfx or std_pfx == old_pfx: + continue + if std_pfx in self.namespaces: + if str(self.namespaces[std_pfx]) != url: + # Case 2: std_pfx is bound to a different URI. + # If the user explicitly declared std_pfx in the schema, + # it is intentional — skip to avoid data loss. + if std_pfx in self.schema.prefixes: + self.logger.warning( + "Prefix collision: cannot rename '%s' to '%s' because '%s' is " + "already declared for <%s>; skipping normalisation for <%s>", + old_pfx, + std_pfx, + std_pfx, + str(self.namespaces[std_pfx]), + url, + ) + continue + # Not user-declared (e.g. linkml-runtime default) — safe to remove + self.emit_prefixes.discard(std_pfx) + del self.namespaces[std_pfx] + else: + # Case 3: standard prefix already bound to same URI + # — just drop the non-standard alias + del self.namespaces[old_pfx] + if old_pfx in self.emit_prefixes: + self.emit_prefixes.discard(old_pfx) + self.emit_prefixes.add(std_pfx) + self._prefix_remap[old_pfx] = std_pfx + continue + # Case 1 (or Case 2 after stale removal): bind standard name + self.namespaces[std_pfx] = self.namespaces[old_pfx] + del self.namespaces[old_pfx] + if old_pfx in self.emit_prefixes: + self.emit_prefixes.discard(old_pfx) + self.emit_prefixes.add(std_pfx) + self._prefix_remap[old_pfx] = std_pfx + # Add the default prefix if self.schema.default_prefix: dflt = self.namespaces.prefix_for(self.schema.default_prefix) @@ -143,6 +214,8 @@ def visit_schema(self, base: str | Namespace | None = None, output: str | None = self.default_ns = dflt if self.default_ns: default_uri = self.namespaces[self.default_ns] + # Direct .add() is safe: default_ns is already resolved from + # the (possibly normalised) namespace bindings above. 
self.emit_prefixes.add(self.default_ns) else: default_uri = self.schema.default_prefix @@ -486,6 +559,11 @@ def _build_element_id(self, definition: Any, uri: str) -> None: @return: None """ uri_prefix, uri_suffix = self.namespaces.prefix_suffix(uri) + # Apply well-known prefix normalisation (e.g. sdo → schema). + # prefix_suffix() splits CURIEs on ':' without checking the + # namespace dict, so it may return a stale alias. + if uri_prefix and uri_prefix in self._prefix_remap: + uri_prefix = self._prefix_remap[uri_prefix] is_default_namespace = uri_prefix == self.context_body["@vocab"] or uri_prefix == self.namespaces.prefix_for( self.context_body["@vocab"] ) diff --git a/packages/linkml/src/linkml/generators/jsonldgen.py b/packages/linkml/src/linkml/generators/jsonldgen.py index 75d2068e16..ee2fd0cf4e 100644 --- a/packages/linkml/src/linkml/generators/jsonldgen.py +++ b/packages/linkml/src/linkml/generators/jsonldgen.py @@ -179,6 +179,8 @@ def end_schema(self, context: str | Sequence[str] | None = None, context_kwargs: # TODO: The _visit function above alters the schema in situ # force some context_kwargs context_kwargs["metadata"] = False + # Forward prefix normalisation into the inline @context. 
+ context_kwargs.setdefault("normalize_prefixes", self.normalize_prefixes) add_prefixes = ContextGenerator(self.original_schema, **context_kwargs).serialize() add_prefixes_json = loads(add_prefixes) metamodel_ctx = self.metamodel_context or METAMODEL_CONTEXT_URI diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index 5d149a43d8..91b8a76507 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -21,7 +21,7 @@ from linkml._version import __version__ from linkml.generators.common.subproperty import is_xsd_anyuri_range from linkml.utils.deprecation import deprecation_warning -from linkml.utils.generator import Generator, shared_arguments +from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments from linkml_runtime import SchemaView from linkml_runtime.linkml_model.meta import ( AnonymousClassExpression, @@ -268,6 +268,10 @@ def as_graph(self) -> Graph: self.graph.bind(prefix, self.metamodel.namespaces[prefix]) for pfx in schema.prefixes.values(): self.graph.namespace_manager.bind(pfx.prefix_prefix, URIRef(pfx.prefix_reference)) + if self.normalize_prefixes: + normalize_graph_prefixes( + graph, {str(v.prefix_prefix): str(v.prefix_reference) for v in schema.prefixes.values()} + ) graph.add((base, RDF.type, OWL.Ontology)) # Add main schema elements diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index 2f2e6b5d1e..f6888cf0a8 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -13,7 +13,7 @@ from linkml.generators.common.subproperty import get_subproperty_values, is_uri_range from linkml.generators.shacl.shacl_data_type import ShaclDataType from linkml.generators.shacl.shacl_ifabsent_processor import ShaclIfAbsentProcessor -from linkml.utils.generator import Generator, shared_arguments 
+from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName from linkml_runtime.utils.formatutils import underscore from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, extended_str @@ -105,6 +105,10 @@ def as_graph(self) -> Graph: for pfx in self.schema.prefixes.values(): g.bind(str(pfx.prefix_prefix), pfx.prefix_reference) + if self.normalize_prefixes: + normalize_graph_prefixes( + g, {str(v.prefix_prefix): str(v.prefix_reference) for v in self.schema.prefixes.values()} + ) for c in sv.all_classes(imports=not self.exclude_imports).values(): diff --git a/packages/linkml/src/linkml/utils/generator.py b/packages/linkml/src/linkml/utils/generator.py index 88fc485851..72b977eaa7 100644 --- a/packages/linkml/src/linkml/utils/generator.py +++ b/packages/linkml/src/linkml/utils/generator.py @@ -20,11 +20,12 @@ import os import re import sys +import types from collections.abc import Callable, Mapping from dataclasses import dataclass, field from functools import lru_cache from pathlib import Path -from typing import ClassVar, TextIO, Union, cast +from typing import TYPE_CHECKING, ClassVar, TextIO, Union, cast import click from click import Argument, Command, Option @@ -58,6 +59,9 @@ from linkml_runtime.utils.formatutils import camelcase, underscore from linkml_runtime.utils.namespaces import Namespaces +if TYPE_CHECKING: + from rdflib import Graph + logger = logging.getLogger(__name__) @@ -78,6 +82,154 @@ def _resolved_metamodel(mergeimports): return metamodel +def well_known_prefix_map() -> dict[str, str]: + """Return a mapping from namespace URI to standard prefix name. + + Primary source: the ``linked_data`` context from `prefixmaps + `_ — the canonical curated + registry maintained by the LinkML team. This context provides + correct, community-consensus prefix names (e.g. ``sh`` not ``shacl``, + ``schema`` not ``sdo``). 
+ + Secondary source: the ``merged`` context from prefixmaps, which + combines prefix.cc, bioregistry, and other sources for broad coverage. + + A small ``_PREFIX_OVERRIDES`` map corrects the few cases where the + merged context disagrees with rdflib/W3C canonical names. + + Both ``http`` and ``https`` variants of schema.org and wgs84 are + included because the linkml-runtime historically binds the HTTP form + while rdflib (and the W3C) prefer HTTPS. + + .. note:: + Requires ``prefixmaps >= 0.2.7``. For entries added in + linkml/prefixmaps#81 (W3C/OGC standard prefixes), pin to + ``prefixmaps @ git+https://github.com/linkml/prefixmaps@75435150`` + until v0.2.8 is released. + """ + return dict(_cached_well_known_prefix_map()) + + +@lru_cache(maxsize=1) +def _cached_well_known_prefix_map() -> dict[str, str]: + """Internal cached builder for well_known_prefix_map().""" + from prefixmaps import load_context + + # Layer 1: merged context (broad coverage, first-seen-wins for duplicates). + merged = load_context("merged") + ns_to_prefix: dict[str, str] = {} + for rec in merged.prefix_expansions: + if rec.namespace not in ns_to_prefix: + ns_to_prefix[rec.namespace] = rec.prefix + + # Layer 2: linked_data context (curated, correct names) overrides merged. + ld = load_context("linked_data") + for rec in ld.prefix_expansions: + ns_to_prefix[rec.namespace] = rec.prefix + + # Layer 3: overrides for the few cases where merged/linked_data disagrees + # with the rdflib/W3C canonical forms used by the RDF community. + for ns, pfx in _PREFIX_OVERRIDES.items(): + ns_to_prefix[ns] = pfx + + # Ensure both HTTP/HTTPS schema.org variants resolve to 'schema'. + ns_to_prefix.setdefault("https://schema.org/", "schema") + ns_to_prefix["http://schema.org/"] = "schema" + + # Ensure both HTTP/HTTPS wgs84 variants resolve to 'wgs'. 
+ ns_to_prefix.setdefault("https://www.w3.org/2003/01/geo/wgs84_pos#", "wgs") + + return ns_to_prefix + + +# Overrides: corrections where prefixmaps merged context uses non-standard names +# that differ from rdflib 7.x / W3C canonical forms. +_PREFIX_OVERRIDES: types.MappingProxyType[str, str] = types.MappingProxyType( + { + # merged gives 'geosparql', rdflib/W3C uses 'geo' + "http://www.opengis.net/ont/geosparql#": "geo", + # merged gives 'sc', rdflib/W3C uses 'schema' + "https://schema.org/": "schema", + # merged gives 'WGS84', rdflib uses 'wgs' + "https://www.w3.org/2003/01/geo/wgs84_pos#": "wgs", + "http://www.w3.org/2003/01/geo/wgs84_pos#": "wgs", + } +) + + +def normalize_graph_prefixes(graph: "Graph", schema_prefixes: dict[str, str]) -> None: + """Normalise non-standard prefix aliases in an rdflib Graph. + + For each prefix bound in *schema_prefixes* (mapping prefix name → + namespace URI), check whether ``well_known_prefix_map()`` knows a + standard name for that URI. If the standard name differs from the + schema-declared name, rebind the namespace to the standard name. + + This is the **shared implementation** used by OWL, SHACL, and (via a + different code-path) JSON-LD context generators so that all serialisation + formats agree on prefix names when ``--normalize-prefixes`` is active. + + :param graph: rdflib Graph whose namespace bindings should be adjusted. + :param schema_prefixes: mapping of prefix name → namespace URI string, + typically from ``schema.prefixes``. + """ + from rdflib import Namespace + + wk = well_known_prefix_map() + + # Phase 1: normalise schema-declared prefixes. + for old_pfx, ns_uri in schema_prefixes.items(): + ns_str = str(ns_uri) + std_pfx = wk.get(ns_str) + if not std_pfx or std_pfx == old_pfx: + continue + # Collision: the user explicitly declared std_pfx for a different + # namespace — do not clobber their binding. 
+ if std_pfx in schema_prefixes and schema_prefixes[std_pfx] != ns_str: + logger.warning( + "Prefix collision: cannot rename '%s' to '%s' because '%s' is already " + "declared for <%s>; skipping normalisation for <%s>", + old_pfx, + std_pfx, + std_pfx, + schema_prefixes[std_pfx], + ns_str, + ) + continue + # Rebind: remove old prefix, add standard prefix. + # ``replace=True`` forces the new prefix even if the prefix name + # is already bound to a different namespace. + graph.bind(std_pfx, Namespace(ns_str), override=True, replace=True) + + # Phase 2: normalise runtime-injected bindings (e.g. metamodel defaults). + # The linkml-runtime / rdflib may inject well-known namespaces under + # non-standard prefix names. After Phase 1 rebinds schema-declared + # prefixes, orphaned runtime bindings can appear as ``schema1``, ``dc0``, + # etc. Scan the graph's current bindings and fix any that map to a + # well-known namespace under a non-standard name, provided the standard + # name isn't already claimed by the user for a different namespace. + # + # Guard: if Phase 1 already bound std_pfx to a different URI (e.g. + # ``schema`` → ``https://schema.org/``), do not clobber it with the + # HTTP variant (``http://schema.org/``). Build a snapshot of the + # current bindings after Phase 1 to detect this. + current_bindings = {str(p): str(n) for p, n in graph.namespaces()} + for pfx, ns in list(graph.namespaces()): + pfx_str, ns_str = str(pfx), str(ns) + std_pfx = wk.get(ns_str) + if not std_pfx or std_pfx == pfx_str: + continue + # Same collision check as Phase 1: respect user-declared prefixes. + if std_pfx in schema_prefixes and schema_prefixes[std_pfx] != ns_str: + continue + # Guard: if std_pfx is already bound to a different (correct) URI + # by Phase 1, do not overwrite it. This prevents the HTTP variant + # of schema.org from clobbering the HTTPS binding. 
+ if std_pfx in current_bindings and current_bindings[std_pfx] != ns_str: + continue + graph.bind(std_pfx, Namespace(ns_str), override=True, replace=True) + + @dataclass class Generator(metaclass=abc.ABCMeta): """ @@ -180,6 +332,12 @@ class Generator(metaclass=abc.ABCMeta): stacktrace: bool = False """True means print stack trace, false just error message""" + normalize_prefixes: bool = False + """True means normalise non-standard prefix aliases to well-known names + from the ``prefixmaps`` package (linked_data + merged contexts, with + overrides for rdflib/W3C canonical forms). E.g. ``sdo`` → ``schema`` + for ``https://schema.org/``.""" + include: str | Path | SchemaDefinition | None = None """If set, include extra schema outside of the imports mechanism""" @@ -986,6 +1144,16 @@ def decorator(f: Command) -> Command: callback=stacktrace_callback, ) ) + f.params.append( + Option( + ("--normalize-prefixes/--no-normalize-prefixes",), + default=False, + show_default=True, + help="Normalise non-standard prefix aliases to rdflib's curated default names " + "(e.g. sdo → schema for https://schema.org/). " + "Supported by OWL, SHACL, and JSON-LD Context generators.", + ) + ) return f diff --git a/tests/linkml/test_generators/test_jsonldcontextgen.py b/tests/linkml/test_generators/test_jsonldcontextgen.py index 6e3170d5ac..3a1081ceeb 100644 --- a/tests/linkml/test_generators/test_jsonldcontextgen.py +++ b/tests/linkml/test_generators/test_jsonldcontextgen.py @@ -1637,3 +1637,118 @@ def test_kitchen_sink_employment_event_type_falls_back(kitchen_sink_path): slot_def = ctx["employed_at"] if isinstance(slot_def, dict) and "@context" in slot_def: assert "@vocab" not in slot_def.get("@context", {}) + + +def test_normalize_prefixes_renames_nonstandard_alias(tmp_path): + """When --normalize-prefixes is set, non-standard aliases are replaced by rdflib defaults. + + rdflib binds ``dc`` to ``http://purl.org/dc/elements/1.1/`` by default. 
+ A schema that declares ``dce`` for the same URI should have it normalised + to ``dc`` when the flag is enabled. + + See: rdflib default namespace bindings. + """ + schema = tmp_path / "schema.yaml" + schema.write_text( + """\ +id: https://example.org/test +name: test_normalize +default_prefix: ex +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + dce: http://purl.org/dc/elements/1.1/ +imports: + - linkml:types +classes: + Record: + class_uri: ex:Record + attributes: + title: + range: string + slot_uri: dce:title +""", + encoding="utf-8", + ) + + # Flag OFF (default): non-standard alias preserved + ctx_off = json.loads(ContextGenerator(str(schema), normalize_prefixes=False).serialize())["@context"] + assert "dce" in ctx_off, "With flag off, original prefix 'dce' must be preserved" + + # Flag ON: rdflib default name used + ctx_on = json.loads(ContextGenerator(str(schema), normalize_prefixes=True).serialize())["@context"] + assert "dc" in ctx_on, "With flag on, 'dce' should be normalised to 'dc'" + assert "dce" not in ctx_on, "With flag on, original alias 'dce' should be removed" + assert ctx_on["dc"] == "http://purl.org/dc/elements/1.1/" + + +def test_normalize_prefixes_default_is_off(tmp_path): + """The --normalize-prefixes flag defaults to False — no prefix renaming. + + Ensures backward compatibility: existing schemas produce identical output. 
+ """ + schema = tmp_path / "schema.yaml" + schema.write_text( + """\ +id: https://example.org/test +name: test_default +default_prefix: ex +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + sdo: https://schema.org/ +imports: + - linkml:types +classes: + Thing: + class_uri: sdo:Thing + attributes: + name: + range: string + slot_uri: sdo:name +""", + encoding="utf-8", + ) + + ctx = json.loads(ContextGenerator(str(schema)).serialize())["@context"] + # Without the flag, the schema's own prefix name must be preserved + assert "sdo" in ctx, "Default behavior must preserve schema-declared prefix 'sdo'" + + +def test_normalize_prefixes_curie_remapping(tmp_path): + """CURIEs in element @id values use the normalised prefix name. + + When ``sdo`` is normalised to ``schema``, slot URIs like ``sdo:name`` + must appear as ``schema:name`` in the generated context. + """ + schema = tmp_path / "schema.yaml" + schema.write_text( + """\ +id: https://example.org/test +name: test_curie +default_prefix: ex +prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + sdo: https://schema.org/ +imports: + - linkml:types +classes: + Person: + class_uri: sdo:Person + attributes: + full_name: + range: string + slot_uri: sdo:name +""", + encoding="utf-8", + ) + + ctx = json.loads(ContextGenerator(str(schema), normalize_prefixes=True).serialize())["@context"] + # The prefix declaration must use the standard name + assert "schema" in ctx, "Normalised prefix 'schema' must appear" + # Element @id must use the normalised prefix + person = ctx.get("Person", {}) + assert person.get("@id", "").startswith("schema:"), ( + f"Person @id should use normalised prefix 'schema:', got {person}" + ) diff --git a/tests/linkml/test_generators/test_normalize_prefixes.py b/tests/linkml/test_generators/test_normalize_prefixes.py new file mode 100644 index 0000000000..0a832a5791 --- /dev/null +++ b/tests/linkml/test_generators/test_normalize_prefixes.py @@ -0,0 +1,545 @@ 
+"""Tests for the --normalize-prefixes flag across all generators. + +Verifies that non-standard prefix aliases (e.g. ``sdo`` for ``https://schema.org/``) +are normalised to well-known names (e.g. ``schema``) consistently in OWL, SHACL, +and JSON-LD context output. + +References: +- prefix.cc — community consensus RDF prefix registry +- rdflib 7.x curated default namespace bindings +- W3C Turtle §2.4 — prefix declarations are syntactic sugar +""" + +import json +import logging +import re +import textwrap + +import pytest + +# ── Shared test schema ────────────────────────────────────────────── + +SCHEMA_SDO = textwrap.dedent("""\ + id: https://example.org/test + name: test_normalize + default_prefix: ex + prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + sdo: https://schema.org/ + imports: + - linkml:types + classes: + Person: + class_uri: sdo:Person + attributes: + full_name: + range: string + slot_uri: sdo:name +""") + +SCHEMA_DCE = textwrap.dedent("""\ + id: https://example.org/test + name: test_normalize_dce + default_prefix: ex + prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + dce: http://purl.org/dc/elements/1.1/ + imports: + - linkml:types + classes: + Record: + class_uri: ex:Record + attributes: + title: + range: string + slot_uri: dce:title +""") + +# HTTP variant — linkml-runtime historically binds schema: http://schema.org/ +# while rdflib (and the W3C) prefer https://schema.org/. The normalize flag +# must handle both. +SCHEMA_HTTP_SDO = textwrap.dedent("""\ + id: https://example.org/test + name: test_http_schema + default_prefix: ex + prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + sdo: http://schema.org/ + imports: + - linkml:types + classes: + Place: + class_uri: sdo:Place + attributes: + geo: + range: string + slot_uri: sdo:geo +""") + +# Collision scenario: user declares 'foaf' for a custom namespace AND 'myfoaf' +# for http://xmlns.com/foaf/0.1/. 
Normalisation must NOT clobber the user's 'foaf'. +# Uses 'foaf' instead of 'schema' because 'schema' is declared in linkml:types, +# which causes a SchemaLoader merge conflict before normalisation even runs. +SCHEMA_COLLISION = textwrap.dedent("""\ + id: https://example.org/test + name: test_collision + default_prefix: ex + prefixes: + ex: https://example.org/ + linkml: https://w3id.org/linkml/ + foaf: https://something-else.org/ + myfoaf: http://xmlns.com/foaf/0.1/ + imports: + - linkml:types + classes: + Agent: + class_uri: myfoaf:Agent + attributes: + label: + range: string + slot_uri: myfoaf:name +""") + + +def _write_schema(tmp_path, content: str, name: str = "schema.yaml") -> str: + """Write schema content to a temporary file and return its path as string.""" + p = tmp_path / name + p.write_text(content, encoding="utf-8") + return str(p) + + +def _turtle_prefixes(ttl: str) -> dict[str, str]: + """Extract @prefix declarations from Turtle output → {prefix: namespace}.""" + result = {} + for m in re.finditer(r"@prefix\s+(\w+):\s+<([^>]+)>", ttl): + result[m.group(1)] = m.group(2) + return result + + +# ── OWL Generator Tests ───────────────────────────────────────────── + + +def test_owl_sdo_normalised_to_schema(tmp_path): + """sdo → schema when --normalize-prefixes is active.""" + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + ttl = OwlSchemaGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + assert "schema" in pfx, f"Expected 'schema' prefix in OWL output, got: {sorted(pfx)}" + assert pfx["schema"] == "https://schema.org/" + assert "sdo" not in pfx, "Non-standard 'sdo' prefix should be removed" + + +def test_owl_flag_off_preserves_original(tmp_path): + """Without the flag, schema-declared prefix names are preserved.""" + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + ttl = 
OwlSchemaGenerator(schema_path, normalize_prefixes=False).serialize() + pfx = _turtle_prefixes(ttl) + assert "sdo" in pfx, "With flag off, original prefix 'sdo' must be preserved" + + +def test_owl_dce_normalised_to_dc(tmp_path): + """dce → dc for http://purl.org/dc/elements/1.1/ in graph bindings. + + Note: rdflib's Turtle serializer only emits @prefix declarations for + namespaces actually used in triples. Since the OWL generator may not + produce triples using dc:elements URIs for simple attribute schemas, + we verify the graph's namespace bindings directly. + """ + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_DCE) + gen = OwlSchemaGenerator(schema_path, normalize_prefixes=True) + graph = gen.as_graph() + bound = {str(p): str(n) for p, n in graph.namespaces()} + assert "dc" in bound, f"Expected 'dc' in graph bindings, got: {sorted(bound)}" + assert bound["dc"] == "http://purl.org/dc/elements/1.1/" + + +def test_owl_custom_prefix_not_affected(tmp_path): + """Domain-specific prefixes (e.g. 'ex') are not touched by normalisation.""" + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + ttl = OwlSchemaGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + assert "ex" in pfx, "Custom prefix 'ex' must survive normalisation" + assert pfx["ex"] == "https://example.org/" + + +def test_owl_http_schema_org_normalised(tmp_path): + """http://schema.org/ (HTTP variant) also normalises to 'schema'. + + The linkml-runtime historically binds ``schema: http://schema.org/`` + while the W3C and rdflib prefer ``https://schema.org/``. Both + variants must be recognised by the static well-known prefix map. 
+ """ + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_HTTP_SDO) + ttl = OwlSchemaGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + assert "schema" in pfx, f"Expected 'schema' prefix for http://schema.org/, got: {sorted(pfx)}" + assert "sdo" not in pfx + + +def test_owl_no_schema1_from_runtime_http_binding(tmp_path): + """Runtime-injected ``schema: http://schema.org/`` must not create ``schema1``. + + The linkml metamodel (types.yaml) declares ``schema: http://schema.org/`` + (HTTP). When a user schema declares ``sdo: https://schema.org/`` (HTTPS), + normalisation must clean up *both* variants so the output never contains + auto-generated suffixed prefixes like ``schema1``. + """ + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + ttl = OwlSchemaGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + suffixed = [p for p in pfx if re.match(r"schema\d+", p)] + assert not suffixed, ( + f"Auto-generated suffixed prefix(es) {suffixed} found — runtime http://schema.org/ binding was not cleaned up" + ) + + +# ── SHACL Generator Tests ─────────────────────────────────────────── + + +def test_shacl_sdo_normalised_to_schema(tmp_path): + """sdo → schema when --normalize-prefixes is active.""" + from linkml.generators.shaclgen import ShaclGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + ttl = ShaclGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + assert "schema" in pfx, f"Expected 'schema' prefix in SHACL output, got: {sorted(pfx)}" + assert pfx["schema"] == "https://schema.org/" + assert "sdo" not in pfx, "Non-standard 'sdo' prefix should be removed" + + +def test_shacl_flag_off_preserves_original(tmp_path): + """Without the flag, schema-declared prefix names are preserved.""" + from linkml.generators.shaclgen import 
ShaclGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + ttl = ShaclGenerator(schema_path, normalize_prefixes=False).serialize() + pfx = _turtle_prefixes(ttl) + assert "sdo" in pfx, "With flag off, original prefix 'sdo' must be preserved" + + +def test_shacl_dce_normalised_to_dc(tmp_path): + """dce → dc for http://purl.org/dc/elements/1.1/.""" + from linkml.generators.shaclgen import ShaclGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_DCE) + ttl = ShaclGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + assert "dc" in pfx, f"Expected 'dc' prefix in SHACL output, got: {sorted(pfx)}" + assert pfx["dc"] == "http://purl.org/dc/elements/1.1/" + assert "dce" not in pfx, "Non-standard 'dce' prefix should be removed" + + +def test_shacl_custom_prefix_not_affected(tmp_path): + """Domain-specific prefixes (e.g. 'ex') are not touched by normalisation. + + Note: rdflib only emits @prefix for namespaces used in triples. + We verify graph bindings directly. 
+ """ + from linkml.generators.shaclgen import ShaclGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + gen = ShaclGenerator(schema_path, normalize_prefixes=True) + graph = gen.as_graph() + bound = {str(p): str(n) for p, n in graph.namespaces()} + assert "ex" in bound, f"Custom prefix 'ex' must survive in graph bindings, got: {sorted(bound)}" + assert bound["ex"] == "https://example.org/" + + +def test_shacl_http_schema_org_normalised(tmp_path): + """http://schema.org/ (HTTP variant) also normalises to 'schema'.""" + from linkml.generators.shaclgen import ShaclGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_HTTP_SDO) + ttl = ShaclGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + assert "schema" in pfx, f"Expected 'schema' prefix for http://schema.org/, got: {sorted(pfx)}" + assert "sdo" not in pfx + + +def test_shacl_no_schema1_from_runtime_http_binding(tmp_path): + """Runtime-injected ``schema: http://schema.org/`` must not create ``schema1``. + + Same scenario as the OWL test: linkml:types imports bring in + ``schema: http://schema.org/`` while the user schema has + ``sdo: https://schema.org/``. Phase 2 of normalisation must + clean up the orphaned HTTP binding. + """ + from linkml.generators.shaclgen import ShaclGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + ttl = ShaclGenerator(schema_path, normalize_prefixes=True).serialize() + pfx = _turtle_prefixes(ttl) + suffixed = [p for p in pfx if re.match(r"schema\d+", p)] + assert not suffixed, ( + f"Auto-generated suffixed prefix(es) {suffixed} found — runtime http://schema.org/ binding was not cleaned up" + ) + + +# ── JSON-LD Context Generator Tests ───────────────────────────────── + + +def test_context_http_schema_org_normalised(tmp_path): + """http://schema.org/ (HTTP variant) normalises to 'schema' in JSON-LD context. 
+ + This covers the edge case where linkml-runtime's ``schema: http://schema.org/`` + conflicts with rdflib's ``schema: https://schema.org/``. The stale binding + must be removed and replaced with the correct one. + """ + from linkml.generators.jsonldcontextgen import ContextGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_HTTP_SDO) + ctx = json.loads(ContextGenerator(schema_path, normalize_prefixes=True).serialize())["@context"] + assert "schema" in ctx, "HTTP schema.org should normalise to 'schema'" + assert "sdo" not in ctx, "Non-standard 'sdo' should be removed" + # The namespace URI must match the schema-declared one (http, not https) + schema_val = ctx["schema"] + if isinstance(schema_val, dict): + schema_val = schema_val.get("@id", "") + assert schema_val == "http://schema.org/", f"Namespace URI must be preserved: got {schema_val}" + + +# ── Static Prefix Map Tests ───────────────────────────────────────── + + +def test_well_known_prefix_map_returns_dict(): + from linkml.utils.generator import well_known_prefix_map + + wk = well_known_prefix_map() + assert isinstance(wk, dict) + assert len(wk) >= 29, f"Expected ≥29 entries, got {len(wk)}" + + +def test_well_known_prefix_map_schema_https(): + from linkml.utils.generator import well_known_prefix_map + + wk = well_known_prefix_map() + assert wk["https://schema.org/"] == "schema" + + +def test_well_known_prefix_map_schema_http_variant(): + """Both http and https schema.org must map to 'schema'.""" + from linkml.utils.generator import well_known_prefix_map + + wk = well_known_prefix_map() + assert wk["http://schema.org/"] == "schema" + + +def test_well_known_prefix_map_dc_elements(): + from linkml.utils.generator import well_known_prefix_map + + wk = well_known_prefix_map() + assert wk["http://purl.org/dc/elements/1.1/"] == "dc" + + +def test_well_known_prefix_map_returns_copy(): + """Callers should not be able to mutate the internal map.""" + from linkml.utils.generator import well_known_prefix_map + + 
wk1 = well_known_prefix_map() + wk1["http://never-in-any-real-prefix-map.test/"] = "test" + wk2 = well_known_prefix_map() + assert "http://never-in-any-real-prefix-map.test/" not in wk2 + + +def test_well_known_prefix_map_fully_resolved_from_prefixmaps(): + """All rdflib defaults must be resolved from prefixmaps (no residual map). + + This is the proof that pinning prefixmaps to the commit containing + linkml/prefixmaps#81 resolves all well-known prefixes without any + hardcoded fallback. If this test fails after a prefixmaps update, + add the missing prefix to the upstream linked_data.curated.yaml. + """ + from rdflib import Graph as RdfGraph + + from linkml.utils.generator import well_known_prefix_map + + wk = well_known_prefix_map() + rdflib_map = {str(ns): str(pfx) for pfx, ns in RdfGraph().namespaces() if str(pfx)} + missing = {ns: pfx for ns, pfx in rdflib_map.items() if ns not in wk} + assert not missing, f"Prefix map missing rdflib defaults (add to prefixmaps upstream): {missing}" + + +# ── Cross-Generator Consistency Tests ──────────────────────────────── + + +def test_all_generators_normalise_sdo_to_schema(tmp_path): + """OWL, SHACL, and JSON-LD context must all use 'schema' for schema.org.""" + from linkml.generators.jsonldcontextgen import ContextGenerator + from linkml.generators.owlgen import OwlSchemaGenerator + from linkml.generators.shaclgen import ShaclGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + + owl_ttl = OwlSchemaGenerator(schema_path, normalize_prefixes=True).serialize() + shacl_ttl = ShaclGenerator(schema_path, normalize_prefixes=True).serialize() + ctx = json.loads(ContextGenerator(schema_path, normalize_prefixes=True).serialize())["@context"] + + owl_pfx = _turtle_prefixes(owl_ttl) + shacl_pfx = _turtle_prefixes(shacl_ttl) + + assert "schema" in owl_pfx, "OWL must use 'schema'" + assert "schema" in shacl_pfx, "SHACL must use 'schema'" + assert "schema" in ctx, "JSON-LD context must use 'schema'" + + assert "sdo" not in 
owl_pfx, "OWL must not have 'sdo'" + assert "sdo" not in shacl_pfx, "SHACL must not have 'sdo'" + assert "sdo" not in ctx, "JSON-LD context must not have 'sdo'" + + +# ── Prefix Collision Tests ──────────────────────────────────────────── + + +@pytest.mark.parametrize( + "generator_cls,generator_module", + [ + ("OwlSchemaGenerator", "linkml.generators.owlgen"), + ("ShaclGenerator", "linkml.generators.shaclgen"), + ], + ids=["owl", "shacl"], +) +def test_graph_generator_collision_skips_rename(tmp_path, caplog, generator_cls, generator_module): + """Graph generators: myfoaf must NOT be renamed to 'foaf' when user claims that name.""" + import importlib + + mod = importlib.import_module(generator_module) + cls = getattr(mod, generator_cls) + + schema_path = _write_schema(tmp_path, SCHEMA_COLLISION) + with caplog.at_level(logging.WARNING): + gen = cls(schema_path, normalize_prefixes=True) + graph = gen.as_graph() + bound = {str(p): str(n) for p, n in graph.namespaces()} + assert "myfoaf" in bound, "Non-standard 'myfoaf' must remain when collision prevents renaming" + assert bound["myfoaf"] == "http://xmlns.com/foaf/0.1/" + assert "collision" in caplog.text.lower(), f"Expected collision warning, got: {caplog.text}" + + +def test_context_collision_preserves_user_prefix(tmp_path, caplog): + """JSON-LD: user's 'foaf: https://something-else.org/' must survive.""" + from linkml.generators.jsonldcontextgen import ContextGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_COLLISION) + with caplog.at_level(logging.WARNING): + ctx = json.loads(ContextGenerator(schema_path, normalize_prefixes=True).serialize())["@context"] + # User's 'foaf' binding preserved + foaf_val = ctx.get("foaf") + if isinstance(foaf_val, dict): + foaf_val = foaf_val.get("@id", "") + assert foaf_val == "https://something-else.org/", f"User's 'foaf' binding must be preserved, got: {foaf_val}" + # myfoaf must remain (not renamed to foaf) + assert "myfoaf" in ctx, "Non-standard 'myfoaf' must remain 
when collision prevents renaming" + # Warning emitted + assert "collision" in caplog.text.lower(), f"Expected collision warning, got: {caplog.text}" + + +# ── JSONLDGenerator Flag Forwarding Tests ───────────────────────────── + + +def test_jsonld_generator_forwards_normalize_prefixes(tmp_path): + """JSONLDGenerator must pass normalize_prefixes to embedded ContextGenerator. + + Without forwarding, the inline @context in JSON-LD output would keep + non-standard prefix aliases even when --normalize-prefixes is set. + """ + from linkml.generators.jsonldgen import JSONLDGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + out = JSONLDGenerator(schema_path, normalize_prefixes=True).serialize() + parsed = json.loads(out) + # The @context may be a list; find the dict entry + ctx = parsed.get("@context", {}) + if isinstance(ctx, list): + for item in ctx: + if isinstance(item, dict): + ctx = item + break + assert "sdo" not in ctx, "normalize_prefixes not forwarded: 'sdo' still in embedded @context" + + +# ── Phase 2 HTTP/HTTPS Overwrite Bug Tests ──────────────────────────── + + +def test_phase2_does_not_overwrite_https_with_http(tmp_path): + """When Phase 1 binds schema → https://schema.org/, Phase 2 must not + overwrite it with http://schema.org/ from the runtime metamodel. + + Reproduction: linkml:types imports bring schema: http://schema.org/ + (HTTP) while the user schema has sdo: https://schema.org/ (HTTPS). + Phase 1 normalises sdo → schema (HTTPS). Phase 2 must not then + rebind schema → http://schema.org/ when it encounters the runtime + HTTP binding. 
+ """ + from linkml.generators.owlgen import OwlSchemaGenerator + + schema_path = _write_schema(tmp_path, SCHEMA_SDO) + gen = OwlSchemaGenerator(schema_path, normalize_prefixes=True) + graph = gen.as_graph() + bound = {str(p): str(n) for p, n in graph.namespaces()} + assert "schema" in bound, f"Expected 'schema' in bindings, got: {sorted(bound)}" + # MUST be HTTPS (from the user's schema), not HTTP (from runtime) + assert bound["schema"] == "https://schema.org/", ( + f"Phase 2 overwrote HTTPS with HTTP: schema bound to {bound['schema']}" + ) + + +def test_normalize_graph_prefixes_phase2_guard(): + """Direct unit test for the Phase 2 guard in normalize_graph_prefixes. + + Simulates the exact scenario: Phase 1 binds schema → https://schema.org/, + then Phase 2 encounters schema1 → http://schema.org/ and must NOT rebind. + """ + from rdflib import Graph, Namespace, URIRef + + from linkml.utils.generator import normalize_graph_prefixes + + g = Graph(bind_namespaces="none") + # Simulate Phase 1 result + g.bind("schema", Namespace("https://schema.org/")) + # Simulate runtime-injected HTTP variant (would appear as schema1) + g.bind("schema1", Namespace("http://schema.org/")) + # Add a triple so the graph isn't empty + g.add((URIRef("https://example.org/s"), URIRef("https://schema.org/name"), URIRef("https://example.org/o"))) + + normalize_graph_prefixes(g, {"sdo": "https://schema.org/"}) + + bound = {str(p): str(n) for p, n in g.namespaces()} + assert bound.get("schema") == "https://schema.org/", f"Phase 2 guard failed: schema bound to {bound.get('schema')}" + + +def test_empty_schema_no_crash(tmp_path): + """A schema with no custom prefixes must not crash normalize_graph_prefixes.""" + from linkml.generators.owlgen import OwlSchemaGenerator + + (tmp_path / "empty.yaml").write_text( + textwrap.dedent("""\ + id: https://example.org/empty + name: empty + default_prefix: ex + prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/ + imports: + - linkml:types 
+ """), + encoding="utf-8", + ) + # Should not raise + gen = OwlSchemaGenerator(str(tmp_path / "empty.yaml"), normalize_prefixes=True) + ttl = gen.serialize() + assert len(ttl) > 0 diff --git a/uv.lock b/uv.lock index a0d9d6942c..49c5573281 100644 --- a/uv.lock +++ b/uv.lock @@ -1156,7 +1156,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/ed/6bfa4109fcb23a58819600392564fea69cdc6551ffd5e69ccf1d52a40cbc/greenlet-3.2.4-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8c68325b0d0acf8d91dde4e6f930967dd52a5302cd4062932a6b2e7c2969f47c", size = 271061, upload-time = "2025-08-07T13:17:15.373Z" }, { url = "https://files.pythonhosted.org/packages/2a/fc/102ec1a2fc015b3a7652abab7acf3541d58c04d3d17a8d3d6a44adae1eb1/greenlet-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:94385f101946790ae13da500603491f04a76b6e4c059dab271b3ce2e283b2590", size = 629475, upload-time = "2025-08-07T13:42:54.009Z" }, { url = "https://files.pythonhosted.org/packages/c5/26/80383131d55a4ac0fb08d71660fd77e7660b9db6bdb4e8884f46d9f2cc04/greenlet-3.2.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f10fd42b5ee276335863712fa3da6608e93f70629c631bf77145021600abc23c", size = 640802, upload-time = "2025-08-07T13:45:25.52Z" }, - { url = "https://files.pythonhosted.org/packages/9f/7c/e7833dbcd8f376f3326bd728c845d31dcde4c84268d3921afcae77d90d08/greenlet-3.2.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c8c9e331e58180d0d83c5b7999255721b725913ff6bc6cf39fa2a45841a4fd4b", size = 636703, upload-time = "2025-08-07T13:53:12.622Z" }, { url = "https://files.pythonhosted.org/packages/e9/49/547b93b7c0428ede7b3f309bc965986874759f7d89e4e04aeddbc9699acb/greenlet-3.2.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58b97143c9cc7b86fc458f215bd0932f1757ce649e05b640fea2e79b54cedb31", size = 635417, upload-time = "2025-08-07T13:18:25.189Z" }, { url = 
"https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" }, { url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" }, @@ -1167,7 +1166,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, { url = "https://files.pythonhosted.org/packages/ae/8f/95d48d7e3d433e6dae5b1682e4292242a53f22df82e6d3dda81b1701a960/greenlet-3.2.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:94abf90142c2a18151632371140b3dba4dee031633fe614cb592dbb6c9e17bc3", size = 644646, upload-time = "2025-08-07T13:45:26.523Z" }, - { url = "https://files.pythonhosted.org/packages/d5/5e/405965351aef8c76b8ef7ad370e5da58d57ef6068df197548b015464001a/greenlet-3.2.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:4d1378601b85e2e5171b99be8d2dc85f594c79967599328f95c1dc1a40f1c633", size = 640519, upload-time = "2025-08-07T13:53:13.928Z" }, { url = 
"https://files.pythonhosted.org/packages/25/5d/382753b52006ce0218297ec1b628e048c4e64b155379331f25a7316eb749/greenlet-3.2.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0db5594dce18db94f7d1650d7489909b57afde4c580806b8d9203b6e79cdc079", size = 639707, upload-time = "2025-08-07T13:18:27.146Z" }, { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, @@ -1178,7 +1176,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, - { url = 
"https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, @@ -1189,7 +1186,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = 
"https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, - { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -1200,7 +1196,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = 
"https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, - { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, @@ -2102,7 +2097,7 @@ 
requires-dist = [ { name = "openpyxl" }, { name = "parse" }, { name = "prefixcommons", specifier = ">=0.1.7" }, - { name = "prefixmaps", specifier = ">=0.2.2" }, + { name = "prefixmaps", git = "https://github.com/linkml/prefixmaps?rev=75435150a1b31760b9780af2b64a265943a9b263" }, { name = "pydantic", specifier = ">=2.0.0,<3.0.0" }, { name = "pyjsg", specifier = ">=0.11.6" }, { name = "pyshex", specifier = ">=0.7.20" }, @@ -3321,16 +3316,12 @@ wheels = [ [[package]] name = "prefixmaps" -version = "0.2.6" -source = { registry = "https://pypi.org/simple" } +version = "0.2.7.post2.dev0+7543515" +source = { git = "https://github.com/linkml/prefixmaps?rev=75435150a1b31760b9780af2b64a265943a9b263#75435150a1b31760b9780af2b64a265943a9b263" } dependencies = [ { name = "curies" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4d/cf/f588bcdfd2c841839b9d59ce219a46695da56aa2805faff937bbafb9ee2b/prefixmaps-0.2.6.tar.gz", hash = "sha256:7421e1244eea610217fa1ba96c9aebd64e8162a930dc0626207cd8bf62ecf4b9", size = 709899, upload-time = "2024-10-17T16:30:57.738Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/89/b2/2b2153173f2819e3d7d1949918612981bc6bd895b75ffa392d63d115f327/prefixmaps-0.2.6-py3-none-any.whl", hash = "sha256:f6cef28a7320fc6337cf411be212948ce570333a0ce958940ef684c7fb192a62", size = 754732, upload-time = "2024-10-17T16:30:55.731Z" }, -] [[package]] name = "prettytable" From 6b3dc09a93e1124db2b7bf7d3bbe2185fd64a24d Mon Sep 17 00:00:00 2001 From: Carlo van Driesten Date: Sat, 25 Apr 2026 18:12:28 +0200 Subject: [PATCH 03/15] feat(generators): add --default-language flag for language-tagged literals Add a `--default-language` CLI option to both gen-owl and gen-shacl that emits BCP 47 language-tagged string literals for human-readable annotations. 
gen-owl changes: - New `default_language` field on OwlSchemaGenerator - `_LANGUAGE_TAGGABLE_RANGES` frozenset (string, ncname) guards tagging - `_resolve_language()` checks element-level in_language first, then default - `_literal()` helper creates properly tagged Literal objects - `add_metadata()` tags string-range and fallback-range literals - `add_enum()` PV labels respect language tags - New `--default-language` Click option gen-shacl changes: - New `default_language` field on ShaclGenerator - NodeShape rdfs:label / rdfs:comment get language tags - PropertyShape sh:name / sh:description get language tags via prop_pv_text() - Numeric literals (sh:order, sh:minCount, etc.) are never tagged - New `--default-language` Click option Tests: - 3 new OWL tests: tagged labels, backward-compat plain literals, URI ranges - 4 new SHACL tests: NodeShape, PropertyShape, plain literals, numeric guard Signed-off-by: Carlo van Driesten --- .../linkml/src/linkml/generators/owlgen.py | 96 ++++++- .../linkml/src/linkml/generators/shaclgen.py | 83 +++++- tests/linkml/test_generators/test_owlgen.py | 268 ++++++++++++++++++ tests/linkml/test_generators/test_shaclgen.py | 220 ++++++++++++++ 4 files changed, 658 insertions(+), 9 deletions(-) diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index 91b8a76507..1d69890c0b 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -2,12 +2,13 @@ import logging import os +import re from collections import defaultdict from collections.abc import Iterable, Sequence from copy import copy from dataclasses import dataclass, field from enum import Enum, unique -from typing import Any, TypeAlias, TypeVar +from typing import Any, ClassVar, TypeAlias, TypeVar import click import rdflib @@ -238,6 +239,73 @@ def _present(values: Iterable[_T | None]) -> list[_T]: - have no ``rdfs:range`` restriction (any IRI is valid) """ + 
default_language: str | None = None + """Default BCP 47 language tag for human-readable string literals. + + When set, ``rdfs:label``, ``rdfs:comment``, ``skos:definition``, + ``dcterms:title``, and other annotation literals are emitted with the + specified language tag (e.g. ``"Person"@en``). An element-level + ``in_language`` value overrides this default for that element. + + Technical literals (URIs, numeric constraints, XSD facets) are never + language-tagged. Conforms to :rfc:`5646` (BCP 47). + """ + + # Metaslot ranges that represent human-readable text (eligible for language tags). + # Everything else (uri, uriorcurie, datetime, boolean, integer, classes, …) is technical. + _LANGUAGE_TAGGABLE_RANGES: ClassVar[frozenset[str]] = frozenset({"string", "ncname"}) + + # Syntactic validator for BCP 47 language tags (RFC 5646 §2.1 ABNF). + # Each group maps 1:1 to an ABNF production: language, script, region, + # variant, extension, privateuse, and grandfathered (irregular + regular). + _BCP47_RE: ClassVar[re.Pattern[str]] = re.compile( + r"^(?:" + r"(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3})|[A-Za-z]{4}|[A-Za-z]{5,8})" + r"(?:-[A-Za-z]{4})?" + r"(?:-(?:[A-Za-z]{2}|\d{3}))?" + r"(?:-(?:[A-Za-z\d]{5,8}|\d[A-Za-z\d]{3}))*" + r"(?:-[0-9A-WY-Za-wy-z](?:-[A-Za-z\d]{2,8})+)*" + r"(?:-x(?:-[A-Za-z\d]{1,8})+)?" + r"|x(?:-[A-Za-z\d]{1,8})+" + r"|en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon" + r"|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu" + r"|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE" + r"|art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu" + r"|zh-hakka|zh-min|zh-min-nan|zh-xiang" + r")$", + re.ASCII, + ) + + def _resolve_language(self, element: "Definition | PermissibleValue | None" = None) -> str | None: + """Return the BCP 47 language tag for *element*, or ``None``. + + Resolution order: + 1. ``element.in_language`` (element-level override) + 2. ``self.default_language`` (generator-level default) + + Empty or whitespace-only strings are normalised to ``None``. 
+ Tags that do not conform to RFC 5646 §2.1 syntax produce a warning. + """ + if element is not None: + element_lang = getattr(element, "in_language", None) + if element_lang and element_lang.strip(): + tag = element_lang.strip() + if not self._BCP47_RE.match(tag): + logger.warning("in_language value %r is not a well-formed BCP 47 tag (RFC 5646 §2.1)", tag) + return tag + tag = (self.default_language or "").strip() or None + if tag is not None and not self._BCP47_RE.match(tag): + logger.warning("--default-language value %r is not a well-formed BCP 47 tag (RFC 5646 §2.1)", tag) + return tag + + def _literal(self, value: str, element: "Definition | PermissibleValue | None" = None) -> Literal: + """Create a language-tagged ``Literal`` for a human-readable string. + + If no language tag is resolved, falls back to a plain literal. + """ + lang = self._resolve_language(element) + return Literal(value, lang=lang) if lang else Literal(value) + def as_graph(self) -> Graph: """ Generate an rdflib Graph from the LinkML schema. @@ -314,6 +382,8 @@ def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None: Add annotation properties. Set the profile attribute to the appropriate OWL profile. + Human-readable string literals are language-tagged when + ``default_language`` is set or the element has ``in_language``. 
:param e: schema element :param uri: URI representation of schema element @@ -323,6 +393,7 @@ def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None: msv = self.metamodel_schemaview this_sv = self.schemaview sn_mappings = msv.slot_name_mappings() + lang = self._resolve_language(e) # iterate through all the assigned metamodel slots for metaslot_name, metaslot_value in vars(e).items(): @@ -347,6 +418,8 @@ def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None: obj = URIRef(v) elif metaslot_range == "uriorcurie": obj = URIRef(this_sv.expand_curie(v)) + elif metaslot_range in self._LANGUAGE_TAGGABLE_RANGES and lang: + obj = Literal(v, lang=lang) else: obj = Literal(v) elif metaslot_range in msv.all_subsets(): @@ -358,7 +431,7 @@ def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None: # else: # logger.debug(f"Skipping {uri} {metaslot_uri} => {v}") else: - obj = Literal(v) + obj = Literal(v, lang=lang) if lang else Literal(v) self.graph.add((uri, metaslot_uri, obj)) for k, v in e.annotations.items(): @@ -375,7 +448,11 @@ def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None: if k_uri == k: k_uri = None if k_uri: - self.graph.add((uri, URIRef(k_uri), Literal(v.value))) + if isinstance(v.value, str): + obj = self._literal(v.value, e) + else: + obj = Literal(v.value) + self.graph.add((uri, URIRef(k_uri), obj)) def add_class(self, cls: ClassDefinition) -> None: """ @@ -1106,7 +1183,7 @@ def add_enum(self, e: EnumDefinition) -> None: if not isinstance(pv_node, Literal): self.add_metadata(pv, pv_node) g.add((pv_node, RDF.type, pv_owl_type)) - g.add((pv_node, RDFS.label, Literal(pv.text))) + g.add((pv_node, RDFS.label, self._literal(pv.text, pv))) # TODO: make this configurable # self._add_element_properties(pv_uri, pv) if self.metaclasses: @@ -1697,6 +1774,17 @@ def slot_owl_type(self, slot: SlotDefinition) -> URIRef: "the JSON-LD context generator (--xsd-anyuri-as-iri → @type: @id)." 
), ) +@click.option( + "--default-language", + default=None, + show_default=True, + help=( + "Default BCP 47 language tag for human-readable string literals " + "(e.g. en, de, zh-Hans). When set, rdfs:label, rdfs:comment, " + "skos:definition and other text annotations are emitted with the " + "specified language tag. Element-level in_language overrides this." + ), +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile: str, metadata_profile: str, **kwargs: Any) -> None: """Generate an OWL representation of a LinkML model diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index f6888cf0a8..d876f7f826 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -1,5 +1,6 @@ import logging import os +import re from collections.abc import Callable from dataclasses import dataclass @@ -74,6 +75,15 @@ class ShaclGenerator(Generator): """ expand_subproperty_of: bool = True """If True, expand subproperty_of to sh:in constraints with slot descendants""" + + default_language: str | None = None + """Default BCP 47 language tag for human-readable string literals. + + When set, ``sh:name``, ``sh:description``, ``rdfs:label``, and + ``rdfs:comment`` literals are emitted with the specified language tag. + Conforms to :rfc:`5646` (BCP 47). + """ + generatorname = os.path.basename(__file__) generatorversion = "0.0.1" valid_formats = ["ttl"] @@ -81,6 +91,49 @@ class ShaclGenerator(Generator): visit_all_class_slots = False uses_schemaloader = False + # Syntactic validator for BCP 47 language tags (RFC 5646 §2.1 ABNF). + # Each group maps 1:1 to an ABNF production: language, script, region, + # variant, extension, privateuse, and grandfathered (irregular + regular). + _BCP47_RE: re.Pattern[str] = re.compile( + r"^(?:" + r"(?:(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3})|[A-Za-z]{4}|[A-Za-z]{5,8})" + r"(?:-[A-Za-z]{4})?" 
+ r"(?:-(?:[A-Za-z]{2}|\d{3}))?" + r"(?:-(?:[A-Za-z\d]{5,8}|\d[A-Za-z\d]{3}))*" + r"(?:-[0-9A-WY-Za-wy-z](?:-[A-Za-z\d]{2,8})+)*" + r"(?:-x(?:-[A-Za-z\d]{1,8})+)?" + r"|x(?:-[A-Za-z\d]{1,8})+" + r"|en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon" + r"|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu" + r"|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE" + r"|art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu" + r"|zh-hakka|zh-min|zh-min-nan|zh-xiang" + r")$", + re.ASCII, + ) + + def _resolve_language(self, element=None) -> str | None: + """Return the BCP 47 language tag for *element*, or ``None``. + + Resolution order: + 1. ``element.in_language`` (element-level override) + 2. ``self.default_language`` (generator-level default) + + Empty or whitespace-only strings are normalised to ``None``. + Tags that do not conform to RFC 5646 §2.1 syntax produce a warning. + """ + if element is not None: + element_lang = getattr(element, "in_language", None) + if element_lang and element_lang.strip(): + tag = element_lang.strip() + if not self._BCP47_RE.match(tag): + logger.warning("in_language value %r is not a well-formed BCP 47 tag (RFC 5646 §2.1)", tag) + return tag + tag = (self.default_language or "").strip() or None + if tag is not None and not self._BCP47_RE.match(tag): + logger.warning("--default-language value %r is not a well-formed BCP 47 tag (RFC 5646 §2.1)", tag) + return tag + def __post_init__(self) -> None: super().__post_init__() self.generate_header() @@ -136,13 +189,13 @@ def shape_pv(p, v): if c.title is not None: # Use rdfs:label for NodeShape titles per SHACL spec. # sh:name has rdfs:domain of sh:PropertyShape. See issue #3059. - shape_pv(RDFS.label, Literal(c.title)) + shape_pv(RDFS.label, Literal(c.title, lang=self._resolve_language(c))) if c.description is not None: # Use rdfs:comment for NodeShape descriptions per SHACL spec. 
# sh:description has rdfs:domain of sh:PropertyShape, so using it # on NodeShapes causes RDFS-aware validators to incorrectly infer # the NodeShape is also a PropertyShape. See issue #3059. - shape_pv(RDFS.comment, Literal(c.description)) + shape_pv(RDFS.comment, Literal(c.description, lang=self._resolve_language(c))) shape_pv(SH.ignoredProperties, self._build_ignored_properties(g, c)) @@ -167,11 +220,15 @@ def prop_pv_literal(p, v): if v is not None: g.add((pnode, p, Literal(v))) + def prop_pv_text(p, v): + if v is not None: + g.add((pnode, p, Literal(v, lang=self._resolve_language(s)))) + prop_pv(SH.path, slot_uri) prop_pv_literal(SH.order, order) order += 1 - prop_pv_literal(SH.name, s.title) - prop_pv_literal(SH.description, s.description) + prop_pv_text(SH.name, s.title) + prop_pv_text(SH.description, s.description) # minCount if s.minimum_cardinality: prop_pv_literal(SH.minCount, s.minimum_cardinality) @@ -433,9 +490,14 @@ def _add_annotations(self, func: Callable, item) -> None: else: N_predicate = Literal(a["tag"], datatype=XSD.string) # If the value is a string and ':' is in the value, treat it as a CURIE, - # otherwise treat as Literal with derived XSD datatype + # otherwise treat as Literal with derived XSD datatype. + # String annotations are language-tagged when default_language is set; + # non-string types (bool, int, float) keep their XSD datatype. + lang = self._resolve_language(item) if type(a["value"]) is extended_str and ":" in a["value"]: N_object = URIRef(sv.expand_curie(a["value"])) + elif isinstance(a["value"], str) and lang: + N_object = Literal(a["value"], lang=lang) else: N_object = Literal(a["value"], datatype=self._getXSDtype(a["value"])) @@ -530,6 +592,17 @@ def add_simple_data_type(func: Callable, r: ElementName) -> None: help="If --expand-subproperty-of (default), slots with subproperty_of will generate sh:in constraints " "containing all slot descendants. 
Use --no-expand-subproperty-of to disable this behavior.", ) +@click.option( + "--default-language", + default=None, + show_default=True, + help=( + "Default BCP 47 language tag for human-readable string literals " + "(e.g. en, de, zh-Hans). When set, sh:name, sh:description, " + "rdfs:label and rdfs:comment are emitted with the specified " + "language tag." + ), +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile, **args): """Generate SHACL turtle from a LinkML model""" diff --git a/tests/linkml/test_generators/test_owlgen.py b/tests/linkml/test_generators/test_owlgen.py index 062d4c31ac..9d4c714f53 100644 --- a/tests/linkml/test_generators/test_owlgen.py +++ b/tests/linkml/test_generators/test_owlgen.py @@ -994,3 +994,271 @@ def test_children_are_mutually_disjoint( members_node = list(g.objects(disjoint_nodes[0], OWL.members))[0] members = set(Collection(g, members_node)) assert members == {EX[name] for name in child_names} + + +# --------------------------------------------------------------------------- +# --default-language tests +# --------------------------------------------------------------------------- + + +def _build_lang_test_schema(): + """Build a small schema with classes, slots, and an enum for language-tag testing.""" + sb = SchemaBuilder() + sb.add_slot( + SlotDefinition( + "vehicle_name", + range="string", + description="The vehicle name.", + title="Name", + ) + ) + sb.add_slot( + SlotDefinition( + "color", + range="ColorEnum", + description="Paint color.", + ) + ) + sb.add_class( + "Vehicle", + slots=["vehicle_name", "color"], + description="A road vehicle.", + title="Vehicle", + ) + sb.add_enum( + "ColorEnum", + permissible_values=[ + PermissibleValue(text="Red", description="A warm color."), + PermissibleValue(text="Blue", description="A cool color."), + ], + ) + sb.add_defaults() + return sb.schema + + +def test_default_language_tags_owl_labels(): + """With --default-language en, rdfs:label and skos:definition get @en.""" 
+ schema = _build_lang_test_schema() + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="en", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + # Class label + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert Literal("Vehicle", lang="en") in labels + + # Class description + defs = list(g.objects(EX.Vehicle, SKOS.definition)) + assert Literal("A road vehicle.", lang="en") in defs + + # Enum PV label — PVs are emitted as <{enum_uri}#{pv_text}> + pv_red = URIRef(str(EX.ColorEnum) + "#Red") + pv_labels = list(g.objects(pv_red, RDFS.label)) + assert Literal("Red", lang="en") in pv_labels + + # No plain (untagged) literals should be present for these predicates + for lit in labels + defs + pv_labels: + assert lit.language == "en", f"Expected @en, got lang={lit.language!r} on {lit!r}" + + +def test_no_default_language_produces_plain_literals(): + """Without --default-language, literals have no language tag (backward-compat).""" + schema = _build_lang_test_schema() + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert Literal("Vehicle") in labels + for lit in labels: + assert lit.language is None, f"Expected no language tag, got {lit.language!r}" + + +def test_default_language_does_not_tag_uri_range_metaslots(): + """Metaslots with range 'uri' or 'uriorcurie' must produce URIRef, never tagged literals.""" + schema = _build_lang_test_schema() + # id_prefixes has range uriorcurie — set it to verify no language tag + schema.id_prefixes = ["http://example.org/"] + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="de", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + # Verify labels do get the tag + labels = 
list(g.objects(EX.Vehicle, RDFS.label)) + assert Literal("Vehicle", lang="de") in labels + + # Verify integer/boolean metaslots (if any) don't get tags + # The schema title should be tagged (string range) + assert any(isinstance(o, Literal) and o.language == "de" for o in g.objects(None, RDFS.label)), ( + "At least one label should be @de" + ) + + +def test_default_language_in_language_override(): + """Element-level in_language overrides the generator default_language.""" + schema = _build_lang_test_schema() + schema.classes["Vehicle"].in_language = "de" + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="en", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + # Vehicle class should use element-level "de", not default "en" + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert Literal("Vehicle", lang="de") in labels + assert Literal("Vehicle", lang="en") not in labels + + # ColorEnum should still use the default "en" (no override) + enum_labels = list(g.objects(EX.ColorEnum, RDFS.label)) + assert Literal("ColorEnum", lang="en") in enum_labels + + +def test_default_language_annotations_tagged(): + """OWL annotations with string values are language-tagged.""" + from linkml_runtime.linkml_model.meta import Annotation, Prefix + + sb = SchemaBuilder() + sb.add_class("Widget", description="A widget.") + sb.add_defaults() + sb.schema.prefixes["skos"] = Prefix( + prefix_prefix="skos", + prefix_reference="http://www.w3.org/2004/02/skos/core#", + ) + sb.schema.classes["Widget"].annotations["skos:altLabel"] = Annotation(tag="skos:altLabel", value="Gadget") + + owl = OwlSchemaGenerator( + sb.schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="en", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + alt_labels = list(g.objects(EX.Widget, SKOS.altLabel)) + assert Literal("Gadget", lang="en") in alt_labels + + +def 
test_default_language_empty_string_treated_as_none(): + """An empty string default_language is normalised to None (no tags).""" + schema = _build_lang_test_schema() + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert Literal("Vehicle") in labels + for lit in labels: + assert lit.language is None, f"Expected no lang tag, got {lit.language!r}" + + +def test_default_language_whitespace_only_treated_as_none(): + """A whitespace-only default_language is normalised to None (no tags).""" + schema = _build_lang_test_schema() + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language=" ", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert Literal("Vehicle") in labels + for lit in labels: + assert lit.language is None, f"Expected no lang tag, got {lit.language!r}" + + +def test_default_language_bcp47_warning(caplog): + """A malformed BCP 47 tag logs a warning but still produces output.""" + import logging + + schema = _build_lang_test_schema() + # "toolongtag" passes rdflib's lax regex but fails strict BCP 47 (max 8 chars for subtag). 
+ with caplog.at_level(logging.WARNING): + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="toolongtag", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + # Tag is still applied (warning, not error) + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert any(lit.language == "toolongtag" for lit in labels) + # Warning was emitted + assert any("not a well-formed BCP 47 tag" in rec.message for rec in caplog.records) + + +def test_default_language_bcp47_valid_no_warning(caplog): + """A well-formed BCP 47 tag does not log any warning.""" + import logging + + schema = _build_lang_test_schema() + with caplog.at_level(logging.WARNING): + OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="en", + ).serialize() + assert not any("BCP 47" in rec.message for rec in caplog.records) + + +def test_default_language_in_language_override_bcp47_warning(caplog): + """A malformed in_language value logs a warning.""" + import logging + + schema = _build_lang_test_schema() + # "toolongtag" passes rdflib but fails strict BCP 47. 
+ schema.classes["Vehicle"].in_language = "toolongtag" + with caplog.at_level(logging.WARNING): + owl = OwlSchemaGenerator( + schema, + mergeimports=False, + metaclasses=False, + type_objects=False, + default_language="en", + ).serialize() + g = Graph() + g.parse(data=owl, format="turtle") + + # Vehicle uses the (malformed) in_language, not the default + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert any(lit.language == "toolongtag" for lit in labels) + assert any("in_language" in rec.message and "toolongtag" in rec.message for rec in caplog.records) diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index c99547df7e..a7c21620f9 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -7,6 +7,8 @@ from linkml.generators.shacl.shacl_data_type import ShaclDataType from linkml.generators.shaclgen import ShaclGenerator +from linkml_runtime.linkml_model import SlotDefinition +from linkml_runtime.utils.schema_builder import SchemaBuilder EXPECTED = [ ( @@ -1160,3 +1162,221 @@ def test_nodeidentifier_range_produces_blank_node_or_iri(): uri_ref = props["https://example.org/uriRef"] uri_kinds = list(g.objects(uri_ref, SH.nodeKind)) assert SH.IRI in uri_kinds, f"Expected sh:IRI for uri, got {uri_kinds}" + + +# --------------------------------------------------------------------------- +# --default-language tests +# --------------------------------------------------------------------------- + +EX = rdflib.Namespace("http://example.org/test-schema/") + + +def _build_shacl_lang_schema(): + """Build a schema with title/description for language-tag testing.""" + sb = SchemaBuilder() + sb.add_slot( + SlotDefinition( + "vehicle_name", + range="string", + description="The vehicle name.", + title="Name", + ) + ) + sb.add_class( + "Vehicle", + slots=["vehicle_name"], + description="A road vehicle.", + title="Vehicle", + ) + sb.add_defaults() + return sb.schema + 
+ +def _parse_shacl(schema, **kwargs): + shacl = ShaclGenerator(schema, mergeimports=False, **kwargs).serialize() + g = rdflib.Graph() + g.parse(data=shacl) + return g + + +def _get_prop_objects(g, shape_uri, prop_path_uri, predicate): + """Get predicate values for the property shape with the given sh:path.""" + for prop_node in g.objects(shape_uri, SH.property): + paths = list(g.objects(prop_node, SH.path)) + if paths and paths[0] == prop_path_uri: + return list(g.objects(prop_node, predicate)) + return [] + + +def test_shacl_default_language_node_shape(): + """NodeShape rdfs:label and rdfs:comment get @en with --default-language.""" + schema = _build_shacl_lang_schema() + g = _parse_shacl(schema, default_language="en") + + vehicle_shape = EX.Vehicle + + labels = list(g.objects(vehicle_shape, RDFS.label)) + assert Literal("Vehicle", lang="en") in labels + + comments = list(g.objects(vehicle_shape, RDFS.comment)) + assert Literal("A road vehicle.", lang="en") in comments + + +def test_shacl_default_language_property_shape(): + """PropertyShape sh:name and sh:description get @en with --default-language.""" + schema = _build_shacl_lang_schema() + g = _parse_shacl(schema, default_language="en") + + vehicle_shape = EX.Vehicle + slot_uri = EX.vehicle_name + + sh_names = _get_prop_objects(g, vehicle_shape, slot_uri, SH["name"]) + assert Literal("Name", lang="en") in sh_names + + sh_descs = _get_prop_objects(g, vehicle_shape, slot_uri, SH.description) + assert Literal("The vehicle name.", lang="en") in sh_descs + + +def test_shacl_no_default_language_plain_literals(): + """Without --default-language, literals have no language tag (backward-compat).""" + schema = _build_shacl_lang_schema() + g = _parse_shacl(schema) + + vehicle_shape = EX.Vehicle + + labels = list(g.objects(vehicle_shape, RDFS.label)) + assert Literal("Vehicle") in labels + for lit in labels: + assert lit.language is None, f"Expected no lang tag, got {lit.language!r}" + + slot_uri = EX.vehicle_name + 
sh_names = _get_prop_objects(g, vehicle_shape, slot_uri, SH["name"]) + assert Literal("Name") in sh_names + for lit in sh_names: + assert lit.language is None, f"Expected no lang tag, got {lit.language!r}" + + +def test_shacl_default_language_numeric_literals_untagged(): + """Numeric literals (sh:order, sh:minCount, etc.) must never get language tags.""" + schema = _build_shacl_lang_schema() + schema.slots["vehicle_name"].required = True + g = _parse_shacl(schema, default_language="fr") + + vehicle_shape = EX.Vehicle + slot_uri = EX.vehicle_name + + orders = _get_prop_objects(g, vehicle_shape, slot_uri, SH.order) + for lit in orders: + assert lit.language is None, f"sh:order must not be language-tagged: {lit!r}" + + min_counts = _get_prop_objects(g, vehicle_shape, slot_uri, SH.minCount) + for lit in min_counts: + assert lit.language is None, f"sh:minCount must not be language-tagged: {lit!r}" + + +def test_shacl_default_language_annotations_tagged(): + """SHACL string annotations are language-tagged with --default-language.""" + from linkml_runtime.linkml_model.meta import Annotation, Prefix + + schema = _build_shacl_lang_schema() + schema.prefixes["skos"] = Prefix( + prefix_prefix="skos", + prefix_reference="http://www.w3.org/2004/02/skos/core#", + ) + schema.classes["Vehicle"].annotations["skos:altLabel"] = Annotation(tag="skos:altLabel", value="Car") + g = _parse_shacl(schema, default_language="en", include_annotations=True) + + vehicle_shape = EX.Vehicle + SKOS = rdflib.Namespace("http://www.w3.org/2004/02/skos/core#") + alt_labels = list(g.objects(vehicle_shape, SKOS.altLabel)) + assert Literal("Car", lang="en") in alt_labels + + +def test_shacl_default_language_empty_string_treated_as_none(): + """An empty string default_language is normalised to None (no tags).""" + schema = _build_shacl_lang_schema() + g = _parse_shacl(schema, default_language="") + + vehicle_shape = EX.Vehicle + + labels = list(g.objects(vehicle_shape, RDFS.label)) + assert 
Literal("Vehicle") in labels + for lit in labels: + assert lit.language is None, f"Expected no lang tag, got {lit.language!r}" + + +def test_shacl_default_language_whitespace_only_treated_as_none(): + """A whitespace-only default_language is normalised to None (no tags).""" + schema = _build_shacl_lang_schema() + g = _parse_shacl(schema, default_language=" ") + + vehicle_shape = EX.Vehicle + + labels = list(g.objects(vehicle_shape, RDFS.label)) + assert Literal("Vehicle") in labels + for lit in labels: + assert lit.language is None, f"Expected no lang tag, got {lit.language!r}" + + +def test_shacl_default_language_in_language_override(): + """Element-level in_language overrides the generator default_language in SHACL.""" + schema = _build_shacl_lang_schema() + schema.classes["Vehicle"].in_language = "de" + g = _parse_shacl(schema, default_language="en") + + vehicle_shape = EX.Vehicle + + # Vehicle class should use element-level "de", not default "en" + labels = list(g.objects(vehicle_shape, RDFS.label)) + assert Literal("Vehicle", lang="de") in labels + assert Literal("Vehicle", lang="en") not in labels + + comments = list(g.objects(vehicle_shape, RDFS.comment)) + assert Literal("A road vehicle.", lang="de") in comments + assert Literal("A road vehicle.", lang="en") not in comments + + +def test_shacl_default_language_bcp47_warning(caplog): + """A malformed BCP 47 tag logs a warning but still produces output.""" + import logging + + schema = _build_shacl_lang_schema() + # "toolongtag" passes rdflib's lax regex but fails strict BCP 47. 
+ with caplog.at_level(logging.WARNING): + shacl = ShaclGenerator(schema, mergeimports=False, default_language="toolongtag").serialize() + g = rdflib.Graph() + g.parse(data=shacl) + + # Tag is still applied (warning, not error) + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert any(lit.language == "toolongtag" for lit in labels) + # Warning was emitted + assert any("not a well-formed BCP 47 tag" in rec.message for rec in caplog.records) + + +def test_shacl_default_language_bcp47_valid_no_warning(caplog): + """A well-formed BCP 47 tag does not log any warning.""" + import logging + + schema = _build_shacl_lang_schema() + with caplog.at_level(logging.WARNING): + ShaclGenerator(schema, mergeimports=False, default_language="en").serialize() + assert not any("BCP 47" in rec.message for rec in caplog.records) + + +def test_shacl_default_language_in_language_bcp47_warning(caplog): + """A malformed in_language value logs a warning in SHACL generator.""" + import logging + + schema = _build_shacl_lang_schema() + # "toolongtag" passes rdflib but fails strict BCP 47. 
+ schema.classes["Vehicle"].in_language = "toolongtag" + with caplog.at_level(logging.WARNING): + shacl = ShaclGenerator(schema, mergeimports=False, default_language="en").serialize() + g = rdflib.Graph() + g.parse(data=shacl) + + # Vehicle uses the (malformed) in_language, not the default + labels = list(g.objects(EX.Vehicle, RDFS.label)) + assert any(lit.language == "toolongtag" for lit in labels) + assert any("in_language" in rec.message and "toolongtag" in rec.message for rec in caplog.records) From 52fbd08198166eec5b20d9026f7e9448021bc4b8 Mon Sep 17 00:00:00 2001 From: jdsika Date: Sat, 25 Apr 2026 18:16:00 +0200 Subject: [PATCH 04/15] feat(gen-shacl): add --message-template for sh:message on property shapes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new --message-template option that attaches sh:message literals to each property shape using a user-defined template string. Supported placeholders: {name} — slot name (underscore-separated) {title} — slot title (human-readable), falls back to name {description} — slot description, falls back to empty string {comments} — slot comments joined with "; ", falls back to empty string {class} — enclosing class name {path} — property IRI (compact or full) The resulting message is stripped of leading/trailing whitespace and omitted entirely when empty (avoids blank sh:message literals). When --default-language is also set, the literal is language-tagged. 
Example: gen-shacl --message-template "{name} ({class}): {description} [{comments}]" Signed-off-by: Carlo van Driesten --- .../linkml/src/linkml/generators/shaclgen.py | 51 ++++ tests/linkml/test_generators/test_shaclgen.py | 229 ++++++++++++++++++ 2 files changed, 280 insertions(+) diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index d876f7f826..f917aaf8b4 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -84,6 +84,25 @@ class ShaclGenerator(Generator): Conforms to :rfc:`5646` (BCP 47). """ + message_template: str | None = None + """Template for ``sh:message`` on property shapes. + + When set, each property shape receives an ``sh:message`` literal built from + this template. The following placeholders are expanded: + + * ``{name}`` — the slot name (underscore-separated LinkML name) + * ``{title}`` — the slot title (human-readable), falls back to *name* + * ``{description}`` — the slot description, falls back to empty string + * ``{comments}`` — the slot comments joined with ``; ``, falls back to empty string + * ``{class}`` — the enclosing class name + * ``{path}`` — the property IRI (compact or full) + + Example: ``"Validation of {name} failed!"`` → + ``sh:message "Validation of has_speed failed!"`` + + If ``default_language`` is also set the literal is language-tagged. 
+ """ + generatorname = os.path.basename(__file__) generatorversion = "0.0.1" valid_formats = ["ttl"] @@ -136,6 +155,7 @@ def _resolve_language(self, element=None) -> str | None: def __post_init__(self) -> None: super().__post_init__() + self.message_template = (self.message_template or "").strip() or None self.generate_header() def generate_header(self) -> str: @@ -229,6 +249,25 @@ def prop_pv_text(p, v): order += 1 prop_pv_text(SH.name, s.title) prop_pv_text(SH.description, s.description) + + # sh:message from template + if self.message_template is not None: + try: + msg_text = self.message_template.format( + name=s.name, + title=s.title or s.name, + description=s.description or "", + comments="; ".join(s.comments) if s.comments else "", + **{"class": c.name}, + path=str(slot_uri), + ).strip() + except (KeyError, IndexError, ValueError) as exc: + raise ValueError( + f"Invalid placeholder {exc} in --message-template. " + f"Allowed: {{name}}, {{title}}, {{description}}, {{comments}}, {{class}}, {{path}}" + ) from None + if msg_text: + prop_pv_text(SH.message, msg_text) # minCount if s.minimum_cardinality: prop_pv_literal(SH.minCount, s.minimum_cardinality) @@ -603,6 +642,18 @@ def add_simple_data_type(func: Callable, r: ElementName) -> None: "language tag." ), ) +@click.option( + "--message-template", + default=None, + show_default=True, + help=( + "Template string for sh:message on each property shape. " + "Placeholders: {name} (slot name), {title} (slot title or name), " + "{description} (slot description), {comments} (slot comments joined with '; '), " + "{class} (class name), {path} (property IRI). 
" + 'Example: "{name} ({class}): {description} [{comments}]"' + ), +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile, **args): """Generate SHACL turtle from a LinkML model""" diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index a7c21620f9..a3c6aa5aa0 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -1192,6 +1192,34 @@ def _build_shacl_lang_schema(): return sb.schema +def _build_message_test_schema(): + """Build a schema for sh:message testing (includes a second slot without title).""" + sb = SchemaBuilder() + sb.add_slot( + SlotDefinition( + "vehicle_name", + range="string", + description="The vehicle name.", + title="Name", + required=True, + ) + ) + sb.add_slot( + SlotDefinition( + "speed", + range="integer", + description="Speed in km/h.", + ) + ) + sb.add_class( + "Vehicle", + slots=["vehicle_name", "speed"], + description="A road vehicle.", + ) + sb.add_defaults() + return sb.schema + + def _parse_shacl(schema, **kwargs): shacl = ShaclGenerator(schema, mergeimports=False, **kwargs).serialize() g = rdflib.Graph() @@ -1380,3 +1408,204 @@ def test_shacl_default_language_in_language_bcp47_warning(caplog): labels = list(g.objects(EX.Vehicle, RDFS.label)) assert any(lit.language == "toolongtag" for lit in labels) assert any("in_language" in rec.message and "toolongtag" in rec.message for rec in caplog.records) + + +# --------------------------------------------------------------------------- +# --message-template tests +# --------------------------------------------------------------------------- + + +def test_message_template_basic(): + """--message-template emits sh:message on every property shape.""" + schema = _build_message_test_schema() + g = _parse_shacl(schema, message_template="Validation of {name} failed!") + + vehicle_shape = EX.Vehicle + + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, 
SH.message) + assert Literal("Validation of vehicle_name failed!") in msgs + + msgs = _get_prop_objects(g, vehicle_shape, EX.speed, SH.message) + assert Literal("Validation of speed failed!") in msgs + + +def test_message_template_title_placeholder(): + """{title} expands to slot title, falling back to slot name.""" + schema = _build_message_test_schema() + g = _parse_shacl(schema, message_template="{title} is invalid") + + vehicle_shape = EX.Vehicle + + # vehicle_name has title="Name" + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, SH.message) + assert Literal("Name is invalid") in msgs + + # speed has no title → falls back to slot name + msgs = _get_prop_objects(g, vehicle_shape, EX.speed, SH.message) + assert Literal("speed is invalid") in msgs + + +def test_message_template_class_placeholder(): + """{class} expands to the enclosing class name.""" + schema = _build_message_test_schema() + g = _parse_shacl(schema, message_template="{class}.{name} constraint violated") + + vehicle_shape = EX.Vehicle + + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, SH.message) + assert Literal("Vehicle.vehicle_name constraint violated") in msgs + + +def test_message_template_description_placeholder(): + """{description} expands to the slot description, empty string when absent.""" + schema = _build_message_test_schema() + g = _parse_shacl(schema, message_template="{name} ({class}): {description}") + + vehicle_shape = EX.Vehicle + + # vehicle_name has description="The vehicle name." + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, SH.message) + assert Literal("vehicle_name (Vehicle): The vehicle name.") in msgs + + # speed has description="Speed in km/h." 
+ msgs = _get_prop_objects(g, vehicle_shape, EX.speed, SH.message) + assert Literal("speed (Vehicle): Speed in km/h.") in msgs + + +def test_message_template_description_fallback_empty(): + """{description} falls back to empty string when slot has no description.""" + sb = SchemaBuilder() + sb.add_slot(SlotDefinition("bare_slot", range="string")) + sb.add_class("Thing", slots=["bare_slot"]) + sb.add_defaults() + g = _parse_shacl(sb.schema, message_template="{name}: {description}") + + msgs = _get_prop_objects(g, EX.Thing, EX.bare_slot, SH.message) + assert Literal("bare_slot:") in msgs + + +def test_message_template_comments_placeholder(): + """{comments} expands to slot comments joined with '; '.""" + sb = SchemaBuilder() + sb.add_slot( + SlotDefinition( + "wind_speed", + range="float", + description="Wind speed in metres per second.", + comments=["ISO 34503:2023, Section 10.2.3"], + ) + ) + sb.add_class("Weather", slots=["wind_speed"]) + sb.add_defaults() + g = _parse_shacl(sb.schema, message_template="{name} ({class}): {description} [{comments}]") + + msgs = _get_prop_objects(g, EX.Weather, EX.wind_speed, SH.message) + assert Literal("wind_speed (Weather): Wind speed in metres per second. 
[ISO 34503:2023, Section 10.2.3]") in msgs + + +def test_message_template_comments_multiple(): + """{comments} joins multiple comments with '; '.""" + sb = SchemaBuilder() + sb.add_slot( + SlotDefinition( + "temperature", + range="float", + comments=["ISO 34503:2023, Section 10.2", "Unit: Celsius"], + ) + ) + sb.add_class("Weather", slots=["temperature"]) + sb.add_defaults() + g = _parse_shacl(sb.schema, message_template="{comments}") + + msgs = _get_prop_objects(g, EX.Weather, EX.temperature, SH.message) + assert Literal("ISO 34503:2023, Section 10.2; Unit: Celsius") in msgs + + +def test_message_template_comments_fallback_empty(): + """{comments} falls back to empty string when slot has no comments.""" + sb = SchemaBuilder() + sb.add_slot(SlotDefinition("bare_slot", range="string")) + sb.add_class("Thing", slots=["bare_slot"]) + sb.add_defaults() + g = _parse_shacl(sb.schema, message_template="{name}: {comments}") + + msgs = _get_prop_objects(g, EX.Thing, EX.bare_slot, SH.message) + assert Literal("bare_slot:") in msgs + + +def test_no_message_template_no_sh_message(): + """Without --message-template, no sh:message is emitted (backward-compat).""" + schema = _build_message_test_schema() + g = _parse_shacl(schema) + + vehicle_shape = EX.Vehicle + + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, SH.message) + assert msgs == [] + + msgs = _get_prop_objects(g, vehicle_shape, EX.speed, SH.message) + assert msgs == [] + + +def test_message_template_invalid_placeholder_raises(): + """An invalid placeholder in --message-template raises ValueError.""" + import pytest + + schema = _build_message_test_schema() + with pytest.raises(ValueError, match="Invalid placeholder"): + _parse_shacl(schema, message_template="Error: {invalid}") + + +def test_message_template_positional_placeholder_raises(): + """Positional placeholders like {0} raise ValueError.""" + import pytest + + schema = _build_message_test_schema() + with pytest.raises(ValueError, match="Invalid 
placeholder"): + _parse_shacl(schema, message_template="Error: {0}") + + +def test_message_template_format_spec_raises(): + """Format specs like {name:d} raise ValueError.""" + import pytest + + schema = _build_message_test_schema() + with pytest.raises(ValueError, match="Invalid placeholder"): + _parse_shacl(schema, message_template="Error: {name:d}") + + +def test_message_template_empty_string_treated_as_none(): + """An empty message_template is normalised to None (no sh:message).""" + schema = _build_message_test_schema() + g = _parse_shacl(schema, message_template="") + + vehicle_shape = EX.Vehicle + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, SH.message) + assert msgs == [] + + +def test_message_template_whitespace_only_treated_as_none(): + """A whitespace-only message_template is normalised to None (no sh:message).""" + schema = _build_message_test_schema() + g = _parse_shacl(schema, message_template=" ") + + vehicle_shape = EX.Vehicle + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, SH.message) + assert msgs == [] + + +def test_message_template_with_default_language(): + """sh:message is language-tagged when both --message-template and --default-language are set.""" + schema = _build_message_test_schema() + g = _parse_shacl( + schema, + message_template="Validation of {name} failed!", + default_language="en", + ) + + vehicle_shape = EX.Vehicle + msgs = _get_prop_objects(g, vehicle_shape, EX.vehicle_name, SH.message) + assert Literal("Validation of vehicle_name failed!", lang="en") in msgs + + # Verify the message is NOT a plain literal + assert Literal("Validation of vehicle_name failed!") not in msgs From f6d4a0d50710ccb8053c651b3e1c10d27995d46c Mon Sep 17 00:00:00 2001 From: jdsika Date: Mon, 27 Apr 2026 21:30:12 +0200 Subject: [PATCH 05/15] feat(gen-shacl): generate sh:sparql constraints from LinkML rules Implement SHACL-SPARQL constraint generation for the boolean-guard pattern commonly used in conditional validation rules. 
When a LinkML class has rules: blocks with preconditions (value_presence: PRESENT) and postconditions (equals_string: true), the generator now emits sh:SPARQLConstraint nodes on the corresponding sh:NodeShape. Features: - New _add_rules() method translates recognised rule patterns to SPARQL - Boolean-guard pattern: if value present then flag must be true - Rule description mapped to sh:message on the constraint - Deactivated rules are skipped - Warnings emitted for bidirectional/open_world rule flags - New --emit-rules/--no-emit-rules CLI flag (default: enabled) - Full URI references in SPARQL (no PREFIX declarations needed) The generated SPARQL follows W3C SHACL Section 5 and uses the pre-bound $this variable per Section 5.3.1. Constraints are validated by pyshacl with advanced=True. Refs: linkml/linkml#2464 Signed-off-by: Carlo van Driesten --- .../linkml/src/linkml/generators/shaclgen.py | 247 +++++- .../input/shaclgen/boolean_guard_rules.yaml | 70 ++ tests/linkml/test_generators/test_shaclgen.py | 756 +++++++++++++++++- 3 files changed, 1071 insertions(+), 2 deletions(-) create mode 100644 tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index f917aaf8b4..1de99eaef4 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -15,7 +15,7 @@ from linkml.generators.shacl.shacl_data_type import ShaclDataType from linkml.generators.shacl.shacl_ifabsent_processor import ShaclIfAbsentProcessor from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments -from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName +from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName, PresenceEnum from linkml_runtime.utils.formatutils import underscore from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, 
extended_str @@ -103,6 +103,22 @@ class ShaclGenerator(Generator): If ``default_language`` is also set the literal is language-tagged. """ + emit_rules: bool = True + """Emit ``sh:sparql`` constraints from LinkML ``rules:`` blocks. + + When ``True`` (default), recognised rule patterns are translated into + SHACL-SPARQL constraints (``sh:SPARQLConstraint``) on the corresponding + ``sh:NodeShape``. Currently two patterns are recognised: + + * *Boolean guard* — a precondition with ``value_presence: PRESENT`` on a + value slot and a postcondition with ``equals_string: "true"`` on a + boolean flag slot. + * *Exclusive value* — a precondition with ``equals_string`` on a slot and + a postcondition with ``maximum_cardinality`` on the *same* slot. + + See `W3C SHACL §5 `_ + and `linkml/linkml#2464 `_. + """ generatorname = os.path.basename(__file__) generatorversion = "0.0.1" valid_formats = ["ttl"] @@ -383,10 +399,228 @@ def st_node_pv(p, v): if default_value: prop_pv(SH.defaultValue, default_value) + if self.emit_rules: + self._add_rules(g, class_uri_with_suffix, c) + return g LINKML_ANY_URI = "https://w3id.org/linkml/Any" + # ------------------------------------------------------------------- + # Rules → sh:sparql + # ------------------------------------------------------------------- + + def _add_rules(self, g: Graph, shape_uri: URIRef, cls: ClassDefinition) -> None: + """Emit ``sh:sparql`` constraints from LinkML ``rules:`` blocks. + + Each recognised rule is converted into an ``sh:SPARQLConstraint`` + attached to *shape_uri*. Unrecognised patterns are logged at + ``DEBUG`` level and silently skipped. + + Currently recognised patterns: + + * **Boolean guard** — a *precondition* with + ``value_presence: PRESENT`` on a value slot and a *postcondition* + with ``equals_string: "true"`` on a boolean flag slot. + + * **Exclusive value** — a *precondition* with ``equals_string`` on + a slot and a *postcondition* with ``maximum_cardinality`` on the + *same* slot. 
Enforces that when a specific value is present in a + multivalued slot, the total number of values must not exceed the + given cardinality (typically 1 for mutual exclusion). + + See `W3C SHACL §5 `_. + """ + if not cls.rules: + return + + sv = self.schemaview + for rule in cls.rules: + if getattr(rule, "deactivated", False): + continue + + if getattr(rule, "bidirectional", False): + logger.warning( + "Rule in class %r has bidirectional=true; " + "SHACL-SPARQL generation does not yet support bidirectional rules. " + "Only the forward direction is emitted.", + cls.name, + ) + + if getattr(rule, "open_world", False): + logger.warning( + "Rule in class %r has open_world=true; " + "SHACL operates under closed-world assumption. " + "The constraint is emitted but may not match open-world semantics.", + cls.name, + ) + + sparql_query = self._rule_to_sparql(sv, cls, rule) + if sparql_query is None: + logger.debug( + "Skipping unsupported rule pattern in class %r: %s", + cls.name, + getattr(rule, "description", "(no description)"), + ) + continue + + constraint = BNode() + g.add((shape_uri, SH.sparql, constraint)) + g.add((constraint, RDF.type, SH.SPARQLConstraint)) + + message = getattr(rule, "description", None) + if message: + g.add((constraint, SH.message, Literal(message))) + + g.add((constraint, SH.select, Literal(sparql_query))) + + def _rule_to_sparql(self, sv, cls: ClassDefinition, rule) -> str | None: + """Convert a ``ClassRule`` to a SPARQL SELECT query string. + + Returns ``None`` when the rule does not match any supported pattern. 
+ """ + pre = getattr(rule, "preconditions", None) + post = getattr(rule, "postconditions", None) + if not pre or not post: + return None + + pre_slots = getattr(pre, "slot_conditions", None) or {} + post_slots = getattr(post, "slot_conditions", None) or {} + + # Pattern: boolean guard + # preconditions: exactly one slot with value_presence PRESENT + # postconditions: exactly one slot with equals_string "true" + if len(pre_slots) == 1 and len(post_slots) == 1: + pre_slot_name = next(iter(pre_slots)) + post_slot_name = next(iter(post_slots)) + + pre_cond = pre_slots[pre_slot_name] + post_cond = post_slots[post_slot_name] + + is_value_present = getattr(pre_cond, "value_presence", None) == PresenceEnum(PresenceEnum.PRESENT) + is_flag_true = getattr(post_cond, "equals_string", None) == "true" + + if is_value_present and is_flag_true: + return self._build_boolean_guard_sparql(sv, cls, post_slot_name, pre_slot_name) + + # Pattern: exclusive value + # preconditions: slot X has equals_string (a specific enum value) + # postconditions: same slot X has maximum_cardinality N + # Semantics: "If value V is present in slot X, then X has at most N values." + pre_equals = getattr(pre_cond, "equals_string", None) + post_max_card = getattr(post_cond, "maximum_cardinality", None) + + if pre_equals is not None and post_max_card is not None and pre_slot_name == post_slot_name: + return self._build_exclusive_value_sparql(sv, cls, pre_slot_name, pre_equals, int(post_max_card)) + + return None + + def _build_boolean_guard_sparql(self, sv, cls: ClassDefinition, flag_slot_name: str, value_slot_name: str) -> str: + """Build a SPARQL SELECT query for the boolean-guard pattern. + + The query detects violations where the value property is present + but the boolean flag is absent or not ``true``. + + Conforms to `SHACL §5.3.1 + `_: + ``$this`` is pre-bound to each focus node. 
+ """ + flag_uri = self._slot_uri(sv, flag_slot_name, cls) + value_uri = self._slot_uri(sv, value_slot_name, cls) + + return ( + f"SELECT $this WHERE {{\n" + f" OPTIONAL {{ $this <{flag_uri}> ?flag . }}\n" + f" OPTIONAL {{ $this <{value_uri}> ?value . }}\n" + f" FILTER (\n" + f" ( !BOUND(?flag) || ?flag != true ) &&\n" + f" BOUND(?value)\n" + f" )\n" + f"}}" + ) + + def _build_exclusive_value_sparql( + self, + sv, + cls: ClassDefinition, + slot_name: str, + value_name: str, + max_card: int, + ) -> str | None: + """Build a SPARQL SELECT query for the exclusive-value pattern. + + Detects violations where a specific value is present in a multivalued + slot but the total number of values exceeds *max_card*. + + For the common case ``max_card == 1``, the query checks whether the + exclusive value coexists with any other value (simple existence test). + For ``max_card > 1``, a subquery counts all values and checks against + the limit. + + The exclusive value is resolved to its full IRI via the slot's enum + ``meaning`` field. If the slot is not an enum or the value has no + ``meaning``, the value is compared as a plain literal. + + Conforms to `SHACL §5.3.1 + `_: + ``$this`` is pre-bound to each focus node. + """ + slot_uri = self._slot_uri(sv, slot_name, cls) + value_ref = self._resolve_enum_value_ref(sv, slot_name, value_name) + + if max_card == 1: + return ( + f"SELECT $this WHERE {{\n" + f" $this <{slot_uri}> {value_ref} .\n" + f" $this <{slot_uri}> ?other .\n" + f" FILTER (?other != {value_ref})\n" + f"}}" + ) + + return ( + f"SELECT $this WHERE {{\n" + f" $this <{slot_uri}> {value_ref} .\n" + f" {{\n" + f" SELECT $this (COUNT(?val) AS ?count)\n" + f" WHERE {{ $this <{slot_uri}> ?val . }}\n" + f" GROUP BY $this\n" + f" HAVING (?count > {max_card})\n" + f" }}\n" + f"}}" + ) + + def _resolve_enum_value_ref(self, sv, slot_name: str, value_name: str) -> str: + """Resolve an enum value name to a SPARQL term (IRI or literal). 
+ + Looks up the slot's range as an enum, finds the permissible value + matching *value_name*, and returns its ``meaning`` as a full IRI + wrapped in angle brackets. Falls back to a quoted literal if the + slot is not an enum or the value lacks a ``meaning``. + """ + slot = sv.get_slot(slot_name) + if slot: + range_name = slot.range + if range_name and range_name in sv.all_enums(): + enum = sv.get_enum(range_name) + pv = enum.permissible_values.get(value_name) + if pv and pv.meaning: + iri = sv.expand_curie(pv.meaning) + return f"<{iri}>" + return f'"{value_name}"' + + def _slot_uri(self, sv, slot_name: str, cls: ClassDefinition) -> str: + """Resolve a slot name to a full IRI string for use in SPARQL queries. + + Mirrors the resolution logic used for ``sh:path`` in the main slot loop: + prefer ``sv.get_uri()`` for slots registered in the schema map, fall + back to ``default_prefix:underscored_name``. + """ + slot = sv.get_slot(slot_name) + if slot and slot_name in sv.element_by_schema_map(): + return sv.get_uri(slot, expand=True) + pfx = sv.schema.default_prefix + return sv.expand_curie(f"{pfx}:{underscore(slot_name)}") + def _add_class(self, func: Callable, r: ElementName) -> None: """Add an sh:class constraint for range class *r*. @@ -654,6 +888,17 @@ def add_simple_data_type(func: Callable, r: ElementName) -> None: 'Example: "{name} ({class}): {description} [{comments}]"' ), ) +@click.option( + "--emit-rules/--no-emit-rules", + default=True, + show_default=True, + help=( + "Emit sh:sparql constraints from LinkML rules: blocks. " + "When enabled (default), recognised rule patterns (e.g. boolean-guard) " + "are translated into SHACL-SPARQL constraints on the corresponding " + "sh:NodeShape. Use --no-emit-rules to suppress rule generation." 
+ ), +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile, **args): """Generate SHACL turtle from a LinkML model""" diff --git a/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml b/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml new file mode 100644 index 0000000000..f56c2eca6a --- /dev/null +++ b/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml @@ -0,0 +1,70 @@ +id: https://example.org/boolean-guards +name: boolean_guard_rules +description: >- + Test schema for SHACL generation of sh:sparql constraints from LinkML rules. + Models the boolean-guard pattern where a boolean flag must be true if a + corresponding value property is present. + +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/boolean-guards/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +slots: + WeatherWind: + description: Whether wind conditions are present. + range: boolean + slot_uri: ex:WeatherWind + weatherWindValue: + description: Wind speed value. + range: decimal + slot_uri: ex:weatherWindValue + WeatherRain: + description: Whether rain conditions are present. + range: boolean + slot_uri: ex:WeatherRain + weatherRainValue: + description: Rain intensity value. + range: decimal + slot_uri: ex:weatherRainValue + Temperature: + description: Ambient temperature. + range: decimal + slot_uri: ex:Temperature + +classes: + Environment: + description: Environmental conditions. + class_uri: ex:Environment + slots: + - WeatherWind + - weatherWindValue + - WeatherRain + - weatherRainValue + - Temperature + rules: + - description: >- + If weatherWindValue is provided, WeatherWind must be true. + preconditions: + slot_conditions: + weatherWindValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherWind: + equals_string: "true" + - description: >- + If weatherRainValue is provided, WeatherRain must be true. 
+ preconditions: + slot_conditions: + weatherRainValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherRain: + equals_string: "true" diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index a3c6aa5aa0..e5a57a5f13 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -1219,7 +1219,6 @@ def _build_message_test_schema(): sb.add_defaults() return sb.schema - def _parse_shacl(schema, **kwargs): shacl = ShaclGenerator(schema, mergeimports=False, **kwargs).serialize() g = rdflib.Graph() @@ -1609,3 +1608,758 @@ def test_message_template_with_default_language(): # Verify the message is NOT a plain literal assert Literal("Validation of vehicle_name failed!") not in msgs +# --------------------------------------------------------------------------- +# --emit-rules / sh:sparql tests +# --------------------------------------------------------------------------- + +_RULES_SCHEMA_YAML = """ +id: https://example.org/boolean-guards +name: boolean_guard_rules +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/boolean-guards/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + WeatherWind: + range: boolean + slot_uri: ex:WeatherWind + weatherWindValue: + description: Wind speed value. + range: decimal + slot_uri: ex:weatherWindValue + WeatherRain: + range: boolean + slot_uri: ex:WeatherRain + weatherRainValue: + description: Rain intensity value. + range: decimal + slot_uri: ex:weatherRainValue + Temperature: + range: decimal + slot_uri: ex:Temperature +classes: + Environment: + class_uri: ex:Environment + slots: + - WeatherWind + - weatherWindValue + - WeatherRain + - weatherRainValue + - Temperature + rules: + - description: If weatherWindValue is provided, WeatherWind must be true. 
+ preconditions: + slot_conditions: + weatherWindValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherWind: + equals_string: "true" + - description: If weatherRainValue is provided, WeatherRain must be true. + preconditions: + slot_conditions: + weatherRainValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherRain: + equals_string: "true" +""" + +EX_RULES = rdflib.Namespace("https://example.org/boolean-guards/") + + +def test_rule_boolean_guard_generates_sparql(): + """Boolean-guard rules produce sh:sparql constraints on the NodeShape.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2, f"Expected 2 sh:sparql constraints, got {len(sparql_nodes)}" + + for node in sparql_nodes: + assert (node, RDF.type, SH.SPARQLConstraint) in g + selects = list(g.objects(node, SH.select)) + assert len(selects) == 1, "Each constraint must have exactly one sh:select" + query = str(selects[0]) + assert "$this" in query, "SPARQL must use $this pre-bound variable" + assert "OPTIONAL" in query, "SPARQL must use OPTIONAL for flag/value" + assert "FILTER" in query, "SPARQL must have a FILTER clause" + assert "BOUND" in query, "SPARQL must use BOUND()" + + +def test_rule_with_description_generates_message(): + """Rule description is emitted as sh:message on the SPARQLConstraint.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + + messages = set() + for node in sparql_nodes: + for msg in g.objects(node, SH.message): + messages.add(str(msg)) + + assert "If weatherWindValue is provided, WeatherWind must be true." in messages + assert "If weatherRainValue is provided, WeatherRain must be true." 
in messages + + +def test_rule_sparql_contains_correct_uris(): + """SPARQL queries reference the correct slot URIs.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + all_sparql = "\n".join(queries) + + assert str(EX_RULES.WeatherWind) in all_sparql + assert str(EX_RULES.weatherWindValue) in all_sparql + assert str(EX_RULES.WeatherRain) in all_sparql + assert str(EX_RULES.weatherRainValue) in all_sparql + + +_DEACTIVATED_RULE_SCHEMA_YAML = """ +id: https://example.org/deactivated-test +name: deactivated_rule_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/deactivated-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + Flag: + range: boolean + slot_uri: ex:Flag + flagValue: + range: decimal + slot_uri: ex:flagValue +classes: + TestClass: + class_uri: ex:TestClass + slots: + - Flag + - flagValue + rules: + - description: This rule is deactivated. 
+ deactivated: true + preconditions: + slot_conditions: + flagValue: + value_presence: PRESENT + postconditions: + slot_conditions: + Flag: + equals_string: "true" +""" + + +def test_rule_deactivated_skipped(): + """Deactivated rules do not produce sh:sparql constraints.""" + g = _parse_shacl(_DEACTIVATED_RULE_SCHEMA_YAML) + + shape = URIRef("https://example.org/deactivated-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, f"Deactivated rule should not emit sh:sparql, got {len(sparql_nodes)}" + + +_UNSUPPORTED_RULE_SCHEMA_YAML = """ +id: https://example.org/unsupported-test +name: unsupported_rule_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/unsupported-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + slotA: + range: string + slot_uri: ex:slotA + slotB: + range: string + slot_uri: ex:slotB +classes: + TestClass: + class_uri: ex:TestClass + slots: + - slotA + - slotB + rules: + - description: Rule with no postconditions. 
+ preconditions: + slot_conditions: + slotA: + value_presence: PRESENT +""" + + +def test_rule_unsupported_pattern_skipped(): + """Unrecognised rule patterns are silently skipped (no sh:sparql emitted).""" + g = _parse_shacl(_UNSUPPORTED_RULE_SCHEMA_YAML) + + shape = URIRef("https://example.org/unsupported-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0 + + +def test_rule_no_emit_rules_flag(): + """--no-emit-rules suppresses sh:sparql constraint generation.""" + g = _parse_shacl(_RULES_SCHEMA_YAML, emit_rules=False) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, f"emit_rules=False should suppress rules, got {len(sparql_nodes)}" + + +_NO_RULES_SCHEMA_YAML = """ +id: https://example.org/no-rules +name: no_rules_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/no-rules/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + name: + range: string + slot_uri: ex:name +classes: + SimpleClass: + class_uri: ex:SimpleClass + slots: + - name +""" + + +def test_rule_no_rules_no_sparql(): + """Classes without rules: blocks produce no sh:sparql constraints.""" + g = _parse_shacl(_NO_RULES_SCHEMA_YAML) + + shape = URIRef("https://example.org/no-rules/SimpleClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0 + + +def test_rule_multiple_rules_per_class(): + """Multiple boolean-guard rules on one class produce multiple sh:sparql constraints.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2 + + # Each constraint should reference different slot pairs + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + wind_query = [q for q in queries if "weatherWindValue" in q] + rain_query = [q for q in queries if "weatherRainValue" in q] + assert 
len(wind_query) == 1, "Expected exactly one wind query" + assert len(rain_query) == 1, "Expected exactly one rain query" + + +# --------------------------------------------------------------------------- +# Tests for URI resolution without explicit slot_uri +# --------------------------------------------------------------------------- + +_NO_SLOT_URI_SCHEMA_YAML = """ +id: https://example.org/no-slot-uri +name: no_slot_uri_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/no-slot-uri/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + is_active: + range: boolean + measured_value: + range: decimal +classes: + Reading: + class_uri: ex:Reading + slots: + - is_active + - measured_value + rules: + - description: If measured_value is provided, is_active must be true. + preconditions: + slot_conditions: + measured_value: + value_presence: PRESENT + postconditions: + slot_conditions: + is_active: + equals_string: "true" +""" + + +def test_rule_no_explicit_slot_uri(): + """Slots without explicit slot_uri resolve via default_prefix + underscore(name).""" + g = _parse_shacl(_NO_SLOT_URI_SCHEMA_YAML) + + shape = URIRef("https://example.org/no-slot-uri/Reading") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 1 + + query = str(list(g.objects(sparql_nodes[0], SH.select))[0]) + # URIs should be default_prefix:underscore(name) + assert "https://example.org/no-slot-uri/is_active" in query + assert "https://example.org/no-slot-uri/measured_value" in query + + +# --------------------------------------------------------------------------- +# Tests for elseconditions rejection +# --------------------------------------------------------------------------- + +_ELSE_COND_SCHEMA_YAML = """ +id: https://example.org/else-test +name: else_cond_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/else-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string 
+slots: + Flag: + range: boolean + slot_uri: ex:Flag + flagValue: + range: decimal + slot_uri: ex:flagValue + fallbackValue: + range: string + slot_uri: ex:fallbackValue +classes: + TestClass: + class_uri: ex:TestClass + slots: + - Flag + - flagValue + - fallbackValue + rules: + - description: Rule with elseconditions should be skipped. + preconditions: + slot_conditions: + flagValue: + value_presence: PRESENT + postconditions: + slot_conditions: + Flag: + equals_string: "true" + elseconditions: + slot_conditions: + fallbackValue: + value_presence: PRESENT +""" + + +def test_rule_with_elseconditions_emitted(): + """Rules with elseconditions now emit the forward (if/then) branch as sh:sparql.""" + g = _parse_shacl(_ELSE_COND_SCHEMA_YAML) + + shape = URIRef("https://example.org/else-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) >= 1, "Rule with elseconditions should emit sh:sparql for the forward branch" + + +# --------------------------------------------------------------------------- +# SPARQL syntax validation +# --------------------------------------------------------------------------- + + +def test_rule_sparql_syntax_valid(): + """Generated SPARQL queries must be syntactically valid.""" + from rdflib.plugins.sparql import prepareQuery + + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) >= 1 + + for node in sparql_nodes: + query_text = str(list(g.objects(node, SH.select))[0]) + # prepareQuery validates SPARQL syntax; $this is a valid variable name + prepareQuery(query_text) + + +# =========================================================================== +# Exclusive-value pattern tests (SHACL §5 SPARQL constraints) +# =========================================================================== +# +# The "exclusive value" pattern translates a LinkML rule where: +# - preconditions: slot X has equals_string (a 
specific enum value name) +# - postconditions: same slot X has maximum_cardinality N +# +# Semantics: "If value V is present in multivalued slot X, then X has at most +# N values total." For N=1 this means V must be the sole value (mutual +# exclusion with other enum members). +# +# Generated SHACL: sh:SPARQLConstraint per W3C SHACL §5.3.1, using $this +# pre-bound to each focus node. +# +# References: +# - W3C SHACL §5 +# - W3C SHACL §5.3.1 +# - ISO 34503:2023, 9.3.6 (motivating use case: EdgeNone exclusivity) +# =========================================================================== + +_EXCLUSIVE_VALUE_SCHEMA_YAML = """ +id: https://example.org/exclusive-value +name: exclusive_value_rules +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/exclusive-value/ +imports: + - linkml:types +default_prefix: ex +default_range: string + +enums: + EdgeTypeEnum: + permissible_values: + EdgeNone: + meaning: ex:EdgeNone + EdgeBarriers: + meaning: ex:EdgeBarriers + EdgeMarkers: + meaning: ex:EdgeMarkers + + PriorityEnum: + permissible_values: + High: + description: High priority (no meaning IRI). + Medium: + description: Medium priority (no meaning IRI). + Low: + description: Low priority (no meaning IRI). + +slots: + edgeType: + range: EdgeTypeEnum + multivalued: true + slot_uri: ex:edgeType + priority: + range: PriorityEnum + multivalued: true + slot_uri: ex:priority + otherSlot: + range: string + slot_uri: ex:otherSlot + +classes: + Road: + class_uri: ex:Road + slots: + - edgeType + - otherSlot + rules: + - description: >- + EdgeNone is mutually exclusive with other edge types. + preconditions: + slot_conditions: + edgeType: + equals_string: "EdgeNone" + postconditions: + slot_conditions: + edgeType: + maximum_cardinality: 1 + + Intersection: + class_uri: ex:Intersection + slots: + - edgeType + rules: + - description: >- + EdgeNone allows at most 2 total edge values. 
+ preconditions: + slot_conditions: + edgeType: + equals_string: "EdgeNone" + postconditions: + slot_conditions: + edgeType: + maximum_cardinality: 2 + + Task: + class_uri: ex:Task + slots: + - priority + rules: + - description: >- + High priority is exclusive (literal fallback test). + preconditions: + slot_conditions: + priority: + equals_string: "High" + postconditions: + slot_conditions: + priority: + maximum_cardinality: 1 + + MismatchedSlots: + class_uri: ex:MismatchedSlots + slots: + - edgeType + - otherSlot + rules: + - description: >- + Different slots in pre/post — not an exclusive-value pattern. + preconditions: + slot_conditions: + edgeType: + equals_string: "EdgeNone" + postconditions: + slot_conditions: + otherSlot: + maximum_cardinality: 1 +""" + +EX_EXCL = rdflib.Namespace("https://example.org/exclusive-value/") + + +def test_exclusive_value_generates_sparql(): + """Exclusive-value rules produce sh:sparql constraints on the NodeShape.""" + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.Road + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 1, f"Expected 1 sh:sparql constraint, got {len(sparql_nodes)}" + + node = sparql_nodes[0] + assert (node, RDF.type, SH.SPARQLConstraint) in g + selects = list(g.objects(node, SH.select)) + assert len(selects) == 1, "Constraint must have exactly one sh:select" + + +def test_exclusive_value_sparql_uses_enum_iri(): + """SPARQL references the enum value's meaning IRI, not a string literal. + + Per the enum definition, EdgeNone has meaning: ex:EdgeNone which expands + to <https://example.org/exclusive-value/EdgeNone>. The generated SPARQL + must use this full IRI in angle brackets.
+ """ + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.Road + sparql_nodes = list(g.objects(shape, SH.sparql)) + query = str(list(g.objects(sparql_nodes[0], SH.select))[0]) + + edge_none_iri = str(EX_EXCL.EdgeNone) + assert f"<{edge_none_iri}>" in query, f"SPARQL must reference EdgeNone as full IRI <{edge_none_iri}>, got:\n{query}" + + +def test_exclusive_value_max_card_1_sparql_structure(): + """For maximum_cardinality: 1, SPARQL uses FILTER(?other != ). + + The query pattern for N=1 is: + SELECT $this WHERE { + $this <slot_uri> <value_iri> . + $this <slot_uri> ?other . + FILTER (?other != <value_iri>) + } + + This is more efficient than the COUNT-based approach for the common + singleton exclusion case. + """ + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.Road + sparql_nodes = list(g.objects(shape, SH.sparql)) + query = str(list(g.objects(sparql_nodes[0], SH.select))[0]) + + assert "$this" in query, "SPARQL must use $this pre-bound variable (SHACL §5.3.1)" + assert "FILTER" in query, "N=1 pattern must use FILTER for exclusion check" + assert "?other" in query, "N=1 pattern must bind ?other for comparison" + # Must NOT use COUNT for the N=1 case (simpler pattern) + assert "COUNT" not in query, "N=1 pattern should use FILTER, not COUNT" + # The slot URI must appear (property path) + assert str(EX_EXCL.edgeType) in query, "SPARQL must reference the slot URI" + + +def test_exclusive_value_max_card_gt1_sparql_structure(): + """For maximum_cardinality > 1, SPARQL uses COUNT-based subquery. + + The query pattern for N>1 is: + SELECT $this WHERE { + $this <slot_uri> <value_iri> . + { + SELECT $this (COUNT(?val) AS ?count) + WHERE { $this <slot_uri> ?val .
} + GROUP BY $this + HAVING (?count > N) + } + } + """ + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.Intersection + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 1, f"Expected 1 sh:sparql constraint, got {len(sparql_nodes)}" + + query = str(list(g.objects(sparql_nodes[0], SH.select))[0]) + + assert "$this" in query, "SPARQL must use $this pre-bound variable" + assert "COUNT" in query, "N>1 pattern must use COUNT" + assert "GROUP BY" in query, "N>1 pattern must GROUP BY $this" + assert "HAVING" in query, "N>1 pattern must use HAVING for count check" + assert "> 2" in query, "HAVING must check count > maximum_cardinality (2)" + + +def test_exclusive_value_no_meaning_falls_back_to_literal(): + """When enum values lack a meaning IRI, the value is compared as a literal. + + PriorityEnum values have no meaning field, so 'High' is used as a + quoted string in the SPARQL rather than an IRI in angle brackets. + """ + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.Task + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 1, f"Expected 1 sh:sparql constraint, got {len(sparql_nodes)}" + + query = str(list(g.objects(sparql_nodes[0], SH.select))[0]) + + # Should use quoted literal, not angle-bracket IRI + assert '"High"' in query, f"No-meaning enum should use literal '\"High\"', got:\n{query}" + assert "" not in query, "Should not emit as IRI when meaning is absent" + + +def test_exclusive_value_different_slots_not_recognised(): + """Rules where pre/post reference different slots are NOT exclusive-value. + + The pattern requires the SAME slot in both preconditions and + postconditions. When they differ, the rule is unrecognised and + silently skipped (no sh:sparql emitted). 
+ """ + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.MismatchedSlots + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, ( + f"Mismatched slots should not trigger exclusive-value pattern, got {len(sparql_nodes)}" + ) + + +def test_exclusive_value_message_from_description(): + """Rule description is emitted as sh:message on the SPARQLConstraint.""" + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.Road + sparql_nodes = list(g.objects(shape, SH.sparql)) + messages = [str(m) for node in sparql_nodes for m in g.objects(node, SH.message)] + + assert any("EdgeNone is mutually exclusive" in m for m in messages), ( + f"Expected message about EdgeNone exclusivity, got: {messages}" + ) + + +def test_exclusive_value_sparql_syntax_valid(): + """Generated SPARQL for exclusive-value rules must be syntactically valid. + + Uses rdflib's prepareQuery() which validates SPARQL syntax. + $this is a valid SPARQL variable name per the grammar. + """ + from rdflib.plugins.sparql import prepareQuery + + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + for shape in (EX_EXCL.Road, EX_EXCL.Intersection, EX_EXCL.Task): + sparql_nodes = list(g.objects(shape, SH.sparql)) + for node in sparql_nodes: + query_text = str(list(g.objects(node, SH.select))[0]) + # prepareQuery validates SPARQL syntax + prepareQuery(query_text) + + +def test_exclusive_value_coexists_with_boolean_guard(): + """Exclusive-value and boolean-guard rules can coexist on the same class. + + When a class has both pattern types, both produce sh:sparql constraints. 
+ """ + schema = """ +id: https://example.org/mixed-rules +name: mixed_rules +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/mixed-rules/ +imports: + - linkml:types +default_prefix: ex +default_range: string + +enums: + StatusEnum: + permissible_values: + None: + meaning: ex:None + Active: + meaning: ex:Active + +slots: + status: + range: StatusEnum + multivalued: true + slot_uri: ex:status + Flag: + range: boolean + slot_uri: ex:Flag + flagValue: + range: decimal + slot_uri: ex:flagValue + +classes: + Widget: + class_uri: ex:Widget + slots: + - status + - Flag + - flagValue + rules: + - description: None is exclusive. + preconditions: + slot_conditions: + status: + equals_string: "None" + postconditions: + slot_conditions: + status: + maximum_cardinality: 1 + - description: If flagValue present, Flag must be true. + preconditions: + slot_conditions: + flagValue: + value_presence: PRESENT + postconditions: + slot_conditions: + Flag: + equals_string: "true" +""" + g = _parse_shacl(schema) + + shape = URIRef("https://example.org/mixed-rules/Widget") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2, ( + f"Expected 2 sh:sparql constraints (1 exclusive + 1 boolean guard), got {len(sparql_nodes)}" + ) + + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + # One should have FILTER(?other != ...) 
pattern, the other BOUND pattern + has_exclusive = any("?other" in q for q in queries) + has_boolean = any("BOUND" in q for q in queries) + assert has_exclusive, "Expected one exclusive-value SPARQL constraint" + assert has_boolean, "Expected one boolean-guard SPARQL constraint" From 247f4b8cfecb8d3922350dbd2b9abf71838ae7c6 Mon Sep 17 00:00:00 2001 From: jdsika Date: Fri, 8 May 2026 13:00:16 +0200 Subject: [PATCH 06/15] feat(gen-shacl): add exclusive-value SPARQL rule pattern --- tests/linkml/test_generators/test_shaclgen.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index e5a57a5f13..81ab43b888 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -2163,6 +2163,7 @@ def test_exclusive_value_sparql_uses_enum_iri(): assert f"<{edge_none_iri}>" in query, f"SPARQL must reference EdgeNone as full IRI <{edge_none_iri}>, got:\n{query}" + def test_exclusive_value_max_card_1_sparql_structure(): """For maximum_cardinality: 1, SPARQL uses FILTER(?other != ). From 90872b176ea3e1b8caaa0366a958e9685332e928 Mon Sep 17 00:00:00 2001 From: Carlo van Driesten Date: Tue, 12 May 2026 11:38:58 +0200 Subject: [PATCH 07/15] fix(shaclgen): emit sh:minCount/maxCount 0 for zero cardinality values Python truthiness check `if s.maximum_cardinality:` evaluates to False when the value is 0 (an integer), silently skipping sh:maxCount 0 emission. The same bug affected minimum_cardinality and exact_cardinality. 
Replace all three truthiness checks with explicit `is not None` guards: - `if s.minimum_cardinality is not None:` - `if s.maximum_cardinality is not None:` - `elif s.exact_cardinality is not None:` (two occurrences) Add regression tests: - test_zero_maximum_cardinality_emits_maxcount - test_zero_exact_cardinality_emits_both_counts This is the primary mechanism for suppressing inherited slots on subclasses via slot_usage (OWL maxCardinality 0 pattern). Signed-off-by: Carlo van Driesten --- .../linkml/src/linkml/generators/shaclgen.py | 8 +- .../input/shaclgen/cardinality.yaml | 25 +++++++ tests/linkml/test_generators/test_shaclgen.py | 75 +++++++++++++++++++ 3 files changed, 104 insertions(+), 4 deletions(-) diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index 1de99eaef4..5da16b2901 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -285,9 +285,9 @@ def prop_pv_text(p, v): if msg_text: prop_pv_text(SH.message, msg_text) # minCount - if s.minimum_cardinality: + if s.minimum_cardinality is not None: prop_pv_literal(SH.minCount, s.minimum_cardinality) - elif s.exact_cardinality: + elif s.exact_cardinality is not None: prop_pv_literal(SH.minCount, s.exact_cardinality) # Identifiers map to the node's IRI rather than a property triple, # so there's no arc to constrain with sh:minCount 1 — emitting it @@ -295,9 +295,9 @@ def prop_pv_text(p, v): elif s.required and not s.identifier: prop_pv_literal(SH.minCount, 1) # maxCount - if s.maximum_cardinality: + if s.maximum_cardinality is not None: prop_pv_literal(SH.maxCount, s.maximum_cardinality) - elif s.exact_cardinality: + elif s.exact_cardinality is not None: prop_pv_literal(SH.maxCount, s.exact_cardinality) elif not s.multivalued: prop_pv_literal(SH.maxCount, 1) diff --git a/tests/linkml/test_generators/input/shaclgen/cardinality.yaml 
b/tests/linkml/test_generators/input/shaclgen/cardinality.yaml index 6bacffa680..86f88c4f60 100644 --- a/tests/linkml/test_generators/input/shaclgen/cardinality.yaml +++ b/tests/linkml/test_generators/input/shaclgen/cardinality.yaml @@ -17,6 +17,23 @@ classes: slots: - list_exact_size + ParentClass: + slots: + - inherited_slot + - restricted_slot + + ChildWithZeroMaxCard: + is_a: ParentClass + slot_usage: + restricted_slot: + maximum_cardinality: 0 + + ChildWithZeroExactCard: + is_a: ParentClass + slot_usage: + restricted_slot: + exact_cardinality: 0 + slots: list_min_max_size: range: integer @@ -28,3 +45,11 @@ slots: range: integer multivalued: true exact_cardinality: 3 + + inherited_slot: + range: string + multivalued: true + + restricted_slot: + range: string + multivalued: true diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index 81ab43b888..ebabd7edbf 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -570,6 +570,81 @@ def test_multivalued_slot_exact_cardinality(input_path): ) in g +def test_zero_maximum_cardinality_emits_maxcount(input_path): + """Test that maximum_cardinality: 0 correctly emits sh:maxCount 0. + + Regression test for the bug where Python truthiness check + `if s.maximum_cardinality:` would skip the value 0 (falsy), + failing to emit sh:maxCount 0 in the generated SHACL shape. + The fix uses `if s.maximum_cardinality is not None:` instead. + + This is the primary mechanism for suppressing inherited slots on + subclasses via slot_usage (e.g., OWL maxCardinality 0 pattern). 
+ """ + shacl = ShaclGenerator(input_path("shaclgen/cardinality.yaml"), mergeimports=True).serialize() + + g = rdflib.Graph() + g.parse(data=shacl) + + # Find the ChildWithZeroMaxCard shape + child_uri = URIRef("https://w3id.org/linkml/examples/cardinality/ChildWithZeroMaxCard") + restricted_slot_uri = URIRef("https://w3id.org/linkml/examples/cardinality/restricted_slot") + + # Get all property shapes for the child class + prop_nodes = list(g.objects(child_uri, SH.property)) + assert prop_nodes, "ChildWithZeroMaxCard should have property shapes" + + # Find the property shape for restricted_slot + restricted_prop_node = None + for pn in prop_nodes: + if (pn, SH.path, restricted_slot_uri) in g: + restricted_prop_node = pn + break + assert restricted_prop_node is not None, "Should have a property shape for restricted_slot" + + # The critical assertion: sh:maxCount 0 must be emitted + max_count_values = list(g.objects(restricted_prop_node, SH.maxCount)) + assert len(max_count_values) == 1, f"Expected exactly one sh:maxCount, got {max_count_values}" + assert max_count_values[0] == rdflib.term.Literal( + 0, datatype=rdflib.term.URIRef("http://www.w3.org/2001/XMLSchema#integer") + ), f"sh:maxCount should be 0, got {max_count_values[0]}" + + +def test_zero_exact_cardinality_emits_both_counts(input_path): + """Test that exact_cardinality: 0 emits both sh:minCount 0 and sh:maxCount 0. + + Same truthiness bug as maximum_cardinality: `if s.exact_cardinality:` + skips value 0 (falsy). The fix uses `is not None` instead. 
+ """ + shacl = ShaclGenerator(input_path("shaclgen/cardinality.yaml"), mergeimports=True).serialize() + + g = rdflib.Graph() + g.parse(data=shacl) + + child_uri = URIRef("https://w3id.org/linkml/examples/cardinality/ChildWithZeroExactCard") + restricted_slot_uri = URIRef("https://w3id.org/linkml/examples/cardinality/restricted_slot") + + prop_nodes = list(g.objects(child_uri, SH.property)) + assert prop_nodes, "ChildWithZeroExactCard should have property shapes" + + restricted_prop_node = None + for pn in prop_nodes: + if (pn, SH.path, restricted_slot_uri) in g: + restricted_prop_node = pn + break + assert restricted_prop_node is not None, "Should have a property shape for restricted_slot" + + XSD_INT = rdflib.term.URIRef("http://www.w3.org/2001/XMLSchema#integer") + + min_count_values = list(g.objects(restricted_prop_node, SH.minCount)) + assert len(min_count_values) == 1, f"Expected exactly one sh:minCount, got {min_count_values}" + assert min_count_values[0] == rdflib.term.Literal(0, datatype=XSD_INT) + + max_count_values = list(g.objects(restricted_prop_node, SH.maxCount)) + assert len(max_count_values) == 1, f"Expected exactly one sh:maxCount, got {max_count_values}" + assert max_count_values[0] == rdflib.term.Literal(0, datatype=XSD_INT) + + def test_exclude_imports(input_path): shacl = ShaclGenerator( input_path("shaclgen/exclude_imports.yaml"), mergeimports=True, exclude_imports=True From 8a53714b454dd7ea998643b2736c0fb8125f8da6 Mon Sep 17 00:00:00 2001 From: Carlo van Driesten Date: Thu, 7 May 2026 13:58:58 +0200 Subject: [PATCH 08/15] fix(shaclgen): emit sh:pattern for pattern constraints inside any_of MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SHACL generator translated any_of branches by dispatching solely on `any.range` (class, type, enum, or simple datatype). 
If a branch specified `pattern:` — either alone or combined with a range — the constraint was silently dropped, producing an empty blank node `[ ]` (trivially satisfied) instead of the intended `[ sh:pattern "..." ]`. This is a problem for schemas that use pattern alternatives in `any_of`, such as the SPDX license field where valid values are either members of a fixed enum (SPDX identifiers), IRIs, or custom identifiers matching the LicenseRef- pattern defined in SPDX Specification v2.3 Annex D (ABNF: license-ref = ["DocumentRef-"(idstring)":"]"LicenseRef-"(idstring)). The fix adds a single check after the range dispatch: if any.pattern: g.add((range_list[-1], SH.pattern, Literal(any.pattern))) This correctly handles: - Pattern-only branches (no range): node gets only sh:pattern - Range + pattern branches: node gets both sh:datatype and sh:pattern - Range-only branches (no pattern): unchanged behaviour The test suite now includes a dedicated schema exercising all three cases, with assertions on both the generated RDF triples and pyshacl validation of conforming/non-conforming data. Signed-off-by: Carlo van Driesten --- .../linkml/src/linkml/generators/shaclgen.py | 5 ++ .../input/shaclgen/any_of_pattern.yaml | 59 +++++++++++++++++ tests/linkml/test_generators/test_shaclgen.py | 65 +++++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 tests/linkml/test_generators/input/shaclgen/any_of_pattern.yaml diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index 5da16b2901..ddabe8114f 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -353,6 +353,11 @@ def st_node_pv(p, v): add_simple_data_type(st_node_pv, r) range_list.append(st_node) + # Propagate pattern constraint to the branch node. + # A branch may combine range + pattern (e.g. range: string + # with pattern: "^...") or specify pattern alone (no range). 
+ if any.pattern: + g.add((range_list[-1], SH.pattern, Literal(any.pattern))) Collection(g, or_node, range_list) else: prop_pv_literal(SH.hasValue, s.equals_number) diff --git a/tests/linkml/test_generators/input/shaclgen/any_of_pattern.yaml b/tests/linkml/test_generators/input/shaclgen/any_of_pattern.yaml new file mode 100644 index 0000000000..5b247bb2a1 --- /dev/null +++ b/tests/linkml/test_generators/input/shaclgen/any_of_pattern.yaml @@ -0,0 +1,59 @@ +id: https://w3id.org/linkml/examples/any_of_pattern +name: test_any_of_pattern +description: >- + Test schema for pattern constraints inside any_of branches. + Exercises three cases: (1) pattern-only branch (no range), + (2) range + pattern on the same branch, (3) mixed branches + where some have pattern and some do not. +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://w3id.org/linkml/examples/any_of_pattern/ +imports: + - linkml:types +default_range: string +default_prefix: ex + +enums: + LicenseEnum: + permissible_values: + MIT: + Apache-2.0: + GPL-3.0-only: + +classes: + PatternOnlyBranch: + description: >- + A class where one any_of branch specifies only a pattern + (no range). The generated SHACL sh:or should contain a + node with sh:pattern but no sh:datatype or sh:class. + attributes: + license: + any_of: + - range: LicenseEnum + - range: uri + - pattern: "^LicenseRef-[a-zA-Z0-9\\-\\.]+$" + + RangeWithPattern: + description: >- + A class where an any_of branch combines range + pattern. + The generated SHACL sh:or node should have both sh:datatype + and sh:pattern. + attributes: + identifier: + any_of: + - range: string + pattern: "^[A-Z]{2}-[0-9]{4}$" + - range: integer + + MixedBranches: + description: >- + A class with three any_of branches: one with range only, + one with pattern only, one with range + pattern. Ensures + pattern is emitted only on branches that declare it. 
+ attributes: + code: + any_of: + - range: integer + - pattern: "^CUSTOM-.*$" + - range: string + pattern: "^STD-[0-9]+$" diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index ebabd7edbf..0300449ace 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -2439,3 +2439,68 @@ def test_exclusive_value_coexists_with_boolean_guard(): has_boolean = any("BOUND" in q for q in queries) assert has_exclusive, "Expected one exclusive-value SPARQL constraint" assert has_boolean, "Expected one boolean-guard SPARQL constraint" + + +def test_any_of_with_pattern(input_path): + """Test that pattern constraints inside any_of branches emit sh:pattern. + + Exercises three cases: + 1. PatternOnlyBranch: any_of with a pattern-only branch (no range) + 2. RangeWithPattern: any_of with range + pattern on the same branch + 3. MixedBranches: combination of range-only, pattern-only, and range+pattern + """ + shacl = ShaclGenerator(input_path("shaclgen/any_of_pattern.yaml"), mergeimports=True).serialize() + g = rdflib.Graph() + g.parse(data=shacl) + + def get_or_branch_nodes(class_uri: str, slot_local: str) -> list[rdflib.BNode]: + """Return the list of BNodes inside sh:or for a given class property.""" + class_ref = URIRef(class_uri) + for prop_node in g.objects(class_ref, SH.property): + paths = list(g.objects(prop_node, SH.path)) + if any(slot_local in str(p) for p in paths): + for or_head in g.objects(prop_node, SH["or"]): + return list(Collection(g, or_head)) + return [] + + prefix = "https://w3id.org/linkml/examples/any_of_pattern/" + + # Case 1: PatternOnlyBranch — license slot has 3 branches: + # [enum sh:in], [sh:nodeKind sh:IRI], [sh:pattern "^LicenseRef-..."] + branches = get_or_branch_nodes(f"{prefix}PatternOnlyBranch", "license") + assert len(branches) == 3, f"Expected 3 branches, got {len(branches)}" + # Find the branch with sh:pattern + pattern_branches = [b for b 
in branches if list(g.objects(b, SH.pattern))] + assert len(pattern_branches) == 1, f"Expected 1 pattern branch, got {len(pattern_branches)}" + pattern_val = str(list(g.objects(pattern_branches[0], SH.pattern))[0]) + assert pattern_val == "^LicenseRef-[a-zA-Z0-9\\-\\.]+$" + # The pattern-only branch should NOT have sh:datatype or sh:class + assert list(g.objects(pattern_branches[0], SH.datatype)) == [] + assert list(g.objects(pattern_branches[0], SH["class"])) == [] + + # Case 2: RangeWithPattern — identifier slot has 2 branches: + # [sh:datatype xsd:string + sh:pattern "^[A-Z]{2}-[0-9]{4}$"], [sh:datatype xsd:integer] + branches = get_or_branch_nodes(f"{prefix}RangeWithPattern", "identifier") + assert len(branches) == 2, f"Expected 2 branches, got {len(branches)}" + # Find branch with both datatype and pattern + combo_branches = [b for b in branches if list(g.objects(b, SH.datatype)) and list(g.objects(b, SH.pattern))] + assert len(combo_branches) == 1, f"Expected 1 combo branch, got {len(combo_branches)}" + assert str(list(g.objects(combo_branches[0], SH.pattern))[0]) == "^[A-Z]{2}-[0-9]{4}$" + # The other branch (integer) should NOT have sh:pattern + int_branches = [b for b in branches if b not in combo_branches] + assert list(g.objects(int_branches[0], SH.pattern)) == [] + + # Case 3: MixedBranches — code slot has 3 branches: + # [sh:datatype xsd:integer], [sh:pattern "^CUSTOM-.*$"], [sh:datatype xsd:string + sh:pattern "^STD-[0-9]+$"] + branches = get_or_branch_nodes(f"{prefix}MixedBranches", "code") + assert len(branches) == 3, f"Expected 3 branches, got {len(branches)}" + # Exactly 2 branches should have sh:pattern + pattern_branches = [b for b in branches if list(g.objects(b, SH.pattern))] + assert len(pattern_branches) == 2, f"Expected 2 pattern branches, got {len(pattern_branches)}" + # Collect the patterns + patterns = sorted(str(list(g.objects(b, SH.pattern))[0]) for b in pattern_branches) + assert patterns == ["^CUSTOM-.*$", "^STD-[0-9]+$"] + # The 
integer-only branch should have no pattern + no_pattern = [b for b in branches if not list(g.objects(b, SH.pattern))] + assert len(no_pattern) == 1 + assert list(g.objects(no_pattern[0], SH.datatype)) == [URIRef("http://www.w3.org/2001/XMLSchema#integer")] From 3eee11487e30735746bbf1c14ba8dfd917feb2ed Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Fri, 17 Apr 2026 13:54:59 +0300 Subject: [PATCH 09/15] Canonicalise all RDF outputs using pyoxygraph --- packages/linkml/pyproject.toml | 1 - .../linkml/src/linkml/generators/owlgen.py | 6 +- .../linkml/src/linkml/generators/rdfgen.py | 15 +- .../linkml/src/linkml/generators/shaclgen.py | 6 +- .../linkml/src/linkml/generators/shexgen.py | 4 +- packages/linkml_runtime/pyproject.toml | 1 + .../src/linkml_runtime/dumpers/rdf_dumper.py | 5 +- .../linkml_runtime/dumpers/rdflib_dumper.py | 5 +- .../linkml_runtime/utils/rdf_canonicalize.py | 133 +++++++++ tests/linkml/test_compliance/helper.py | 3 +- tests/linkml/test_generators/test_shaclgen.py | 13 +- tests/linkml/test_issues/conftest.py | 4 +- tests/linkml/test_notebooks/input/examples.py | 3 +- tests/linkml/test_scripts/test_gen_jsonld.py | 7 +- tests/linkml/test_utils/test_uri_and_curie.py | 7 +- tests/linkml/utils/compare_rdf.py | 3 +- tests/linkml_runtime/support/compare_rdf.py | 3 +- .../test_utils/test_metamodelcore.py | 3 +- .../test_utils/test_rdf_canonicalize.py | 131 +++++++++ uv.lock | 257 +++--------------- 20 files changed, 366 insertions(+), 244 deletions(-) create mode 100644 packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py create mode 100644 tests/linkml_runtime/test_utils/test_rdf_canonicalize.py diff --git a/packages/linkml/pyproject.toml b/packages/linkml/pyproject.toml index 52f58d32da..3bc3d4c2c5 100644 --- a/packages/linkml/pyproject.toml +++ b/packages/linkml/pyproject.toml @@ -81,7 +81,6 @@ tests = [ { include-group = "lint" }, { include-group = "typing" }, { include-group = "shacl" }, - "morph-kgc >= 2.9.0; python_version >= 
'3.10'" ] dev = [ {include-group = "tests" }, diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index 1d69890c0b..698db904ef 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -18,6 +18,8 @@ from rdflib.plugin import Parser as rdflib_Parser from rdflib.plugin import plugins as rdflib_plugins +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph + from linkml import METAMODEL_NAMESPACE_NAME from linkml._version import __version__ from linkml.generators.common.subproperty import is_xsd_anyuri_range @@ -374,8 +376,8 @@ def serialize(self, **kwargs: Any) -> str: :return: """ self.as_graph() - data = self.graph.serialize(format="turtle" if self.format in ["owl", "ttl"] else self.format) - return data + fmt = "turtle" if self.format in ["owl", "ttl"] else self.format + return canonicalize_rdf_graph(self.graph, output_format=fmt) def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None: """ diff --git a/packages/linkml/src/linkml/generators/rdfgen.py b/packages/linkml/src/linkml/generators/rdfgen.py index 22ba6031c2..28e0cf6a9c 100644 --- a/packages/linkml/src/linkml/generators/rdfgen.py +++ b/packages/linkml/src/linkml/generators/rdfgen.py @@ -15,6 +15,8 @@ from rdflib.plugin import Parser as rdflib_Parser from rdflib.plugin import plugins as rdflib_plugins +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph + from linkml import LOCAL_METAMODEL_LDCONTEXT_FILE from linkml._version import __version__ from linkml.generators.jsonldgen import JSONLDGenerator @@ -44,7 +46,8 @@ def __post_init__(self): super().__post_init__() def _data(self, g: Graph) -> str: - return g.serialize(format="turtle" if self.format == "ttl" else self.format) + fmt = "turtle" if self.format == "ttl" else self.format + return canonicalize_rdf_graph(g, output_format=fmt) def end_schema(self, output: str | None = 
None, context: str = None, **_) -> str: gen = JSONLDGenerator( @@ -68,15 +71,7 @@ def end_schema(self, output: str | None = None, context: str = None, **_) -> str prefix=True, ) if output: - # Binary-safe when -o/--output is used: - # delegate to RDFLib (Graph.serialize(destination=..., format=...)). - # Serializers that produce bytes write directly to the file; stdout stays empty. - fmt = "turtle" if self.format == "ttl" else self.format - try: - out = graph.serialize(format=fmt) - except UnicodeDecodeError: - graph.serialize(destination=output, format=fmt) - return "" + out = self._data(graph) with open(output, "w", encoding="UTF-8") as outf: outf.write(out) return out diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index ddabe8114f..35febccf8b 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -10,6 +10,8 @@ from rdflib.collection import Collection from rdflib.namespace import RDF, RDFS, SH, XSD +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph + from linkml._version import __version__ from linkml.generators.common.subproperty import get_subproperty_values, is_uri_range from linkml.generators.shacl.shacl_data_type import ShaclDataType @@ -182,8 +184,8 @@ def generate_header(self) -> str: def serialize(self, **args) -> str: g = self.as_graph() - data = g.serialize(format="turtle" if self.format in ["owl", "ttl"] else self.format) - return data + fmt = "turtle" if self.format in ["owl", "ttl"] else self.format + return canonicalize_rdf_graph(g, output_format=fmt) def as_graph(self) -> Graph: sv = self.schemaview diff --git a/packages/linkml/src/linkml/generators/shexgen.py b/packages/linkml/src/linkml/generators/shexgen.py index 387cff8eb3..093778d019 100644 --- a/packages/linkml/src/linkml/generators/shexgen.py +++ b/packages/linkml/src/linkml/generators/shexgen.py @@ -11,6 +11,8 @@ from 
ShExJSG.SchemaWithContext import Schema from ShExJSG.ShExJ import IRIREF, EachOf, NodeConstraint, Shape, ShapeOr, TripleConstraint +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph + from linkml import METAMODEL_NAMESPACE, METAMODEL_NAMESPACE_NAME from linkml._version import __version__ from linkml.generators.common.subproperty import get_subproperty_values @@ -176,7 +178,7 @@ def end_schema(self, output: str | None = None, **_) -> str: g = Graph() g.parse(data=shex, format="json-ld", version="1.1") g.bind("owl", OWL) - shex = g.serialize(format="turtle") + shex = canonicalize_rdf_graph(g, output_format="turtle") elif self.format == "shex": g = Graph() self.namespaces.load_graph(g) diff --git a/packages/linkml_runtime/pyproject.toml b/packages/linkml_runtime/pyproject.toml index d3dc9a7682..fd738a2ca1 100644 --- a/packages/linkml_runtime/pyproject.toml +++ b/packages/linkml_runtime/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ "requests", "prefixmaps >=0.1.4", "curies >=0.5.4", + "pyoxigraph >=0.5.6", "pydantic >=1.10.2, <3.0.0", "isodate >=0.7.2, <1.0.0; python_version < '3.11'", ] diff --git a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdf_dumper.py b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdf_dumper.py index 0c6ab4a856..c28f1da691 100644 --- a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdf_dumper.py +++ b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdf_dumper.py @@ -7,6 +7,7 @@ from linkml_runtime.dumpers.dumper_root import Dumper from linkml_runtime.utils.context_utils import CONTEXT_TYPE, CONTEXTS_PARAM_TYPE from linkml_runtime.utils.formatutils import remove_empty_items +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.yamlutils import YAMLRoot @@ -101,4 +102,6 @@ def dumps( """ if isinstance(element, BaseModel): element = element.model_dump() - return self.as_rdf_graph(remove_empty_items(element, hide_protected_keys=True), 
contexts).serialize(format=fmt) + return canonicalize_rdf_graph( + self.as_rdf_graph(remove_empty_items(element, hide_protected_keys=True), contexts), output_format=fmt + ) diff --git a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py index 82f568d6dd..8362871e6d 100644 --- a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py +++ b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py @@ -9,7 +9,8 @@ from rdflib.term import BNode, Literal, Node from linkml_runtime.dumpers.dumper_root import Dumper -from linkml_runtime.linkml_model import ElementName, PermissibleValue, SlotDefinition +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph +from linkml_runtime.linkml_model import ElementName, PermissibleValue, PermissibleValueText, SlotDefinition from linkml_runtime.utils.schemaview import SchemaView from linkml_runtime.utils.yamlutils import YAMLRoot @@ -196,7 +197,7 @@ def dumps( :param prefix_map: :return: serialization of rdflib Graph containing element """ - return self.as_rdf_graph(element, schemaview, prefix_map=prefix_map).serialize(format=fmt) + return canonicalize_rdf_graph(self.as_rdf_graph(element, schemaview, prefix_map=prefix_map), output_format=fmt) def _as_uri(self, element_id: str, id_slot: SlotDefinition | None, schemaview: SchemaView) -> URIRef: if id_slot and schemaview.is_slot_percent_encoded(id_slot): diff --git a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py new file mode 100644 index 0000000000..4ff4a08389 --- /dev/null +++ b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py @@ -0,0 +1,133 @@ +"""Deterministic RDF serialization via pyoxigraph RDFC-1.0 canonicalization. 
+ +This module provides a function to canonicalize an rdflib Graph using +pyoxigraph's RDFC-1.0 implementation, producing deterministic output +with stable blank node labels and sorted triples. + +**Known limitations:** + +1. **xsd:string normalization**: pyoxigraph follows RDF 1.1, where plain + string literals and ``"text"^^xsd:string`` are identical. The output + will never contain explicit ``^^xsd:string`` annotations. Code that + re-parses the output with rdflib will see ``Literal("x")`` (datatype + ``None``) rather than ``Literal("x", datatype=XSD.string)``. + +2. **Non-standard RDF**: Graphs with literal predicates (e.g. SHACL + annotation mode) are rejected by pyoxigraph. This function falls + back to rdflib's serializer for such graphs. + +3. **Numeric short forms**: pyoxigraph uses Turtle short forms for + ``xsd:integer`` (``42``), ``xsd:boolean`` (``true``), and + ``xsd:decimal`` (``1.23``). rdflib parses these back with the + correct datatype, so this is lossless. + +4. **Base IRI / prefix collision**: When a graph has ``@base`` and a + prefix whose namespace equals the base IRI (e.g. rdflib's auto-bound + ``base:`` prefix), pyoxigraph emits CURIEs like ``base:label`` that + rdflib rejects. We skip such prefixes during serialization. +""" + +import io +import logging +from typing import Optional + +import pyoxigraph as ox +import rdflib + +logger = logging.getLogger(__name__) + +# Mapping from rdflib/LinkML format strings to pyoxigraph RdfFormat objects. +_FORMAT_MAP: dict[str, ox.RdfFormat] = { + "turtle": ox.RdfFormat.TURTLE, + "ttl": ox.RdfFormat.TURTLE, + "nt": ox.RdfFormat.N_TRIPLES, + "ntriples": ox.RdfFormat.N_TRIPLES, + "n-triples": ox.RdfFormat.N_TRIPLES, + "nt11": ox.RdfFormat.N_TRIPLES, + "nquads": ox.RdfFormat.N_QUADS, + "n-quads": ox.RdfFormat.N_QUADS, + "xml": ox.RdfFormat.RDF_XML, + "rdf/xml": ox.RdfFormat.RDF_XML, + "trig": ox.RdfFormat.TRIG, + "n3": ox.RdfFormat.N3, +} + +# Formats that support prefix declarations. 
+_PREFIX_FORMATS = frozenset({ox.RdfFormat.TURTLE, ox.RdfFormat.TRIG, ox.RdfFormat.N3, ox.RdfFormat.RDF_XML}) + + +def canonicalize_rdf_graph( + graph: rdflib.Graph, + output_format: str = "turtle", +) -> str: + """Serialize an rdflib Graph deterministically using RDFC-1.0 canonicalization. + + The graph is transferred to pyoxigraph via N-Triples, canonicalized + with RDFC-1.0, sorted, and serialized back to the requested format. + Prefix bindings from the rdflib Graph are preserved in the output + for formats that support them (Turtle, TriG, N3, RDF/XML). + + Falls back to plain rdflib serialization for unsupported formats or + graphs containing non-standard RDF (e.g. literal predicates). + + :param graph: The rdflib Graph to serialize. + :param output_format: Target serialization format (e.g. ``"turtle"``, ``"nt"``). + :return: Deterministic string serialization of the graph. + """ + ox_format = _FORMAT_MAP.get(output_format.lower()) + if ox_format is None: + logger.warning( + "pyoxigraph does not support format %r; falling back to rdflib serializer", + output_format, + ) + return graph.serialize(format=output_format) + + # 1. Transfer rdflib graph to pyoxigraph via N-Triples. + nt_data = graph.serialize(format="nt") + nt_bytes = nt_data.encode("utf-8") if isinstance(nt_data, str) else nt_data + + # 2. Parse into pyoxigraph and build a Dataset for canonicalization. + # Fall back to rdflib if the graph contains non-standard RDF + # (e.g. literal predicates from annotations) that pyoxigraph rejects. + try: + triples = list(ox.parse(io.BytesIO(nt_bytes), format=ox.RdfFormat.N_TRIPLES)) + except SyntaxError: + logger.warning( + "Graph contains non-standard RDF that pyoxigraph cannot parse; " + "falling back to rdflib serializer" + ) + return graph.serialize(format=output_format) + + dataset = ox.Dataset() + for triple in triples: + dataset.add(ox.Quad(triple.subject, triple.predicate, triple.object, ox.DefaultGraph())) + + # 3. 
Canonicalize blank node labels with RDFC-1.0. + dataset.canonicalize(ox.CanonicalizationAlgorithm.RDFC_1_0) + + # 4. Sort triples for deterministic ordering. + quads = list(dataset) + sorted_triples = sorted( + (ox.Triple(q.subject, q.predicate, q.object) for q in quads), + key=lambda t: (str(t.subject), str(t.predicate), str(t.object)), + ) + + # 5. Collect prefixes for formats that support them. + base_iri = str(graph.base) if graph.base else None + prefixes: Optional[dict[str, str]] = None + if ox_format in _PREFIX_FORMATS: + prefixes = {} + for prefix, namespace in graph.namespace_manager.namespaces(): + if not prefix: # skip empty prefix (base) + continue + ns_str = str(namespace) + # Skip prefixes whose namespace matches the base IRI to avoid + # pyoxigraph emitting CURIEs like `base:label` that conflict + # with the @base directive. + if base_iri and ns_str == base_iri: + continue + prefixes[str(prefix)] = ns_str + result_bytes = ox.serialize( + sorted_triples, format=ox_format, prefixes=prefixes, base_iri=base_iri, + ) + return result_bytes.decode("utf-8") diff --git a/tests/linkml/test_compliance/helper.py b/tests/linkml/test_compliance/helper.py index 5f156ee5bb..5476882278 100644 --- a/tests/linkml/test_compliance/helper.py +++ b/tests/linkml/test_compliance/helper.py @@ -31,6 +31,7 @@ from linkml_runtime.loaders import rdflib_loader from linkml_runtime.utils.compile_python import compile_python from linkml_runtime.utils.introspection import package_schemaview +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.yamlutils import YAMLRoot from .dataframe_helper import check_data_pandera @@ -979,7 +980,7 @@ def _convert_data_to_rdf(schema: dict, instance: dict, target_class: str, ttl_pa "P": "http://example.org/P/", }, ) - ttl_output = g.serialize(format="turtle") + ttl_output = canonicalize_rdf_graph(g, output_format="turtle") g = rdflib.Graph() g.parse(data=ttl_output, format="turtle") _roundtripped = 
rdflib_loader.load(ttl_output, target_class=py_cls, schemaview=schemaview) diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index 0300449ace..80f3b7fb2a 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -378,7 +378,18 @@ def test_ifabsent(input_path): def check_slot_default_value(slot: URIRef, default_value: Any, datatype: str = None) -> None: for subject, predicate, object in g.triples((None, SH.path, slot)): - assert (subject, SH.defaultValue, Literal(default_value, datatype=datatype)) in g + # pyoxigraph's RDFC-1.0 serialization drops explicit ^^xsd:string + # per RDF 1.1 (plain literals and xsd:string are equivalent). + # Accept either form for xsd:string typed values. + expected = Literal(default_value, datatype=datatype) + if (subject, SH.defaultValue, expected) in g: + return + if datatype and str(datatype) == "http://www.w3.org/2001/XMLSchema#string": + if (subject, SH.defaultValue, Literal(default_value)) in g: + return + raise AssertionError( + f"Expected ({subject}, sh:defaultValue, {expected!r}) not found in graph" + ) check_slot_default_value( URIRef("https://w3id.org/linkml/tests/kitchen_sink/ifabsent_string"), diff --git a/tests/linkml/test_issues/conftest.py b/tests/linkml/test_issues/conftest.py index 88d07118d9..3e33633d55 100644 --- a/tests/linkml/test_issues/conftest.py +++ b/tests/linkml/test_issues/conftest.py @@ -5,6 +5,8 @@ import rdflib from rdflib.compare import to_canonical_graph +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph + @pytest.fixture def personinfo_path(input_path): @@ -150,7 +152,7 @@ def _normalize_snapshot_bundle_output(name: str, output: str) -> str: if name.endswith((".ttl", ".owl")): graph = rdflib.Graph() graph.parse(data=output, format="turtle") - normalized = to_canonical_graph(graph).serialize(format="nt") + normalized = canonicalize_rdf_graph(graph, output_format="nt") 
return "\n".join(sorted(line for line in normalized.splitlines() if line)) + "\n" if name.endswith((".json", ".schema.json", ".context.jsonld")): return json.dumps(json.loads(output), indent=2, sort_keys=True, ensure_ascii=False) + "\n" diff --git a/tests/linkml/test_notebooks/input/examples.py b/tests/linkml/test_notebooks/input/examples.py index fa316632b3..ec8054a485 100644 --- a/tests/linkml/test_notebooks/input/examples.py +++ b/tests/linkml/test_notebooks/input/examples.py @@ -6,6 +6,7 @@ from linkml.generators.pythongen import PythonGenerator from linkml.generators.shexgen import ShExGenerator from linkml_runtime.dumpers import json_dumper +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph yaml = """ id: http://example.org/sample/example1 @@ -108,7 +109,7 @@ print(jsonld) g = Graph() g.parse(data=jsonld, format="json-ld") -print(g.serialize(format="turtle")) +print(canonicalize_rdf_graph(g, output_format="turtle")) shex = ShExGenerator(yaml).serialize(collections=False) diff --git a/tests/linkml/test_scripts/test_gen_jsonld.py b/tests/linkml/test_scripts/test_gen_jsonld.py index eaeae15d7d..dd93e857f7 100644 --- a/tests/linkml/test_scripts/test_gen_jsonld.py +++ b/tests/linkml/test_scripts/test_gen_jsonld.py @@ -4,6 +4,8 @@ from click.testing import CliRunner from rdflib import Graph, URIRef +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph + from linkml import METAMODEL_NAMESPACE from linkml.generators.jsonldcontextgen import ContextGenerator from linkml.generators.jsonldgen import JSONLDGenerator, cli @@ -132,9 +134,10 @@ def test_meta_output(tmp_path_factory): # Convert JSON to TTL g = Graph() g.load(tmp_jsonld_path, format="json-ld") - g.serialize(tmp_rdf_path, format="ttl") + with open(tmp_rdf_path, "w", encoding="utf-8") as f: + f.write(canonicalize_rdf_graph(g, output_format="turtle")) g.bind("meta", METAMODEL_NAMESPACE) - new_ttl = g.serialize(format="turtle") + new_ttl = canonicalize_rdf_graph(g, 
output_format="turtle") # Make sure that the generated TTL matches the JSON-LD (probably not really needed, as this is more of a test # of rdflib than our tooling but it doesn't hurt diff --git a/tests/linkml/test_utils/test_uri_and_curie.py b/tests/linkml/test_utils/test_uri_and_curie.py index 3dba819c79..26accf5f23 100644 --- a/tests/linkml/test_utils/test_uri_and_curie.py +++ b/tests/linkml/test_utils/test_uri_and_curie.py @@ -10,6 +10,7 @@ from linkml.generators.jsonldgen import JSONLDGenerator from linkml.generators.pythongen import PythonGenerator from linkml_runtime.utils.compile_python import compile_python +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.yamlutils import as_rdf from tests.linkml.utils.compare_jsonld_context import CompareJsonldContext @@ -50,7 +51,7 @@ def test_uri_and_curie(input_path, snapshot, snapshot_path): instance_jsonld, ], ) - assert g.serialize(format="ttl") == snapshot(f"{model_name}.ttl") + assert canonicalize_rdf_graph(g, output_format="turtle") == snapshot(f"{model_name}.ttl") def test_issue_80_objectidentifier_roundtrip(input_path): @@ -72,7 +73,9 @@ def test_issue_80_objectidentifier_roundtrip(input_path): assert generated_context["Person"]["@id"] == "ex:PERSON" assert generated_context["age"]["@type"] == "xsd:integer" - rdf_output = as_rdf(example, contexts=json.dumps({"@context": generated_context})).serialize(format="turtle") + rdf_output = canonicalize_rdf_graph( + as_rdf(example, contexts=json.dumps({"@context": generated_context})), output_format="turtle" + ) graph = Graph() graph.parse(data=rdf_output, format="turtle") diff --git a/tests/linkml/utils/compare_rdf.py b/tests/linkml/utils/compare_rdf.py index 61c0c8aba2..509899abde 100644 --- a/tests/linkml/utils/compare_rdf.py +++ b/tests/linkml/utils/compare_rdf.py @@ -6,6 +6,7 @@ from rdflib.compare import IsomorphicGraph, graph_diff, to_isomorphic from linkml_runtime.linkml_model.meta import LINKML +from 
linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph # TODO: Find out why test_issue_namespace is emitting generation_date in the TYPE namespace from tests import SKIP_RDF_COMPARE, SKIP_RDF_COMPARE_REASON @@ -38,7 +39,7 @@ def print_triples(g: Graph) -> None: Print the contents of g into stdout :param g: graph to print """ - g_text = re.sub(r"@prefix.*\n", "", g.serialize(format="turtle")) + g_text = re.sub(r"@prefix.*\n", "", canonicalize_rdf_graph(g, output_format="turtle")) print(g_text) diff --git a/tests/linkml_runtime/support/compare_rdf.py b/tests/linkml_runtime/support/compare_rdf.py index ea6118d7f4..f09ee97fb2 100644 --- a/tests/linkml_runtime/support/compare_rdf.py +++ b/tests/linkml_runtime/support/compare_rdf.py @@ -7,6 +7,7 @@ from rdflib.compare import IsomorphicGraph, graph_diff, to_isomorphic from linkml_runtime import LINKML +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph # TODO: Find out why test_issue_namespace is emitting generation_date in the TYPE namespace from tests import SKIP_RDF_COMPARE, SKIP_RDF_COMPARE_REASON @@ -39,7 +40,7 @@ def print_triples(g: Graph) -> None: Print the contents of g into stdout :param g: graph to print """ - g_text = re.sub(r"@prefix.*\n", "", g.serialize(format="turtle")) + g_text = re.sub(r"@prefix.*\n", "", canonicalize_rdf_graph(g, output_format="turtle")) print(g_text) diff --git a/tests/linkml_runtime/test_utils/test_metamodelcore.py b/tests/linkml_runtime/test_utils/test_metamodelcore.py index 579662fd21..35a4457842 100644 --- a/tests/linkml_runtime/test_utils/test_metamodelcore.py +++ b/tests/linkml_runtime/test_utils/test_metamodelcore.py @@ -5,6 +5,7 @@ from jsonasobj2 import as_json from rdflib import RDF, XSD, Graph, Literal, Namespace +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.metamodelcore import ( URI, Bool, @@ -296,7 +297,7 @@ def __post_init__(self): rdfs:object "http://example.org/tests/child2" ; 
rdfs:subject "http://example.org/tests/descendant1" . -""" == as_rdf(y, context).serialize(format="turtle") +""" == canonicalize_rdf_graph(as_rdf(y, context), output_format="turtle") with pytest.raises(ValueError): y = Pair(s, s) diff --git a/tests/linkml_runtime/test_utils/test_rdf_canonicalize.py b/tests/linkml_runtime/test_utils/test_rdf_canonicalize.py new file mode 100644 index 0000000000..dd706c1d82 --- /dev/null +++ b/tests/linkml_runtime/test_utils/test_rdf_canonicalize.py @@ -0,0 +1,131 @@ +"""Tests for deterministic RDF serialization via pyoxigraph RDFC-1.0.""" + +import rdflib +from rdflib import BNode, Graph, Literal, URIRef +from rdflib.namespace import RDF + +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph + + +def _make_graph_with_bnodes() -> Graph: + """Create a graph with blank nodes for testing.""" + g = Graph() + g.bind("ex", "http://example.com/") + subj = URIRef("http://example.com/a") + bn = BNode() + g.add((subj, URIRef("http://example.com/p"), Literal("hello"))) + g.add((subj, URIRef("http://example.com/r"), bn)) + g.add((bn, URIRef("http://example.com/q"), Literal("blank_val"))) + return g + + +def test_determinism(): + """Same graph serialized multiple times produces byte-identical output.""" + g = _make_graph_with_bnodes() + results = [canonicalize_rdf_graph(g, output_format="turtle") for _ in range(5)] + assert all(r == results[0] for r in results), "Output was not deterministic across runs" + + +def test_round_trip_isomorphic(): + """Re-parsing canonicalized output produces an isomorphic graph.""" + g = _make_graph_with_bnodes() + ttl = canonicalize_rdf_graph(g, output_format="turtle") + g2 = Graph() + g2.parse(data=ttl, format="turtle") + assert rdflib.compare.isomorphic(g, g2), "Round-tripped graph is not isomorphic to original" + + +def test_blank_node_stability(): + """Blank node labels are stable across calls.""" + g = _make_graph_with_bnodes() + out1 = canonicalize_rdf_graph(g, output_format="turtle") + 
out2 = canonicalize_rdf_graph(g, output_format="turtle") + assert out1 == out2, "Blank node labels differ between calls" + + +def test_prefix_preservation(): + """Output Turtle contains expected @prefix declarations.""" + g = Graph() + g.bind("ex", "http://example.com/") + g.bind("foaf", "http://xmlns.com/foaf/0.1/") + g.add((URIRef("http://example.com/a"), URIRef("http://xmlns.com/foaf/0.1/name"), Literal("Alice"))) + ttl = canonicalize_rdf_graph(g, output_format="turtle") + assert "@prefix ex:" in ttl + assert "@prefix foaf:" in ttl + + +def test_ntriples_format(): + """N-Triples output is deterministic.""" + g = _make_graph_with_bnodes() + results = [canonicalize_rdf_graph(g, output_format="nt") for _ in range(5)] + assert all(r == results[0] for r in results) + # N-Triples should not contain @prefix + assert "@prefix" not in results[0] + + +def test_empty_graph(): + """Empty graph produces valid empty output.""" + g = Graph() + ttl = canonicalize_rdf_graph(g, output_format="turtle") + # Re-parsing should also be empty + g2 = Graph() + g2.parse(data=ttl, format="turtle") + assert len(g2) == 0 + + +def test_ordering_is_sorted(): + """Subjects appear in sorted order in the output.""" + g = Graph() + g.bind("ex", "http://example.com/") + g.add((URIRef("http://example.com/z"), RDF.type, URIRef("http://example.com/Thing"))) + g.add((URIRef("http://example.com/a"), RDF.type, URIRef("http://example.com/Thing"))) + g.add((URIRef("http://example.com/m"), RDF.type, URIRef("http://example.com/Thing"))) + ttl = canonicalize_rdf_graph(g, output_format="turtle") + # Find positions of subjects in the output + pos_a = ttl.index("ex:a") + pos_m = ttl.index("ex:m") + pos_z = ttl.index("ex:z") + assert pos_a < pos_m < pos_z, f"Subjects not in sorted order: a@{pos_a}, m@{pos_m}, z@{pos_z}" + + +def test_multiple_blank_nodes_deterministic(): + """Multiple blank nodes are canonicalized deterministically.""" + g = Graph() + g.bind("ex", "http://example.com/") + subj = 
URIRef("http://example.com/s") + bn1 = BNode() + bn2 = BNode() + g.add((subj, URIRef("http://example.com/p"), bn1)) + g.add((subj, URIRef("http://example.com/q"), bn2)) + g.add((bn1, URIRef("http://example.com/val"), Literal("first"))) + g.add((bn2, URIRef("http://example.com/val"), Literal("second"))) + results = [canonicalize_rdf_graph(g, output_format="turtle") for _ in range(5)] + assert all(r == results[0] for r in results) + + +def test_xsd_string_normalized(): + """pyoxigraph drops explicit ^^xsd:string per RDF 1.1; output is still semantically correct.""" + g = Graph() + g.bind("ex", "http://example.com/") + XSD = rdflib.Namespace("http://www.w3.org/2001/XMLSchema#") + g.add((URIRef("http://example.com/a"), URIRef("http://example.com/p"), Literal("hello", datatype=XSD.string))) + ttl = canonicalize_rdf_graph(g, output_format="turtle") + # pyoxigraph writes plain "hello" without ^^xsd:string + assert "xsd:string" not in ttl + # The triple is still present (rdflib reads it back as untyped Literal) + g2 = Graph() + g2.parse(data=ttl, format="turtle") + assert len(g2) == 1 + obj = list(g2.objects())[0] + assert str(obj) == "hello" + + +def test_fallback_on_invalid_rdf(): + """Graphs with literal predicates fall back to rdflib serializer.""" + g = Graph() + g.bind("ex", "http://example.com/") + # Add a triple with a Literal predicate (non-standard RDF) + g.add((URIRef("http://example.com/s"), Literal("not_a_predicate"), Literal("value"))) + # Should not raise, falls back to rdflib + result = canonicalize_rdf_graph(g, output_format="turtle") + assert "not_a_predicate" in result diff --git a/uv.lock b/uv.lock index 49c5573281..8a68840cb2 100644 --- a/uv.lock +++ b/uv.lock @@ -884,47 +884,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408, upload-time = 
"2024-04-23T18:57:14.835Z" }, ] -[[package]] -name = "duckdb" -version = "1.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ea/e7/21cf50a3d52ffceee1f0bcc3997fa96a5062e6bab705baee4f6c4e33cce5/duckdb-1.4.1.tar.gz", hash = "sha256:f903882f045d057ebccad12ac69975952832edfe133697694854bb784b8d6c76", size = 18461687, upload-time = "2025-10-07T10:37:28.605Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/68/cc/00a07de0e33d16763edd4132d7c8a2f9efd57a2f296a25a948f239a1fadf/duckdb-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:296b4fff3908fb4c47b0aa1d77bd1933375e75401009d2dc81af8e7a0b8a05b4", size = 29062814, upload-time = "2025-10-07T10:36:14.261Z" }, - { url = "https://files.pythonhosted.org/packages/17/ea/fb0fda8886d1928f1b2a53a1163ef94f6f4b41f6d8b29eee457acfc2fa67/duckdb-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0b4182800092115feee5d71a8691efb283d3c9f5eb0b36362b308ef007a12222", size = 16161652, upload-time = "2025-10-07T10:36:17.358Z" }, - { url = "https://files.pythonhosted.org/packages/b4/5f/052e6436a71f461e61cd3a982954c029145a84b58cefa1dfb3eb2d96e4fc/duckdb-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:67cc3b6c7f7ba07a69e9331b8ccea7a60cbcd4204bb473e5da9b71588bd2eca9", size = 13753030, upload-time = "2025-10-07T10:36:19.782Z" }, - { url = "https://files.pythonhosted.org/packages/c2/fd/3ae3c89d0f6ad54c0be4430e572306fbfc9f173c97b23c5025a540449325/duckdb-1.4.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cef0cee7030b561640cb9af718f8841b19cdd2aa020d53561057b5743bea90b", size = 18487683, upload-time = "2025-10-07T10:36:22.375Z" }, - { url = "https://files.pythonhosted.org/packages/d4/3c/eef454cd7c3880c2d55b50e18a9c7a213bf91ded79efcfb573d8d6dd8a47/duckdb-1.4.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2bf93347f37a46bacce6ac859d651dbf5731e2c94a64ab358300425b09e3de23", size = 20487080, 
upload-time = "2025-10-07T10:36:24.692Z" }, - { url = "https://files.pythonhosted.org/packages/bb/5b/b619f4c986a1cb0b06315239da9ce5fd94a20c07a344d03e2635d56a6967/duckdb-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:2e60d2361f978908a3d96eebaf1f4b346f283afcc467351aae50ea45ca293a2b", size = 12324436, upload-time = "2025-10-07T10:36:27.458Z" }, - { url = "https://files.pythonhosted.org/packages/d9/52/606f13fa9669a24166d2fe523e28982d8ef9039874b4de774255c7806d1f/duckdb-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:605d563c1d5203ca992497cd33fb386ac3d533deca970f9dcf539f62a34e22a9", size = 29065894, upload-time = "2025-10-07T10:36:29.837Z" }, - { url = "https://files.pythonhosted.org/packages/84/57/138241952ece868b9577e607858466315bed1739e1fbb47205df4dfdfd88/duckdb-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d3305c7c4b70336171de7adfdb50431f23671c000f11839b580c4201d9ce6ef5", size = 16163720, upload-time = "2025-10-07T10:36:32.241Z" }, - { url = "https://files.pythonhosted.org/packages/a3/81/afa3a0a78498a6f4acfea75c48a70c5082032d9ac87822713d7c2d164af1/duckdb-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a063d6febbe34b32f1ad2e68822db4d0e4b1102036f49aaeeb22b844427a75df", size = 13756223, upload-time = "2025-10-07T10:36:34.673Z" }, - { url = "https://files.pythonhosted.org/packages/47/dd/5f6064fbd9248e37a3e806a244f81e0390ab8f989d231b584fb954f257fc/duckdb-1.4.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1ffcaaf74f7d1df3684b54685cbf8d3ce732781c541def8e1ced304859733ae", size = 18487022, upload-time = "2025-10-07T10:36:36.759Z" }, - { url = "https://files.pythonhosted.org/packages/a1/10/b54969a1c42fd9344ad39228d671faceb8aa9f144b67cd9531a63551757f/duckdb-1.4.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:685d3d1599dc08160e0fa0cf09e93ac4ff8b8ed399cb69f8b5391cd46b5b207c", size = 20491004, upload-time = "2025-10-07T10:36:39.318Z" }, - { url = 
"https://files.pythonhosted.org/packages/ed/d5/7332ae8f804869a4e895937821b776199a283f8d9fc775fd3ae5a0558099/duckdb-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:78f1d28a15ae73bd449c43f80233732adffa49be1840a32de8f1a6bb5b286764", size = 12327619, upload-time = "2025-10-07T10:36:41.509Z" }, - { url = "https://files.pythonhosted.org/packages/0e/6c/906a3fe41cd247b5638866fc1245226b528de196588802d4df4df1e6e819/duckdb-1.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cd1765a7d180b7482874586859fc23bc9969d7d6c96ced83b245e6c6f49cde7f", size = 29076820, upload-time = "2025-10-07T10:36:43.782Z" }, - { url = "https://files.pythonhosted.org/packages/66/c7/01dd33083f01f618c2a29f6dd068baf16945b8cbdb132929d3766610bbbb/duckdb-1.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8ed7a86725185470953410823762956606693c0813bb64e09c7d44dbd9253a64", size = 16167558, upload-time = "2025-10-07T10:36:46.003Z" }, - { url = "https://files.pythonhosted.org/packages/81/e2/f983b4b7ae1dfbdd2792dd31dee9a0d35f88554452cbfc6c9d65e22fdfa9/duckdb-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8a189bdfc64cfb9cc1adfbe4f2dcfde0a4992ec08505ad8ce33c886e4813f0bf", size = 13762226, upload-time = "2025-10-07T10:36:48.55Z" }, - { url = "https://files.pythonhosted.org/packages/ed/34/fb69a7be19b90f573b3cc890961be7b11870b77514769655657514f10a98/duckdb-1.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a9090089b6486f7319c92acdeed8acda022d4374032d78a465956f50fc52fabf", size = 18500901, upload-time = "2025-10-07T10:36:52.445Z" }, - { url = "https://files.pythonhosted.org/packages/e4/a5/1395d7b49d5589e85da9a9d7ffd8b50364c9d159c2807bef72d547f0ad1e/duckdb-1.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:142552ea3e768048e0e8c832077a545ca07792631c59edaee925e3e67401c2a0", size = 20514177, upload-time = "2025-10-07T10:36:55.358Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/21/08f10706d30252753349ec545833fc0cea67c11abd0b5223acf2827f1056/duckdb-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:567f3b3a785a9e8650612461893c49ca799661d2345a6024dda48324ece89ded", size = 12336422, upload-time = "2025-10-07T10:36:57.521Z" }, - { url = "https://files.pythonhosted.org/packages/d7/08/705988c33e38665c969f7876b3ca4328be578554aa7e3dc0f34158da3e64/duckdb-1.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:46496a2518752ae0c6c5d75d4cdecf56ea23dd098746391176dd8e42cf157791", size = 29077070, upload-time = "2025-10-07T10:36:59.83Z" }, - { url = "https://files.pythonhosted.org/packages/99/c5/7c9165f1e6b9069441bcda4da1e19382d4a2357783d37ff9ae238c5c41ac/duckdb-1.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1c65ae7e9b541cea07d8075343bcfebdecc29a3c0481aa6078ee63d51951cfcd", size = 16167506, upload-time = "2025-10-07T10:37:02.24Z" }, - { url = "https://files.pythonhosted.org/packages/38/46/267f4a570a0ee3ae6871ddc03435f9942884284e22a7ba9b7cb252ee69b6/duckdb-1.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:598d1a314e34b65d9399ddd066ccce1eeab6a60a2ef5885a84ce5ed62dbaf729", size = 13762330, upload-time = "2025-10-07T10:37:04.581Z" }, - { url = "https://files.pythonhosted.org/packages/15/7b/c4f272a40c36d82df20937d93a1780eb39ab0107fe42b62cba889151eab9/duckdb-1.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2f16b8def782d484a9f035fc422bb6f06941ed0054b4511ddcdc514a7fb6a75", size = 18504687, upload-time = "2025-10-07T10:37:06.991Z" }, - { url = "https://files.pythonhosted.org/packages/17/fc/9b958751f0116d7b0406406b07fa6f5a10c22d699be27826d0b896f9bf51/duckdb-1.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a7d0aed068a5c33622a8848857947cab5cfb3f2a315b1251849bac2c74c492", size = 20513823, upload-time = "2025-10-07T10:37:09.349Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/79/4f544d73fcc0513b71296cb3ebb28a227d22e80dec27204977039b9fa875/duckdb-1.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:280fd663dacdd12bb3c3bf41f3e5b2e5b95e00b88120afabb8b8befa5f335c6f", size = 12336460, upload-time = "2025-10-07T10:37:12.154Z" }, -] - -[[package]] -name = "elementpath" -version = "4.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ac/41/afdd82534c80e9675d1c51dc21d0889b72d023bfe395a2f5a44d751d3a73/elementpath-4.8.0.tar.gz", hash = "sha256:5822a2560d99e2633d95f78694c7ff9646adaa187db520da200a8e9479dc46ae", size = 358528, upload-time = "2025-03-03T20:51:08.397Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/95/615af832e7f507fe5ce4562b4be1bd2fec080c4ff6da88dcd0c2dbfca582/elementpath-4.8.0-py3-none-any.whl", hash = "sha256:5393191f84969bcf8033b05ec4593ef940e58622ea13cefe60ecefbbf09d58d9", size = 243271, upload-time = "2025-03-03T20:51:03.027Z" }, -] - [[package]] name = "et-xmlfile" version = "2.0.0" @@ -964,55 +923,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, ] -[[package]] -name = "falcon" -version = "4.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/90/85/a4abc8357f6bc6b6b0b3d80e2c319c895900c518a3528279a222d7a53b7e/falcon-4.1.0.tar.gz", hash = "sha256:dbc3fa642b43e7662f121d0a7b5d7ea42a1a31fb22ae63572c42ee3ecd1f79d0", size = 659308, upload-time = "2025-08-06T16:08:20.905Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/9e/8168712ac4d9f585479b1516796e43669ddb4fde4d77dbac3d6d47a3cbaf/falcon-4.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:c4aa85dbf9af10ee0d48f6c409551d7c7226953b2479a24eaad42d39c05bd4b2", size = 409465, upload-time = "2025-08-06T16:18:38.44Z" }, - { url = "https://files.pythonhosted.org/packages/0f/08/024972673db6706e23898a8f0daed7c5af747f3acd9f65eadcc712ace10d/falcon-4.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:71ca6b380ddd09458127ddd5af73f302aad6de8da5690b610c9165209f5766bc", size = 406992, upload-time = "2025-08-06T16:18:40.76Z" }, - { url = "https://files.pythonhosted.org/packages/9d/80/694ec817bd338b41adfe0cb45f4181ac5e712ccd332c0ff3f061e44bde47/falcon-4.1.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a294092ff27ac6038a36169e99175af86ea03c5e054d60c5d219fab36ad6cec7", size = 803325, upload-time = "2025-08-06T16:18:42.496Z" }, - { url = "https://files.pythonhosted.org/packages/30/f7/4b2482f9f9e7f8ad366773c8b960ab8258df65f92c6e62906408a229774a/falcon-4.1.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:033b74c3e512441cef122cd7d9caa61571bf9cfef1cc60dc2c2c1c23bcf14727", size = 835781, upload-time = "2025-08-06T16:18:44.347Z" }, - { url = "https://files.pythonhosted.org/packages/de/11/20d9aca668d5c327f143306c21b4e81a39cd89185d157b24c77d1e40e31c/falcon-4.1.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a2a8324259cd47c1fe21f352a4b34b97ca29c064813115a1dddb8d95168ed154", size = 802545, upload-time = "2025-08-06T16:18:46.088Z" }, - { url = "https://files.pythonhosted.org/packages/cc/32/06c5ae5aedcf0f7d92755f273b4a6835c295423b9fc8929db7b73a04283c/falcon-4.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:758fcf3416842b8b431429873accbee34e92206600358a02e8d418e7a968983e", size = 797777, upload-time = "2025-08-06T16:18:47.951Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ee/c4286c4261ccd10fac1c11708e1946dc363c6dc23cb9956d51860cfab1ee/falcon-4.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:f783cfb370ed8ccaf658b0b6c766dad3ad945f1eaf27ee1f5016706618ac6f3c", size = 805584, upload-time = "2025-08-06T16:18:50.029Z" }, - { url = "https://files.pythonhosted.org/packages/f5/5a/2aaba3f41ff0dbb134a7bcb377a11f902edda8e512bc2b06df83fe9ca7c6/falcon-4.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:600183fecbd121fbe55590645618ae1af12e02a4ca6ac035d6857b628cbee5ca", size = 406475, upload-time = "2025-08-06T16:18:52.045Z" }, - { url = "https://files.pythonhosted.org/packages/98/c7/c4e39259046749fc4227ecb965484fa0d44e4f199e4cc591c2cddcb2120f/falcon-4.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a2105cd03cd7accad2f2a4b875a9f588d37293e9c202929762ea94e446bfb6ee", size = 410220, upload-time = "2025-08-06T16:18:53.831Z" }, - { url = "https://files.pythonhosted.org/packages/77/e7/405d54c3f81a8d869f0d7bdbefa45a35a6e225f12f809e823e02c67f85f0/falcon-4.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:589e9a1cde0f84c9cb25327f6431a017a1b17e9d2c42b4b927e0ffbde6e42315", size = 407669, upload-time = "2025-08-06T16:18:55.771Z" }, - { url = "https://files.pythonhosted.org/packages/7f/03/508d444bf71df55dd70c7f341abf681922a7a135e7a385c4ff77b2823f46/falcon-4.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41a3f208e28d2ff59beaaa503900c57a4d59ab17ddb3946efbad0817a314936", size = 831283, upload-time = "2025-08-06T16:18:57.206Z" }, - { url = "https://files.pythonhosted.org/packages/7f/02/fe51c596da54854c38766b274c5cb827922bbdcfef5c53d1e7fc7c2a1867/falcon-4.1.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:43aed2194c804b64aeaf331384e2ef2ff69f29047cca65fb71e47d56f44c646f", size = 863284, upload-time = "2025-08-06T16:18:59.22Z" }, - { url = "https://files.pythonhosted.org/packages/ca/52/b80d409c712a0ec97c085fdd56997f71cedb284d4052eeb7f1ecc8db7a1d/falcon-4.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:fdad618359822f577c3ae9db73107b511e22d55715ffd4543a5096559bf1f18f", size = 832109, upload-time = "2025-08-06T16:19:00.825Z" }, - { url = "https://files.pythonhosted.org/packages/3d/0c/fafa157e7a039d750b1f6f23aa98e27d60e6a18d11bfac11a0d5df22dc2e/falcon-4.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eea2063c49e3b327a545dadb9e445e00bf44edbfd492b92f81de582e3ce461ca", size = 825371, upload-time = "2025-08-06T16:19:03.402Z" }, - { url = "https://files.pythonhosted.org/packages/76/6e/6980b8bfe107816f74cfc5f342a3f7c1d18af407e400c61eb64e78350eaa/falcon-4.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6e88c8a4f7bf125068d8f34bda094443ce7a2dd9bfef05c0da97a8eb5a8cb568", size = 833296, upload-time = "2025-08-06T16:19:05.79Z" }, - { url = "https://files.pythonhosted.org/packages/ce/fa/144de2c8725080495560ad6b9ef5140a7ace6fcdb0da98482aac8e6108fe/falcon-4.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cda1a53580014f35f1314d66c74205f79e4cfb620dd1b952f581b81415477c60", size = 406723, upload-time = "2025-08-06T16:19:07.46Z" }, - { url = "https://files.pythonhosted.org/packages/02/af/e1e7109ae1a77076e2c690c04ce3dd95644ad77e8543ab08bebaec659666/falcon-4.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:971def6f088f433092538a8409431c7001ed4eb4a6c908d5bb932111749e36e9", size = 409594, upload-time = "2025-08-06T16:19:08.957Z" }, - { url = "https://files.pythonhosted.org/packages/5d/73/36c724f0fb825bc2172c72ec55377256543fb06283d3b228ec3e0663b575/falcon-4.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76a6af9f8142b7e10cee1b835b5548d73b6c65f2a1dc71aa75786bb61f3db5aa", size = 406024, upload-time = "2025-08-06T16:19:10.72Z" }, - { url = "https://files.pythonhosted.org/packages/df/2e/a272d9b96d5c8f2b7144a6bae3a5b4a28c8a90f1f3db17382ad5f23a9222/falcon-4.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:85cf8446d54625e67b219a29ba1cd1d72a28166973c4ead07086a0a0991be161", size = 843815, upload-time = 
"2025-08-06T16:19:12.607Z" }, - { url = "https://files.pythonhosted.org/packages/92/2c/21a0aff6363095b0dc3694d4dc9880c4a18d82e7cf5c358b7dd7194e766b/falcon-4.1.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:41368d586b58d68bb39c672654baef0dcbc130b371e2c59e093f2699892637f2", size = 878483, upload-time = "2025-08-06T16:19:14.441Z" }, - { url = "https://files.pythonhosted.org/packages/82/46/5a3bc1c08e5302f9a84c52c0b3ac42ac032a25eb9299968a644f4a530432/falcon-4.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cece1561c6766b733136306c07a4198a1b760583651fd45bd071378d353b0fae", size = 850682, upload-time = "2025-08-06T16:19:16.236Z" }, - { url = "https://files.pythonhosted.org/packages/6f/68/f280840cd33fcd6b9dd632a628d2123b6d44ae69fba20d9b5d4a13b6eb83/falcon-4.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:27c27f7144d2c705a47c17de3e19b58d0d64bafc17006cbf57064c5a56c4c0f2", size = 832994, upload-time = "2025-08-06T16:19:17.723Z" }, - { url = "https://files.pythonhosted.org/packages/52/2e/135aed83a24c856f1200843efe4728408e1a6898532bcf84175954ff1ca1/falcon-4.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ef8a4c30da27ae8707c1994d4b2728492e7e173c04e79dac49f22711db285bc", size = 846235, upload-time = "2025-08-06T16:19:19.486Z" }, - { url = "https://files.pythonhosted.org/packages/c7/38/3a2244f67e5b107732bdef184fcf5deb64116799918a6cca90a6e5569a64/falcon-4.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:6060146aad636931fdc88275740343e336fdb40c67b3ab43c40d48cb30964891", size = 406754, upload-time = "2025-08-06T16:19:21.772Z" }, - { url = "https://files.pythonhosted.org/packages/aa/5d/3b76f1582bb3cb17d21bd7d5187a6fa38ed43517569ee3235c94f0746b78/falcon-4.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ea3954305fcf7ebc1aac90f36c01d26ce12b77b059281166b95a553b3cc9d45", size = 407931, upload-time = "2025-08-06T16:19:23.642Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/1a/7bbd6f5d192a8ccd0a83f429740549f2d3308464982ea47bd4d9863bdb45/falcon-4.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:84063a5242696c4cd9040ca53d178604445e8ce8b0a112c9461465554784644b", size = 403950, upload-time = "2025-08-06T16:19:25.349Z" }, - { url = "https://files.pythonhosted.org/packages/1d/71/6c9a0335a35b1e914ca1a53a4520b6abce4f2b18c1a447a8d6e84c4e1c3f/falcon-4.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3dbb2648f8d0670f48d5cdfa0b2b1decba6d1e505679291ffeaa5c4cb7e15df", size = 824177, upload-time = "2025-08-06T16:19:27.193Z" }, - { url = "https://files.pythonhosted.org/packages/fd/fc/527e46842af688d4f532573446399ec48872596b32d1273f9b467b009ee6/falcon-4.1.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b0c7cc2a2b3a715086a3b7877f6a6ddfcac7261c15ceb671c6a09f442b06ef66", size = 857963, upload-time = "2025-08-06T16:19:29.142Z" }, - { url = "https://files.pythonhosted.org/packages/ff/a3/97f7be3e4e47a3e5c4f417ca015feb3b4bfe70b77ce00e4fd2a5c4408095/falcon-4.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccc7302a0056f8a03b98c9aa9acb96ad4cae4aabf3aa857f0c9f1871b58ae799", size = 832928, upload-time = "2025-08-06T16:19:31.018Z" }, - { url = "https://files.pythonhosted.org/packages/5f/f1/ca585ad0848b664d1b4c34118140ef8aac2ee3c27a167463c5dcaadc5410/falcon-4.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:06e554c1448fbad75de4747247d02a29a944b666f44279928efcac1324e941b4", size = 816075, upload-time = "2025-08-06T16:19:32.837Z" }, - { url = "https://files.pythonhosted.org/packages/a2/14/85b7026a265678b50cdbbb2efbafd106669b6347b1f8de5cd445d8f0e51e/falcon-4.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a01511d6ef24eeb228b4bd47ba05e8e74f50834b5530223e834be84c0eae506", size = 830746, upload-time = "2025-08-06T16:19:34.413Z" }, - { url = 
"https://files.pythonhosted.org/packages/db/94/65d8d73ee69e3934221aff6a5384a5eaf76f583450aa33573563f41e03e3/falcon-4.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:e3fefde1f590fff4ccd404dd932f10c9b91b42a1735e8205aa7551432a5e870a", size = 405271, upload-time = "2025-08-06T16:19:36.223Z" }, - { url = "https://files.pythonhosted.org/packages/aa/4f/2818c15070e693bcb85a4766397d6bb3b673b39fe3f154d2b6dd17cedc00/falcon-4.1.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:91e4dacdfc5681c13294d42caf951a2837a17b413539d3c71f21650ddf6b0b27", size = 408094, upload-time = "2025-08-06T16:19:37.866Z" }, - { url = "https://files.pythonhosted.org/packages/42/4e/2f9bf0c3a4621b6c5f4f4181e35791a17f0a6765fb6ef9d0af8360f7990b/falcon-4.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:214b437c03f9d0273f5b6f2e139e7d83b3f8748925d495ae9826e739608f6155", size = 404444, upload-time = "2025-08-06T16:19:40.232Z" }, - { url = "https://files.pythonhosted.org/packages/8c/60/e856790f93840bd75267e62405e6507e15d9fa60b502b05a8409ce1539be/falcon-4.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d569fde899c813a229108c689aa3bd88511db49d904dd35fc111949267d2ecd1", size = 824892, upload-time = "2025-08-06T16:19:41.84Z" }, - { url = "https://files.pythonhosted.org/packages/d4/8d/814cd47e6b5f9be2a0ae1c3b5f57306b63d5f1a80806ed31c5991c8c4991/falcon-4.1.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c38afcbb1b475974572859c6cc48fbdcda9a5d4094d06fd0ce54f321b3490975", size = 854783, upload-time = "2025-08-06T16:19:43.676Z" }, - { url = "https://files.pythonhosted.org/packages/7d/47/157be9e4ffe6a3430f12b6c42be2badf34f4f8afc95a415b691ae0e8c0f6/falcon-4.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:903501f334d5135e6c74e11141a94f1dd0fbf80344b3dc96b57e8a1a3c5a62cb", size = 828502, upload-time = "2025-08-06T16:19:45.349Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/54/5423672a17525d8aa678cfc0d942ed8ae7c6b31afef7d49861887267fb6b/falcon-4.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:90cc61a36d3e0375042b3c6a28ead19e969f8ba238beed17a887784625b9ee78", size = 817282, upload-time = "2025-08-06T16:19:47.222Z" }, - { url = "https://files.pythonhosted.org/packages/0b/2c/c745aafe9f09ab0263a11fff2c7235db5bcc52717d993dc68e32a03ed9a1/falcon-4.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:aa12c4422ba789fdeb90f66f39fb5f9a359b5ddb4ff2c8d51d2e1f59277af7b9", size = 829284, upload-time = "2025-08-06T16:19:49.093Z" }, - { url = "https://files.pythonhosted.org/packages/60/bb/15816f6dbad103a277de19f924bcd370bf86330513c655c691f9ad883c63/falcon-4.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:25be29328b39e384bd7fdd0cc46c0e86f232fcf37d9e3fb7033200df92cf1940", size = 407399, upload-time = "2025-08-06T16:19:50.888Z" }, - { url = "https://files.pythonhosted.org/packages/91/36/ee359d6d8d201ddafd124919ec65432d48796e4181537c991e9b1cb70a15/falcon-4.1.0-py3-none-any.whl", hash = "sha256:07cb9690525fd69ca48bcf52dca8f32cff823564e89f3d0a04a2674c4c598176", size = 323157, upload-time = "2025-08-06T16:08:18.421Z" }, -] - [[package]] name = "fastjsonschema" version = "2.21.2" @@ -1156,6 +1066,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/ed/6bfa4109fcb23a58819600392564fea69cdc6551ffd5e69ccf1d52a40cbc/greenlet-3.2.4-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8c68325b0d0acf8d91dde4e6f930967dd52a5302cd4062932a6b2e7c2969f47c", size = 271061, upload-time = "2025-08-07T13:17:15.373Z" }, { url = "https://files.pythonhosted.org/packages/2a/fc/102ec1a2fc015b3a7652abab7acf3541d58c04d3d17a8d3d6a44adae1eb1/greenlet-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:94385f101946790ae13da500603491f04a76b6e4c059dab271b3ce2e283b2590", size = 629475, upload-time = "2025-08-07T13:42:54.009Z" }, { url = 
"https://files.pythonhosted.org/packages/c5/26/80383131d55a4ac0fb08d71660fd77e7660b9db6bdb4e8884f46d9f2cc04/greenlet-3.2.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f10fd42b5ee276335863712fa3da6608e93f70629c631bf77145021600abc23c", size = 640802, upload-time = "2025-08-07T13:45:25.52Z" }, + { url = "https://files.pythonhosted.org/packages/9f/7c/e7833dbcd8f376f3326bd728c845d31dcde4c84268d3921afcae77d90d08/greenlet-3.2.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c8c9e331e58180d0d83c5b7999255721b725913ff6bc6cf39fa2a45841a4fd4b", size = 636703, upload-time = "2025-08-07T13:53:12.622Z" }, { url = "https://files.pythonhosted.org/packages/e9/49/547b93b7c0428ede7b3f309bc965986874759f7d89e4e04aeddbc9699acb/greenlet-3.2.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58b97143c9cc7b86fc458f215bd0932f1757ce649e05b640fea2e79b54cedb31", size = 635417, upload-time = "2025-08-07T13:18:25.189Z" }, { url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" }, { url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" }, @@ -1166,6 +1077,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, { url = 
"https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, { url = "https://files.pythonhosted.org/packages/ae/8f/95d48d7e3d433e6dae5b1682e4292242a53f22df82e6d3dda81b1701a960/greenlet-3.2.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:94abf90142c2a18151632371140b3dba4dee031633fe614cb592dbb6c9e17bc3", size = 644646, upload-time = "2025-08-07T13:45:26.523Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5e/405965351aef8c76b8ef7ad370e5da58d57ef6068df197548b015464001a/greenlet-3.2.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:4d1378601b85e2e5171b99be8d2dc85f594c79967599328f95c1dc1a40f1c633", size = 640519, upload-time = "2025-08-07T13:53:13.928Z" }, { url = "https://files.pythonhosted.org/packages/25/5d/382753b52006ce0218297ec1b628e048c4e64b155379331f25a7316eb749/greenlet-3.2.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0db5594dce18db94f7d1650d7489909b57afde4c580806b8d9203b6e79cdc079", size = 639707, upload-time = "2025-08-07T13:18:27.146Z" }, { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, @@ -1176,6 +1088,7 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url 
= "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, @@ -1186,6 +1099,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = 
"https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -1196,6 +1110,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = 
"https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, @@ -1512,15 +1427,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade", size = 12898, upload-time = "2023-06-16T21:01:28.466Z" }, ] -[[package]] -name = "jsonpath-python" -version = "1.0.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b5/49/e582e50b0c54c1b47e714241c4a4767bf28758bf90212248aea8e1ce8516/jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666", size = 18121, upload-time = "2022-03-14T02:35:01.877Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/16/8a/d63959f4eff03893a00e6e63592e3a9f15b9266ed8e0275ab77f8c7dbc94/jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575", size = 7552, upload-time = 
"2022-03-14T02:34:59.754Z" }, -] - [[package]] name = "jsonpointer" version = "3.0.0" @@ -2008,7 +1914,6 @@ dev = [ { name = "jsonpatch" }, { name = "jupyter" }, { name = "mock" }, - { name = "morph-kgc" }, { name = "myst-nb" }, { name = "nbconvert" }, { name = "nbformat" }, @@ -2056,7 +1961,6 @@ shacl = [ ] tests = [ { name = "black" }, - { name = "morph-kgc" }, { name = "numpydantic" }, { name = "pyshacl" }, ] @@ -2123,7 +2027,6 @@ dev = [ { name = "jsonpatch", specifier = ">=1.33" }, { name = "jupyter" }, { name = "mock", specifier = ">=5.1.0" }, - { name = "morph-kgc", marker = "python_full_version >= '3.10'", specifier = ">=2.9.0" }, { name = "myst-nb", marker = "python_full_version >= '3.10'", specifier = ">=1.0.0" }, { name = "nbconvert" }, { name = "nbformat" }, @@ -2166,7 +2069,6 @@ pandera = [ shacl = [{ name = "pyshacl", specifier = ">=0.25.0" }] tests = [ { name = "black", specifier = ">=24.0.0" }, - { name = "morph-kgc", marker = "python_full_version >= '3.10'", specifier = ">=2.9.0" }, { name = "numpydantic", specifier = ">=1.6.1" }, { name = "pyshacl", specifier = ">=0.25.0" }, ] @@ -2202,6 +2104,7 @@ dependencies = [ { name = "prefixcommons" }, { name = "prefixmaps" }, { name = "pydantic" }, + { name = "pyoxigraph" }, { name = "pyyaml" }, { name = "rdflib" }, { name = "requests" }, @@ -2233,6 +2136,7 @@ requires-dist = [ { name = "prefixcommons", specifier = ">=0.1.12" }, { name = "prefixmaps", specifier = ">=0.1.4" }, { name = "pydantic", specifier = ">=1.10.2,<3.0.0" }, + { name = "pyoxigraph", specifier = ">=0.5.6" }, { name = "pyyaml" }, { name = "rdflib", specifier = ">=6.0.0" }, { name = "requests" }, @@ -2496,25 +2400,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/d9/617e6af809bf3a1d468e0d58c3997b1dc219a9a9202e650d30c2fc85d481/mock-5.2.0-py3-none-any.whl", hash = "sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f", size = 31617, upload-time = "2025-03-03T12:31:41.518Z" }, ] -[[package]] -name = 
"morph-kgc" -version = "2.10.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "duckdb" }, - { name = "elementpath" }, - { name = "falcon" }, - { name = "jsonpath-python" }, - { name = "pandas" }, - { name = "pyoxigraph" }, - { name = "rdflib" }, - { name = "ruamel-yaml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/09/b1/2306c3bf858186c1cb1b872fbae45ad5b820a2af7d5755766b9ad91b087a/morph_kgc-2.10.0.tar.gz", hash = "sha256:a8d01d4c2118821ed46490bc6445d0d06f0f5dd23e29eeeee1da64ea99e1a124", size = 257627, upload-time = "2026-01-20T11:04:45.478Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/c4/8edb7ec3b0602ffcfad2670cfb5eb2af2c40196695b440b636fc5a50b311/morph_kgc-2.10.0-py3-none-any.whl", hash = "sha256:8ce01d8db014a3cbe9d2e77f61c543ea8d19a7f9ec7cd6ad4abf203bd9ce9ba7", size = 72212, upload-time = "2026-01-20T11:04:44.446Z" }, -] - [[package]] name = "multidict" version = "6.7.0" @@ -3690,32 +3575,42 @@ wheels = [ [[package]] name = "pyoxigraph" -version = "0.3.22" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/68/14/33446bc01926701595b0890811f96ab941fd61e89f257ae262368a3b90c5/pyoxigraph-0.3.22.tar.gz", hash = "sha256:430b18cb3cec37b8c71cee0f70ea10601b9e479f1b8c364861660ae9f8629fd9", size = 4306498, upload-time = "2023-12-01T14:37:34.573Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/73/4b196f0e7c7a6a92c6a61050a8af18d1ec56bd32e6778343db45d8a5cb9c/pyoxigraph-0.3.22-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49609d3c8d6637193872181e8f9d8b85ae304b3d944b1d50a2e363bd4d3ad878", size = 6494143, upload-time = "2023-12-01T17:54:04.086Z" }, - { url = "https://files.pythonhosted.org/packages/8a/68/9d8608fe064976c4b6debcd74473a191f05d51956064ddc2adc1928b570b/pyoxigraph-0.3.22-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:fb0a0f2bd4348e9b92fbb92c71f449b7e42f6ac6fb67ce5797cbd8ab3b673c86", size = 6817401, upload-time = "2023-12-01T14:48:50.508Z" }, - { url = "https://files.pythonhosted.org/packages/1c/57/7e41261bd61f95d8aa8f85c03c9342fb247b83b2a99ec01bd9bc11c1bfb3/pyoxigraph-0.3.22-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5e9cd5931488feb3bdd189094a746d2d0c05c5364a2d93a1b748d2bb91145ab8", size = 6941539, upload-time = "2023-12-04T11:51:56.186Z" }, - { url = "https://files.pythonhosted.org/packages/9b/7d/4a40bb0f2e188354b4433a6ab7225d877eca68f21aaa65b937899dd8a7bf/pyoxigraph-0.3.22-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:95c43d3da6d43460368f0a5f4b497412b0d6509e55eb12245b0f173248118656", size = 7308576, upload-time = "2023-12-01T15:05:44.902Z" }, - { url = "https://files.pythonhosted.org/packages/97/c4/92050b91063458f48e3724ad4fc070c7552e005a18a9af6f45d81090a738/pyoxigraph-0.3.22-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9d466025962895e67a7c4a4ba303fe23a911f99d2158f5f53eb50f56949125f", size = 6494142, upload-time = "2023-12-01T17:54:06.663Z" }, - { url = "https://files.pythonhosted.org/packages/10/03/0c6f5c744d9cc03e606fa35b7f533846c4ba602f22db4a5cc38e7ea0e253/pyoxigraph-0.3.22-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90dc1e4010e2011c5440b7a3832153a14f52257e12a90a0d7fc6ed16e88a7961", size = 6817400, upload-time = "2023-12-01T14:48:52.531Z" }, - { url = "https://files.pythonhosted.org/packages/a0/44/6765ecd12c68b973cabeea8267cb17c21e4f74150075a6751fa6ba98dc4f/pyoxigraph-0.3.22-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:10c02f543fa83338e93308cad7868137ccadffc3330827deebac715333070091", size = 6941543, upload-time = "2023-12-04T11:51:58.401Z" }, - { url = "https://files.pythonhosted.org/packages/21/a4/f6ce53292ef2104dc098d979f293452916d6a5d076193497efa8411d2751/pyoxigraph-0.3.22-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:469039b1ed6a31fef59b8b6c2ef5c836dd147944aa7120b4f4e6db4fd5abf60a", size = 7308578, upload-time = "2023-12-01T15:05:47.378Z" }, - { url = "https://files.pythonhosted.org/packages/9e/54/bf0e750bcc9cc29813454a7e56b18cd611dcacfb6533e615c44af5f9d1d1/pyoxigraph-0.3.22-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2baadd8dba65ff91bdcdf85e57d928806d94612b85da58d64526f0f1d5cd4df", size = 6495789, upload-time = "2023-12-01T17:54:10.045Z" }, - { url = "https://files.pythonhosted.org/packages/b7/bc/339893be78df7ed09fc2a4c59028cfcdc6335d96eacf58a206ded042da35/pyoxigraph-0.3.22-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f7e217e82e541f7df4697705c7cbfbd62e019c50786669647cb261445d75215", size = 6821696, upload-time = "2023-12-01T14:48:55.074Z" }, - { url = "https://files.pythonhosted.org/packages/bb/be/2466aaf6b705952e5f87a758fff1dfd1fc213650fb0b0c812f6b1968b81a/pyoxigraph-0.3.22-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:963bc825e34d7238bffb942572ac0e59a6512e7d33ec8f898f495964a8dac1de", size = 6941181, upload-time = "2023-12-04T11:52:00.908Z" }, - { url = "https://files.pythonhosted.org/packages/81/2d/6847756fc2b0d21f1a674268ac476c86de961c83dca0ab04b0b46a045f06/pyoxigraph-0.3.22-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c99cd7d305a5f154d6fa7eca3a93b153ac94ad2a4aff6c404ec56db38d538ea4", size = 7312588, upload-time = "2023-12-01T15:05:49.69Z" }, - { url = "https://files.pythonhosted.org/packages/01/6b/7bcebf6e590db0384cb56b0ac76385cea3b51ea37170da849214d1d9b1cf/pyoxigraph-0.3.22-cp37-abi3-macosx_10_14_x86_64.macosx_11_0_arm64.macosx_10_14_universal2.whl", hash = "sha256:32d5630c9fb3d7b819a25401b3afdbd01dbfc9624b1519d41216622fe3af52e6", size = 10561197, upload-time = "2023-12-01T15:17:42.567Z" }, - { url = "https://files.pythonhosted.org/packages/4d/05/141746d5b1a8e6210613317d6cb772ab5bfe931a3ebdb98a92fa6ed59c39/pyoxigraph-0.3.22-cp37-abi3-macosx_10_14_x86_64.whl", hash = 
"sha256:6368f24bc236a6055171f4a80cb63b9ad76fcbdbcb4a3ef981eb6d86d8975c11", size = 5478410, upload-time = "2023-12-01T15:17:45.497Z" }, - { url = "https://files.pythonhosted.org/packages/15/ef/85854115c9a8a08eea3d9e46317ca9774566dacd33964b41acd30a8156cd/pyoxigraph-0.3.22-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:821e1103cf1e8f12d0738cf1b2625c8374758e33075ca67161ead3669f53e4cb", size = 5092580, upload-time = "2023-12-01T15:17:48.102Z" }, - { url = "https://files.pythonhosted.org/packages/7f/96/ad8b73eb8c649a089d07cbe323034e8d09cdabcdab44194786c4c2faee28/pyoxigraph-0.3.22-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630f1090d67d1199c86f358094289816e0c00a21000164cfe06499c8689f8b9e", size = 6493664, upload-time = "2023-12-01T17:54:12.064Z" }, - { url = "https://files.pythonhosted.org/packages/a9/08/a185a89bf7832e7377b0d9c961073731c0e5e13beaf2b91d93adaec13baa/pyoxigraph-0.3.22-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1aca511243209005da32470bbfec9e023ac31095bbeaa8cedabe0a652adce38c", size = 6816344, upload-time = "2023-12-01T14:48:57.322Z" }, - { url = "https://files.pythonhosted.org/packages/45/b4/37f7114c4a21b1ee210505e953fdda533b6ff9a458570905ebfe53588cf2/pyoxigraph-0.3.22-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ab329df388865afa9a934f1eac2e75264b220962a21bbcded6cb7ead96d1f1dd", size = 6940281, upload-time = "2023-12-04T11:52:02.933Z" }, - { url = "https://files.pythonhosted.org/packages/91/d3/7bff379c8075bb46301c29027e355f8a6f6388252e8626636619c2dbca19/pyoxigraph-0.3.22-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:60b7f13331b91827e2edfa8633ffb7e3bfc8630b708578fb0bc8d43c76754f20", size = 7308086, upload-time = "2023-12-01T15:05:51.979Z" }, - { url = "https://files.pythonhosted.org/packages/9d/7d/dd2d8c606633c2da157e7dd5a7c92b7bfdef1866bd4f382763ff17454431/pyoxigraph-0.3.22-cp37-abi3-win_amd64.whl", hash = "sha256:9a4ffd8ce28c3e8ce888662e0d9e9155e5226ecd8cd967f3c46391cf266c4c1d", 
size = 4716868, upload-time = "2023-12-01T14:51:37.776Z" }, - { url = "https://files.pythonhosted.org/packages/f6/78/9ae20f0e465b0cb1babd65ce6c563792f68b54baa48e55e8a83026e7eded/pyoxigraph-0.3.22-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55322d5b9b852c4813c293575aa5e676cec19c617d0aad5ae7ce47c49b113f0b", size = 6493901, upload-time = "2023-12-01T17:54:20.748Z" }, - { url = "https://files.pythonhosted.org/packages/b9/e7/3331a426874c82ed6bbeffcc2e83d9c330644c72ec10e0b6b017cf958756/pyoxigraph-0.3.22-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3397138f3a6d2c3299250ebde2bca7c95a25b58b29009eb0b29c2f5d1438d954", size = 6818107, upload-time = "2023-12-01T14:49:05.339Z" }, +version = "0.5.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/48/fc/254b483d1e3f7a1bd6c3ea7203d9c4e5940be730b1efbce87520b3241336/pyoxigraph-0.5.6.tar.gz", hash = "sha256:489c0cde3f441c5bb2025ee6bc77da02f0a085f21a098798e81cbc61705a0317", size = 5202595, upload-time = "2026-03-14T21:08:40.513Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/ae/eda13b712203900eb2d2b414504c3e49dd53c47a251d87b70957f6cceb04/pyoxigraph-0.5.6-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ff7fac21671d4f643ccdc65e97bd87df8bd4d3493988d7f2b41614850fed72c1", size = 7430739, upload-time = "2026-03-14T21:07:21.517Z" }, + { url = "https://files.pythonhosted.org/packages/06/f0/81eb22b17b19462fc02d556d402d51ddc88b3363a7d2018813724ff6449a/pyoxigraph-0.5.6-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:bfb972052f849916fd37844faba2d5e36c95da491142f71204eb7ca9660523b5", size = 7956586, upload-time = "2026-03-14T21:07:24.079Z" }, + { url = "https://files.pythonhosted.org/packages/4a/28/d11d3e549c8d1734a82803929000cd8c9b9eceb63a4ec5c1895a40570d26/pyoxigraph-0.5.6-cp310-cp310-win_amd64.whl", hash = 
"sha256:ac7ae56b51eabfb99ac46287e7c0b33494390cecdafef1669f82727e7d02521f", size = 5227362, upload-time = "2026-03-14T21:07:26.22Z" }, + { url = "https://files.pythonhosted.org/packages/fc/6b/0ca86543fb08b446dece09280fa3aadfdba32d6de4c9ac592817597816e7/pyoxigraph-0.5.6-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:08eaf626ef5cdc5ace666a7a6ef899f90ff078988d04d8b4a2b6004653f5e8cf", size = 7430042, upload-time = "2026-03-14T21:07:27.993Z" }, + { url = "https://files.pythonhosted.org/packages/c2/6c/0d6990b770c921bc9b3a07a6f783b5477994edb59569fc9d7c5eb74c33df/pyoxigraph-0.5.6-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:6f7671a2d5c623d6065561c0da7e453a9bc6f6eb9e7a34fe366fd5aaa578af15", size = 7956657, upload-time = "2026-03-14T21:07:30.328Z" }, + { url = "https://files.pythonhosted.org/packages/95/f8/9f26e7f7d04e57295d99528672733c1a935ecb946fcc1700c26018581ca9/pyoxigraph-0.5.6-cp311-cp311-win_amd64.whl", hash = "sha256:b4919d1ee6e511b74c899668687dc789c93d13d9a13406ffff4e3813b08f0b9b", size = 5227878, upload-time = "2026-03-14T21:07:32.75Z" }, + { url = "https://files.pythonhosted.org/packages/ed/c3/ee716e15fd4f21434580753ba5707837f1966662f7b5e7c95b4bf7768e11/pyoxigraph-0.5.6-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:881f39a79a4c6be9c82c0e46f5982d66d65d9865d48a1745a2e07cd80d949bc0", size = 7432286, upload-time = "2026-03-14T21:07:34.706Z" }, + { url = "https://files.pythonhosted.org/packages/7c/e9/916bafcc86e727f8b51866a66839152188040c7aa3abf5225e20b091194d/pyoxigraph-0.5.6-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:063e7dd246295d0904dfeaefe0db2f9c69575d4ba6be9722454ba9cff106de3a", size = 7961451, upload-time = "2026-03-14T21:07:36.709Z" }, + { url = "https://files.pythonhosted.org/packages/be/d8/92fb76759e920194377fd5f60b4a41b20ebc4116f67dad361e8277bb5aa3/pyoxigraph-0.5.6-cp312-cp312-win_amd64.whl", hash = "sha256:3b37bb5ca1692707b7b84d3899f1f9a93b049cc84e54e22a31cfd7cabb42920d", size = 5231363, upload-time = 
"2026-03-14T21:07:38.693Z" }, + { url = "https://files.pythonhosted.org/packages/43/23/4696181d8dc4315f071c29fff9c3d61c103c8911e36b9a85c8b7e2244d85/pyoxigraph-0.5.6-cp312-cp312-win_arm64.whl", hash = "sha256:ad0beb3e900ecb9b1dd335eadaae3c51681c8e1d46c8d752237764c840e34084", size = 4873400, upload-time = "2026-03-14T21:07:40.618Z" }, + { url = "https://files.pythonhosted.org/packages/85/d4/061c87fbd35e62558c3913671e1b81cf5c9f6861bbe6314111e168c48880/pyoxigraph-0.5.6-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:aea454dc1182f08baa6b6d43987fbfbaf322c7830f0e02a78e53a91b8513f22b", size = 7431847, upload-time = "2026-03-14T21:07:43.267Z" }, + { url = "https://files.pythonhosted.org/packages/b4/ca/c026607e5b88d94ceb30ebd865443fdefc273e403dc54048ce7a3b107f02/pyoxigraph-0.5.6-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b9e80f79965d46ee84c8931b151390c5edec7bcd24a472a12a211c008f74c6f1", size = 7961348, upload-time = "2026-03-14T21:07:45.823Z" }, + { url = "https://files.pythonhosted.org/packages/de/4f/2c25ca45648a6aa21e03ba51f7195f3ba3b745a543d2b541dd12a40ff7e6/pyoxigraph-0.5.6-cp313-cp313-win_amd64.whl", hash = "sha256:7474294f67f68e5e3f09eb6d7f8c12044d850eec41425330e8fbf9d4c0f2085e", size = 5230125, upload-time = "2026-03-14T21:07:48.073Z" }, + { url = "https://files.pythonhosted.org/packages/00/54/d3ab2f6455aae90ac25eee13dc0c0a863a3c6d200e22f30f8c5994434d42/pyoxigraph-0.5.6-cp313-cp313-win_arm64.whl", hash = "sha256:fda4d490a56f1796b60f03dac69f5cd366bb26dbf5c92dcdad2f4fbac9a459e8", size = 4873532, upload-time = "2026-03-14T21:07:50.208Z" }, + { url = "https://files.pythonhosted.org/packages/d9/70/82b9c003458c9dcbb2f733250a12d5c187087ef4f4c2890d9b8602417549/pyoxigraph-0.5.6-cp313-cp313t-win_amd64.whl", hash = "sha256:bbd7a2966763f15adb7714faaf9fd853499dfe5ca03386000ef148ef355dad6f", size = 5227483, upload-time = "2026-03-14T21:07:52.411Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/97/586fb0d599eb2144deec0ab4ae0091b853fc85264fc93705ebc569684b60/pyoxigraph-0.5.6-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:94e523532b3103d8612fd3301f095633eaf567fab11667efed36bb898654b150", size = 7430020, upload-time = "2026-03-14T21:07:54.805Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c5/24164107dfb9eecaf794a0a6e0cdec1791d006bc4545713ea8954c8944fd/pyoxigraph-0.5.6-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:80539561bcf8cbae170099d9b2a46d2c2421bf7e64db11d12d46ad2bbea9fb28", size = 7957098, upload-time = "2026-03-14T21:07:57.841Z" }, + { url = "https://files.pythonhosted.org/packages/01/5c/80984a041553be6325ebe45493868db213eb8d4c522e73fe9899d10ce300/pyoxigraph-0.5.6-cp314-cp314-win_amd64.whl", hash = "sha256:9030dc72e8faca351cada7a39ccea1447abed2e1cb96a4c10e32aace131ec916", size = 5224414, upload-time = "2026-03-14T21:08:00.049Z" }, + { url = "https://files.pythonhosted.org/packages/af/f1/3714c2245539a838150f2d38ad5e4d01d0476491335a5dc9506bdec5024a/pyoxigraph-0.5.6-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:78ece614582b783d576eeb15cffa11ccb18b7d3a05a944351d0a5e8a20beecf3", size = 7421862, upload-time = "2026-03-14T21:08:02.461Z" }, + { url = "https://files.pythonhosted.org/packages/47/9c/f97f617269ad6237867f16f547b4da32d36bc41b8aba1426b7cf72cfa1e9/pyoxigraph-0.5.6-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:766e748dfa7391203e774ca052141ca31c94b948e11db0ea2387416aa68c8c38", size = 7953331, upload-time = "2026-03-14T21:08:04.891Z" }, + { url = "https://files.pythonhosted.org/packages/8e/88/a5fc4a95d9ed7c830a2b1a406da91d65b8cb4e9f05642e5a4399954fa13e/pyoxigraph-0.5.6-cp314-cp314t-win_amd64.whl", hash = "sha256:ac53e535248ca56c6495f297c552412a82dc3759898077e4dd559cc54d53e4d3", size = 5221705, upload-time = "2026-03-14T21:08:07.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/40/62729562e1773814a2d0876f4a940e711d014507e6415eb06cf9cc634434/pyoxigraph-0.5.6-cp38-abi3-macosx_10_14_x86_64.whl", hash = "sha256:09c8ad0b90b895062554636d5bd1b55276d88bd774a846c4d24d598229854dfc", size = 6061386, upload-time = "2026-03-14T21:08:09.587Z" }, + { url = "https://files.pythonhosted.org/packages/59/8e/30bce4f9b272c9b17f89088a82614c185732193632cf3af6ba120c97b293/pyoxigraph-0.5.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:01496b787851d79d95849d34b6b5453f588b07f0d1edabec7f7d5eede8a216e4", size = 5551255, upload-time = "2026-03-14T21:08:11.911Z" }, + { url = "https://files.pythonhosted.org/packages/49/9d/37753e600a83f3f9114828f10ef600bce5c04cd39ed3ab392a57c367cdf6/pyoxigraph-0.5.6-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:43fe283de37965fcb8285f0ff446ede17b2c3bbc87d44fe629e5aacb0b95c78f", size = 7434866, upload-time = "2026-03-14T21:08:14.331Z" }, + { url = "https://files.pythonhosted.org/packages/4b/53/1222ca43232127ff31b7dec5801108d63d7d0645c034aaf2af35e518181b/pyoxigraph-0.5.6-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6946ec3aadfc884a09334b0d8e8751bed49385330772b7f1c6c5ab2db6081bf1", size = 7963169, upload-time = "2026-03-14T21:08:16.494Z" }, + { url = "https://files.pythonhosted.org/packages/33/e6/d43532e6c5a67a806b5979ab5c841fbd5fef879c42451e4f7fbe899613b0/pyoxigraph-0.5.6-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b3be9f17c013b383675b3fb9c61e61ceaa3f79c77a6c01b9deef672a4489f86c", size = 8628845, upload-time = "2026-03-14T21:08:19.343Z" }, + { url = "https://files.pythonhosted.org/packages/99/7f/4b4e0407c40613c0b16166c9fc78b5d544d2982a5bc5436ed3e17ee930d6/pyoxigraph-0.5.6-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:34d1fb2ac85d0e3b76a40d94c20504edf72d479f778e19bea79fc508994c90a7", size = 9175920, upload-time = "2026-03-14T21:08:22.576Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/0e/4683e8f54c613dbfb97f31995a2d15e2fa7a8bfc8624aec7b419b7e83266/pyoxigraph-0.5.6-cp38-abi3-win_amd64.whl", hash = "sha256:93309ab2d7e41767b279ed21ffdf3c769139dd05695b52ec1bb0c404ae2eb730", size = 5231864, upload-time = "2026-03-14T21:08:24.849Z" }, + { url = "https://files.pythonhosted.org/packages/70/a8/7458b00c1948a168ffbedd98f2534e0de335f3e575bcc5c8b578178a6880/pyoxigraph-0.5.6-cp38-abi3-win_arm64.whl", hash = "sha256:98c5618d6dddc0c3193e4dcf615a78c860c61c8ac35d03a440fe334cf395f814", size = 4873521, upload-time = "2026-03-14T21:08:26.984Z" }, + { url = "https://files.pythonhosted.org/packages/c9/db/cf53fae102f9a3e15cff96828d8b96f88949f6de7509599893fdad2d5331/pyoxigraph-0.5.6-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:64e89a1497dac8f28052d93e725b0963c488ac64b954035d5a64754f144dedca", size = 7430165, upload-time = "2026-03-14T21:08:33.356Z" }, + { url = "https://files.pythonhosted.org/packages/c0/49/e593afa859496e746d8850cc3d721c733f6f441a807191e67365ca3f73f3/pyoxigraph-0.5.6-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ec8f4a1efe6255a6dc5a756365866eb6d3c02b88980b17f7604c3bbbec77c9e", size = 7956174, upload-time = "2026-03-14T21:08:36.315Z" }, + { url = "https://files.pythonhosted.org/packages/9d/c3/c94f7263e28ccf172333ccde8e94f7a3b417476515ab4dd4f110440321f3/pyoxigraph-0.5.6-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:0df0addc23174d7919355d16087c057463a40a7abe061ea4ad9c12ab8600398c", size = 5226707, upload-time = "2026-03-14T21:08:38.5Z" }, ] [[package]] @@ -4345,72 +4240,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/d2/4a73b18821fd4669762c855fd1f4e80ceb66fb72d71162d14da58444a763/rpds_py-0.28.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5d0145edba8abd3db0ab22b5300c99dc152f5c9021fab861be0f0544dc3cbc5f", size = 552199, upload-time = "2025-10-22T22:24:26.54Z" }, ] -[[package]] -name = "ruamel-yaml" -version = "0.18.16" -source = { registry 
= "https://pypi.org/simple" } -dependencies = [ - { name = "ruamel-yaml-clib", marker = "python_full_version < '3.14' and platform_python_implementation == 'CPython'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9f/c7/ee630b29e04a672ecfc9b63227c87fd7a37eb67c1bf30fe95376437f897c/ruamel.yaml-0.18.16.tar.gz", hash = "sha256:a6e587512f3c998b2225d68aa1f35111c29fad14aed561a26e73fab729ec5e5a", size = 147269, upload-time = "2025-10-22T17:54:02.346Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/73/bb1bc2529f852e7bf64a2dec885e89ff9f5cc7bbf6c9340eed30ff2c69c5/ruamel.yaml-0.18.16-py3-none-any.whl", hash = "sha256:048f26d64245bae57a4f9ef6feb5b552a386830ef7a826f235ffb804c59efbba", size = 119858, upload-time = "2025-10-22T17:53:59.012Z" }, -] - -[[package]] -name = "ruamel-yaml-clib" -version = "0.2.14" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/e9/39ec4d4b3f91188fad1842748f67d4e749c77c37e353c4e545052ee8e893/ruamel.yaml.clib-0.2.14.tar.gz", hash = "sha256:803f5044b13602d58ea378576dd75aa759f52116a0232608e8fdada4da33752e", size = 225394, upload-time = "2025-09-22T19:51:23.753Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/56/35a0a752415ae01992c68f5a6513bdef0e1b6fbdb60d7619342ce12346a0/ruamel.yaml.clib-0.2.14-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f8b2acb0ffdd2ce8208accbec2dca4a06937d556fdcaefd6473ba1b5daa7e3c4", size = 269216, upload-time = "2025-09-23T14:24:09.742Z" }, - { url = "https://files.pythonhosted.org/packages/98/6a/9a68184ab93619f4607ff1675e4ef01e8accfcbff0d482f4ca44c10d8eab/ruamel.yaml.clib-0.2.14-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:aef953f3b8bd0b50bd52a2e52fb54a6a2171a1889d8dea4a5959d46c6624c451", size = 137092, upload-time = "2025-09-22T19:50:26.906Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/3f/cfed5f088628128a9ec66f46794fd4d165642155c7b78c26d83b16c6bf7b/ruamel.yaml.clib-0.2.14-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a0ac90efbc7a77b0d796c03c8cc4e62fd710b3f1e4c32947713ef2ef52e09543", size = 633768, upload-time = "2025-09-22T19:50:31.228Z" }, - { url = "https://files.pythonhosted.org/packages/3a/d5/5ce2cc156c1da48160171968d91f066d305840fbf930ee955a509d025a44/ruamel.yaml.clib-0.2.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9bf6b699223afe6c7fe9f2ef76e0bfa6dd892c21e94ce8c957478987ade76cd8", size = 721253, upload-time = "2025-09-22T19:50:28.776Z" }, - { url = "https://files.pythonhosted.org/packages/2b/71/d0b56bc902b38ebe4be8e270f730f929eec4edaf8a0fa7028f4ef64fa950/ruamel.yaml.clib-0.2.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d73a0187718f6eec5b2f729b0f98e4603f7bd9c48aa65d01227d1a5dcdfbe9e8", size = 683823, upload-time = "2025-09-22T19:50:29.993Z" }, - { url = "https://files.pythonhosted.org/packages/4b/db/1f37449dd89c540218598316ccafc1a0aed60215e72efa315c5367cfd015/ruamel.yaml.clib-0.2.14-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:81f6d3b19bc703679a5705c6a16dabdc79823c71d791d73c65949be7f3012c02", size = 690370, upload-time = "2025-09-23T18:42:46.797Z" }, - { url = "https://files.pythonhosted.org/packages/5d/53/c498b30f35efcd9f47cb084d7ad9374f2b907470f73913dec6396b81397d/ruamel.yaml.clib-0.2.14-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b28caeaf3e670c08cb7e8de221266df8494c169bd6ed8875493fab45be9607a4", size = 703578, upload-time = "2025-09-22T19:50:32.531Z" }, - { url = "https://files.pythonhosted.org/packages/34/79/492cfad9baed68914840c39e5f3c1cc251f51a897ddb3f532601215cbb12/ruamel.yaml.clib-0.2.14-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:94f3efb718f8f49b031f2071ec7a27dd20cbfe511b4dfd54ecee54c956da2b31", size = 722544, upload-time = "2025-09-22T19:50:34.157Z" }, - 
{ url = "https://files.pythonhosted.org/packages/ca/f5/479ebfd5ba396e209ade90f7282d84b90c57b3e07be8dc6fcd02a6df7ffc/ruamel.yaml.clib-0.2.14-cp310-cp310-win32.whl", hash = "sha256:27c070cf3888e90d992be75dd47292ff9aa17dafd36492812a6a304a1aedc182", size = 100375, upload-time = "2025-09-22T19:50:36.832Z" }, - { url = "https://files.pythonhosted.org/packages/57/31/a044520fdb3bd409889f67f1efebda0658033c7ab3f390cee37531cc9a9e/ruamel.yaml.clib-0.2.14-cp310-cp310-win_amd64.whl", hash = "sha256:4f4a150a737fccae13fb51234d41304ff2222e3b7d4c8e9428ed1a6ab48389b8", size = 118129, upload-time = "2025-09-22T19:50:35.545Z" }, - { url = "https://files.pythonhosted.org/packages/b3/9f/3c51e9578b8c36fcc4bdd271a1a5bb65963a74a4b6ad1a989768a22f6c2a/ruamel.yaml.clib-0.2.14-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5bae1a073ca4244620425cd3d3aa9746bde590992b98ee8c7c8be8c597ca0d4e", size = 270207, upload-time = "2025-09-23T14:24:11.445Z" }, - { url = "https://files.pythonhosted.org/packages/4a/16/cb02815bc2ae9c66760c0c061d23c7358f9ba51dae95ac85247662b7fbe2/ruamel.yaml.clib-0.2.14-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0a54e5e40a7a691a426c2703b09b0d61a14294d25cfacc00631aa6f9c964df0d", size = 137780, upload-time = "2025-09-22T19:50:37.734Z" }, - { url = "https://files.pythonhosted.org/packages/31/c6/fc687cd1b93bff8e40861eea46d6dc1a6a778d9a085684e4045ff26a8e40/ruamel.yaml.clib-0.2.14-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:10d9595b6a19778f3269399eff6bab642608e5966183abc2adbe558a42d4efc9", size = 641590, upload-time = "2025-09-22T19:50:41.978Z" }, - { url = "https://files.pythonhosted.org/packages/45/5d/65a2bc08b709b08576b3f307bf63951ee68a8e047cbbda6f1c9864ecf9a7/ruamel.yaml.clib-0.2.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dba72975485f2b87b786075e18a6e5d07dc2b4d8973beb2732b9b2816f1bad70", size = 738090, upload-time = "2025-09-22T19:50:39.152Z" }, - { url = 
"https://files.pythonhosted.org/packages/fb/d0/a70a03614d9a6788a3661ab1538879ed2aae4e84d861f101243116308a37/ruamel.yaml.clib-0.2.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29757bdb7c142f9595cc1b62ec49a3d1c83fab9cef92db52b0ccebaad4eafb98", size = 700744, upload-time = "2025-09-22T19:50:40.811Z" }, - { url = "https://files.pythonhosted.org/packages/77/30/c93fa457611f79946d5cb6cc97493ca5425f3f21891d7b1f9b44eaa1b38e/ruamel.yaml.clib-0.2.14-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:557df28dbccf79b152fe2d1b935f6063d9cc431199ea2b0e84892f35c03bb0ee", size = 742321, upload-time = "2025-09-23T18:42:48.916Z" }, - { url = "https://files.pythonhosted.org/packages/40/85/e2c54ad637117cd13244a4649946eaa00f32edcb882d1f92df90e079ab00/ruamel.yaml.clib-0.2.14-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:26a8de280ab0d22b6e3ec745b4a5a07151a0f74aad92dd76ab9c8d8d7087720d", size = 743805, upload-time = "2025-09-22T19:50:43.58Z" }, - { url = "https://files.pythonhosted.org/packages/81/50/f899072c38877d8ef5382e0b3d47f8c4346226c1f52d6945d6f64fec6a2f/ruamel.yaml.clib-0.2.14-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e501c096aa3889133d674605ebd018471bc404a59cbc17da3c5924421c54d97c", size = 769529, upload-time = "2025-09-22T19:50:45.707Z" }, - { url = "https://files.pythonhosted.org/packages/99/7c/96d4b5075e30c65ea2064e40c2d657c7c235d7b6ef18751cf89a935b9041/ruamel.yaml.clib-0.2.14-cp311-cp311-win32.whl", hash = "sha256:915748cfc25b8cfd81b14d00f4bfdb2ab227a30d6d43459034533f4d1c207a2a", size = 100256, upload-time = "2025-09-22T19:50:48.26Z" }, - { url = "https://files.pythonhosted.org/packages/7d/8c/73ee2babd04e8bfcf1fd5c20aa553d18bf0ebc24b592b4f831d12ae46cc0/ruamel.yaml.clib-0.2.14-cp311-cp311-win_amd64.whl", hash = "sha256:4ccba93c1e5a40af45b2f08e4591969fa4697eae951c708f3f83dcbf9f6c6bb1", size = 118234, upload-time = "2025-09-22T19:50:47.019Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/42/ccfb34a25289afbbc42017e4d3d4288e61d35b2e00cfc6b92974a6a1f94b/ruamel.yaml.clib-0.2.14-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:6aeadc170090ff1889f0d2c3057557f9cd71f975f17535c26a5d37af98f19c27", size = 271775, upload-time = "2025-09-23T14:24:12.771Z" }, - { url = "https://files.pythonhosted.org/packages/82/73/e628a92e80197ff6a79ab81ec3fa00d4cc082d58ab78d3337b7ba7043301/ruamel.yaml.clib-0.2.14-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5e56ac47260c0eed992789fa0b8efe43404a9adb608608631a948cee4fc2b052", size = 138842, upload-time = "2025-09-22T19:50:49.156Z" }, - { url = "https://files.pythonhosted.org/packages/2b/c5/346c7094344a60419764b4b1334d9e0285031c961176ff88ffb652405b0c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a911aa73588d9a8b08d662b9484bc0567949529824a55d3885b77e8dd62a127a", size = 647404, upload-time = "2025-09-22T19:50:52.921Z" }, - { url = "https://files.pythonhosted.org/packages/df/99/65080c863eb06d4498de3d6c86f3e90595e02e159fd8529f1565f56cfe2c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a05ba88adf3d7189a974b2de7a9d56731548d35dc0a822ec3dc669caa7019b29", size = 753141, upload-time = "2025-09-22T19:50:50.294Z" }, - { url = "https://files.pythonhosted.org/packages/3d/e3/0de85f3e3333f8e29e4b10244374a202a87665d1131798946ee22cf05c7c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb04c5650de6668b853623eceadcdb1a9f2fee381f5d7b6bc842ee7c239eeec4", size = 703477, upload-time = "2025-09-22T19:50:51.508Z" }, - { url = "https://files.pythonhosted.org/packages/d9/25/0d2f09d8833c7fd77ab8efeff213093c16856479a9d293180a0d89f6bed9/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:df3ec9959241d07bc261f4983d25a1205ff37703faf42b474f15d54d88b4f8c9", size = 741157, upload-time = "2025-09-23T18:42:50.408Z" }, 
- { url = "https://files.pythonhosted.org/packages/d3/8c/959f10c2e2153cbdab834c46e6954b6dd9e3b109c8f8c0a3cf1618310985/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fbc08c02e9b147a11dfcaa1ac8a83168b699863493e183f7c0c8b12850b7d259", size = 745859, upload-time = "2025-09-22T19:50:54.497Z" }, - { url = "https://files.pythonhosted.org/packages/ed/6b/e580a7c18b485e1a5f30a32cda96b20364b0ba649d9d2baaf72f8bd21f83/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c099cafc1834d3c5dac305865d04235f7c21c167c8dd31ebc3d6bbc357e2f023", size = 770200, upload-time = "2025-09-22T19:50:55.718Z" }, - { url = "https://files.pythonhosted.org/packages/ef/44/3455eebc761dc8e8fdced90f2b0a3fa61e32ba38b50de4130e2d57db0f21/ruamel.yaml.clib-0.2.14-cp312-cp312-win32.whl", hash = "sha256:b5b0f7e294700b615a3bcf6d28b26e6da94e8eba63b079f4ec92e9ba6c0d6b54", size = 98829, upload-time = "2025-09-22T19:50:58.895Z" }, - { url = "https://files.pythonhosted.org/packages/76/ab/5121f7f3b651db93de546f8c982c241397aad0a4765d793aca1dac5eadee/ruamel.yaml.clib-0.2.14-cp312-cp312-win_amd64.whl", hash = "sha256:a37f40a859b503304dd740686359fcf541d6fb3ff7fc10f539af7f7150917c68", size = 115570, upload-time = "2025-09-22T19:50:57.981Z" }, - { url = "https://files.pythonhosted.org/packages/d7/ae/e3811f05415594025e96000349d3400978adaed88d8f98d494352d9761ee/ruamel.yaml.clib-0.2.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7e4f9da7e7549946e02a6122dcad00b7c1168513acb1f8a726b1aaf504a99d32", size = 269205, upload-time = "2025-09-23T14:24:15.06Z" }, - { url = "https://files.pythonhosted.org/packages/72/06/7d51f4688d6d72bb72fa74254e1593c4f5ebd0036be5b41fe39315b275e9/ruamel.yaml.clib-0.2.14-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:dd7546c851e59c06197a7c651335755e74aa383a835878ca86d2c650c07a2f85", size = 137417, upload-time = "2025-09-22T19:50:59.82Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/08/b4499234a420ef42960eeb05585df5cc7eb25ccb8c980490b079e6367050/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:1c1acc3a0209ea9042cc3cfc0790edd2eddd431a2ec3f8283d081e4d5018571e", size = 642558, upload-time = "2025-09-22T19:51:03.388Z" }, - { url = "https://files.pythonhosted.org/packages/b6/ba/1975a27dedf1c4c33306ee67c948121be8710b19387aada29e2f139c43ee/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2070bf0ad1540d5c77a664de07ebcc45eebd1ddcab71a7a06f26936920692beb", size = 744087, upload-time = "2025-09-22T19:51:00.897Z" }, - { url = "https://files.pythonhosted.org/packages/20/15/8a19a13d27f3bd09fa18813add8380a29115a47b553845f08802959acbce/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd8fe07f49c170e09d76773fb86ad9135e0beee44f36e1576a201b0676d3d1d", size = 699709, upload-time = "2025-09-22T19:51:02.075Z" }, - { url = "https://files.pythonhosted.org/packages/19/ee/8d6146a079ad21e534b5083c9ee4a4c8bec42f79cf87594b60978286b39a/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ff86876889ea478b1381089e55cf9e345707b312beda4986f823e1d95e8c0f59", size = 708926, upload-time = "2025-09-23T18:42:51.707Z" }, - { url = "https://files.pythonhosted.org/packages/a9/f5/426b714abdc222392e68f3b8ad323930d05a214a27c7e7a0f06c69126401/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1f118b707eece8cf84ecbc3e3ec94d9db879d85ed608f95870d39b2d2efa5dca", size = 740202, upload-time = "2025-09-22T19:51:04.673Z" }, - { url = "https://files.pythonhosted.org/packages/3d/ac/3c5c2b27a183f4fda8a57c82211721c016bcb689a4a175865f7646db9f94/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b30110b29484adc597df6bd92a37b90e63a8c152ca8136aad100a02f8ba6d1b6", size = 765196, upload-time = "2025-09-22T19:51:05.916Z" }, - 
{ url = "https://files.pythonhosted.org/packages/92/2e/06f56a71fd55021c993ed6e848c9b2e5e9cfce180a42179f0ddd28253f7c/ruamel.yaml.clib-0.2.14-cp313-cp313-win32.whl", hash = "sha256:f4e97a1cf0b7a30af9e1d9dad10a5671157b9acee790d9e26996391f49b965a2", size = 98635, upload-time = "2025-09-22T19:51:08.183Z" }, - { url = "https://files.pythonhosted.org/packages/51/79/76aba16a1689b50528224b182f71097ece338e7a4ab55e84c2e73443b78a/ruamel.yaml.clib-0.2.14-cp313-cp313-win_amd64.whl", hash = "sha256:090782b5fb9d98df96509eecdbcaffd037d47389a89492320280d52f91330d78", size = 115238, upload-time = "2025-09-22T19:51:07.081Z" }, - { url = "https://files.pythonhosted.org/packages/21/e2/a59ff65c26aaf21a24eb38df777cb9af5d87ba8fc8107c163c2da9d1e85e/ruamel.yaml.clib-0.2.14-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:7df6f6e9d0e33c7b1d435defb185095386c469109de723d514142632a7b9d07f", size = 271441, upload-time = "2025-09-23T14:24:16.498Z" }, - { url = "https://files.pythonhosted.org/packages/6b/fa/3234f913fe9a6525a7b97c6dad1f51e72b917e6872e051a5e2ffd8b16fbb/ruamel.yaml.clib-0.2.14-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:70eda7703b8126f5e52fcf276e6c0f40b0d314674f896fc58c47b0aef2b9ae83", size = 137970, upload-time = "2025-09-22T19:51:09.472Z" }, - { url = "https://files.pythonhosted.org/packages/ef/ec/4edbf17ac2c87fa0845dd366ef8d5852b96eb58fcd65fc1ecf5fe27b4641/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a0cb71ccc6ef9ce36eecb6272c81afdc2f565950cdcec33ae8e6cd8f7fc86f27", size = 739639, upload-time = "2025-09-22T19:51:10.566Z" }, - { url = "https://files.pythonhosted.org/packages/15/18/b0e1fafe59051de9e79cdd431863b03593ecfa8341c110affad7c8121efc/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7cb9ad1d525d40f7d87b6df7c0ff916a66bc52cb61b66ac1b2a16d0c1b07640", size = 764456, upload-time = "2025-09-22T19:51:11.736Z" }, - { url = 
"https://files.pythonhosted.org/packages/e7/cd/150fdb96b8fab27fe08d8a59fe67554568727981806e6bc2677a16081ec7/ruamel_yaml_clib-0.2.14-cp314-cp314-win32.whl", hash = "sha256:9b4104bf43ca0cd4e6f738cb86326a3b2f6eef00f417bd1e7efb7bdffe74c539", size = 102394, upload-time = "2025-11-14T21:57:36.703Z" }, - { url = "https://files.pythonhosted.org/packages/bd/e6/a3fa40084558c7e1dc9546385f22a93949c890a8b2e445b2ba43935f51da/ruamel_yaml_clib-0.2.14-cp314-cp314-win_amd64.whl", hash = "sha256:13997d7d354a9890ea1ec5937a219817464e5cc344805b37671562a401ca3008", size = 122673, upload-time = "2025-11-14T21:57:38.177Z" }, -] - [[package]] name = "send2trash" version = "1.8.3" From 2bbb3412eb61a86625b3c2bb10da9426e312648c Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Fri, 17 Apr 2026 15:57:05 +0300 Subject: [PATCH 10/15] Run pre-commit --- packages/linkml/src/linkml/generators/owlgen.py | 3 +-- packages/linkml/src/linkml/generators/rdfgen.py | 3 +-- packages/linkml/src/linkml/generators/shaclgen.py | 3 +-- packages/linkml/src/linkml/generators/shexgen.py | 3 +-- .../src/linkml_runtime/dumpers/rdflib_dumper.py | 2 +- .../src/linkml_runtime/utils/rdf_canonicalize.py | 11 ++++++----- tests/linkml/test_generators/test_shaclgen.py | 4 +--- tests/linkml/test_issues/conftest.py | 1 - tests/linkml/test_scripts/test_gen_jsonld.py | 3 +-- 9 files changed, 13 insertions(+), 20 deletions(-) diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index 698db904ef..51c2c941a4 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -18,8 +18,6 @@ from rdflib.plugin import Parser as rdflib_Parser from rdflib.plugin import plugins as rdflib_plugins -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph - from linkml import METAMODEL_NAMESPACE_NAME from linkml._version import __version__ from linkml.generators.common.subproperty import is_xsd_anyuri_range @@ -45,6 
+43,7 @@ ) from linkml_runtime.utils.formatutils import camelcase, underscore from linkml_runtime.utils.introspection import package_schemaview +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph logger = logging.getLogger(__name__) diff --git a/packages/linkml/src/linkml/generators/rdfgen.py b/packages/linkml/src/linkml/generators/rdfgen.py index 28e0cf6a9c..a3fcf6a848 100644 --- a/packages/linkml/src/linkml/generators/rdfgen.py +++ b/packages/linkml/src/linkml/generators/rdfgen.py @@ -15,13 +15,12 @@ from rdflib.plugin import Parser as rdflib_Parser from rdflib.plugin import plugins as rdflib_plugins -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph - from linkml import LOCAL_METAMODEL_LDCONTEXT_FILE from linkml._version import __version__ from linkml.generators.jsonldgen import JSONLDGenerator from linkml.utils.generator import Generator, shared_arguments from linkml_runtime.linkml_model import SchemaDefinition +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph @dataclass diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index 35febccf8b..97f25b21da 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -10,8 +10,6 @@ from rdflib.collection import Collection from rdflib.namespace import RDF, RDFS, SH, XSD -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph - from linkml._version import __version__ from linkml.generators.common.subproperty import get_subproperty_values, is_uri_range from linkml.generators.shacl.shacl_data_type import ShaclDataType @@ -19,6 +17,7 @@ from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName, PresenceEnum from linkml_runtime.utils.formatutils import underscore +from linkml_runtime.utils.rdf_canonicalize import 
canonicalize_rdf_graph from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, extended_str logger = logging.getLogger(__name__) diff --git a/packages/linkml/src/linkml/generators/shexgen.py b/packages/linkml/src/linkml/generators/shexgen.py index 093778d019..40a93ffbc9 100644 --- a/packages/linkml/src/linkml/generators/shexgen.py +++ b/packages/linkml/src/linkml/generators/shexgen.py @@ -11,8 +11,6 @@ from ShExJSG.SchemaWithContext import Schema from ShExJSG.ShExJ import IRIREF, EachOf, NodeConstraint, Shape, ShapeOr, TripleConstraint -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph - from linkml import METAMODEL_NAMESPACE, METAMODEL_NAMESPACE_NAME from linkml._version import __version__ from linkml.generators.common.subproperty import get_subproperty_values @@ -28,6 +26,7 @@ from linkml_runtime.linkml_model.types import SHEX from linkml_runtime.utils.formatutils import camelcase, sfx from linkml_runtime.utils.metamodelcore import URIorCURIE +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph @dataclass diff --git a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py index 8362871e6d..ed3797f86d 100644 --- a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py +++ b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py @@ -9,8 +9,8 @@ from rdflib.term import BNode, Literal, Node from linkml_runtime.dumpers.dumper_root import Dumper -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.linkml_model import ElementName, PermissibleValue, PermissibleValueText, SlotDefinition +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.schemaview import SchemaView from linkml_runtime.utils.yamlutils import YAMLRoot diff --git a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py 
b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py index 4ff4a08389..d903850180 100644 --- a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py +++ b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py @@ -29,7 +29,6 @@ import io import logging -from typing import Optional import pyoxigraph as ox import rdflib @@ -93,8 +92,7 @@ def canonicalize_rdf_graph( triples = list(ox.parse(io.BytesIO(nt_bytes), format=ox.RdfFormat.N_TRIPLES)) except SyntaxError: logger.warning( - "Graph contains non-standard RDF that pyoxigraph cannot parse; " - "falling back to rdflib serializer" + "Graph contains non-standard RDF that pyoxigraph cannot parse; falling back to rdflib serializer" ) return graph.serialize(format=output_format) @@ -114,7 +112,7 @@ def canonicalize_rdf_graph( # 5. Collect prefixes for formats that support them. base_iri = str(graph.base) if graph.base else None - prefixes: Optional[dict[str, str]] = None + prefixes: dict[str, str] | None = None if ox_format in _PREFIX_FORMATS: prefixes = {} for prefix, namespace in graph.namespace_manager.namespaces(): @@ -128,6 +126,9 @@ def canonicalize_rdf_graph( continue prefixes[str(prefix)] = ns_str result_bytes = ox.serialize( - sorted_triples, format=ox_format, prefixes=prefixes, base_iri=base_iri, + sorted_triples, + format=ox_format, + prefixes=prefixes, + base_iri=base_iri, ) return result_bytes.decode("utf-8") diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index 80f3b7fb2a..ccf477ed22 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -387,9 +387,7 @@ def check_slot_default_value(slot: URIRef, default_value: Any, datatype: str = N if datatype and str(datatype) == "http://www.w3.org/2001/XMLSchema#string": if (subject, SH.defaultValue, Literal(default_value)) in g: return - raise AssertionError( - f"Expected ({subject}, sh:defaultValue, 
{expected!r}) not found in graph" - ) + raise AssertionError(f"Expected ({subject}, sh:defaultValue, {expected!r}) not found in graph") check_slot_default_value( URIRef("https://w3id.org/linkml/tests/kitchen_sink/ifabsent_string"), diff --git a/tests/linkml/test_issues/conftest.py b/tests/linkml/test_issues/conftest.py index 3e33633d55..ca763d7f7e 100644 --- a/tests/linkml/test_issues/conftest.py +++ b/tests/linkml/test_issues/conftest.py @@ -3,7 +3,6 @@ import pytest import rdflib -from rdflib.compare import to_canonical_graph from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph diff --git a/tests/linkml/test_scripts/test_gen_jsonld.py b/tests/linkml/test_scripts/test_gen_jsonld.py index dd93e857f7..3b1305f254 100644 --- a/tests/linkml/test_scripts/test_gen_jsonld.py +++ b/tests/linkml/test_scripts/test_gen_jsonld.py @@ -4,11 +4,10 @@ from click.testing import CliRunner from rdflib import Graph, URIRef -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph - from linkml import METAMODEL_NAMESPACE from linkml.generators.jsonldcontextgen import ContextGenerator from linkml.generators.jsonldgen import JSONLDGenerator, cli +from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from tests.conftest import KITCHEN_SINK_PATH From f472dce328be84e72778cf980c6e101a46dcf4ed Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Wed, 29 Apr 2026 16:39:04 +0300 Subject: [PATCH 11/15] Update tests and fixtures --- examples/tutorial/tutorial01/data.ttl | 42 +- .../tutorial/tutorial04/data-semantic.ttl | 59 +- examples/tutorial/tutorial04/data.ttl | 54 +- .../tutorial04/personinfo-semantic.shacl.ttl | 111 ++-- .../linkml_runtime/utils/rdf_canonicalize.py | 53 +- .../test_generators/rdf/test_rdfgen_binary.py | 89 +-- .../__snapshots__/linkml_issue_384.other.txt | 28 +- .../__snapshots__/linkml_issue_384.owl.txt | 528 +++++++++--------- 8 files changed, 550 insertions(+), 414 deletions(-) diff --git 
a/examples/tutorial/tutorial01/data.ttl b/examples/tutorial/tutorial01/data.ttl index 5c8468d1fd..fff62c9b84 100644 --- a/examples/tutorial/tutorial01/data.ttl +++ b/examples/tutorial/tutorial01/data.ttl @@ -1,7 +1,39 @@ @prefix personinfo: . +@prefix rdf: . +@prefix wgs: . +@prefix geo: . +@prefix rdfs: . +@prefix brick: . +@prefix skos: . +@prefix xml: . +@prefix xsd: . +@prefix qb: . +@prefix dc: . +@prefix owl: . +@prefix doap: . +@prefix prof: . +@prefix dcmitype: . +@prefix odrl: . +@prefix time: . +@prefix vann: . +@prefix sh: . +@prefix csvw: . +@prefix shex: . +@prefix dcat: . +@prefix sosa: . +@prefix foaf: . +@prefix prov: . +@prefix org: . +@prefix ssn: . +@prefix dcterms: . +@prefix dcam: . +@prefix void: . +@prefix linkml: . +@prefix schema: . +@prefix schema1: . +_:c14n0 a personinfo:Person ; + personinfo:age "32" ; + personinfo:full_name "Clark Kent" ; + personinfo:id "ORCID:1234" ; + personinfo:phone "555-555-5555" . -[] a personinfo:Person ; - personinfo:age "32" ; - personinfo:full_name "Clark Kent" ; - personinfo:id "ORCID:1234" ; - personinfo:phone "555-555-5555" . diff --git a/examples/tutorial/tutorial04/data-semantic.ttl b/examples/tutorial/tutorial04/data-semantic.ttl index f5fbd33412..380c50d030 100644 --- a/examples/tutorial/tutorial04/data-semantic.ttl +++ b/examples/tutorial/tutorial04/data-semantic.ttl @@ -1,17 +1,48 @@ -@prefix ORCID: . +@prefix oboInOwl: . @prefix personinfo: . -@prefix schema1: . +@prefix rdf: . +@prefix wgs: . +@prefix faldo: . +@prefix brick: . +@prefix geo: . +@prefix rdfs: . +@prefix skos: . +@prefix xml: . +@prefix qb: . @prefix xsd: . +@prefix dc: . +@prefix owl: . +@prefix doap: . +@prefix prof: . +@prefix dcmitype: . +@prefix odrl: . +@prefix time: . +@prefix sh: . +@prefix vann: . +@prefix shex: . +@prefix sosa: . +@prefix dcat: . +@prefix foaf: . +@prefix csvw: . +@prefix prov: . +@prefix dcterms: . +@prefix ssn: . +@prefix org: . +@prefix linkml: . +@prefix oa: . +@prefix dcam: . +@prefix void: . 
+@prefix idot: . +@prefix schema: . +@prefix schema1: . +@prefix ORCID: . +ORCID:1234 schema1:name "Clark Kent" ; + schema1:telephone "555-555-5555" ; + a schema1:Person ; + personinfo:age 33 . +ORCID:4567 schema1:name "Lois Lane" ; + a schema1:Person ; + personinfo:age 34 . +_:c14n0 a personinfo:Container ; + personinfo:persons ORCID:1234 , ORCID:4567 . -ORCID:1234 a schema1:Person ; - schema1:name "Clark Kent" ; - schema1:telephone "555-555-5555" ; - personinfo:age 33 . - -ORCID:4567 a schema1:Person ; - schema1:name "Lois Lane" ; - personinfo:age 34 . - -[] a personinfo:Container ; - personinfo:persons ORCID:1234, - ORCID:4567 . diff --git a/examples/tutorial/tutorial04/data.ttl b/examples/tutorial/tutorial04/data.ttl index a18726325e..bc1a04229e 100644 --- a/examples/tutorial/tutorial04/data.ttl +++ b/examples/tutorial/tutorial04/data.ttl @@ -1,16 +1,48 @@ -@prefix ORCID: . +@prefix oboInOwl: . @prefix personinfo: . +@prefix rdf: . +@prefix wgs: . +@prefix faldo: . +@prefix brick: . +@prefix geo: . +@prefix rdfs: . +@prefix skos: . +@prefix xml: . +@prefix qb: . @prefix xsd: . - +@prefix dc: . +@prefix owl: . +@prefix doap: . +@prefix prof: . +@prefix dcmitype: . +@prefix odrl: . +@prefix time: . +@prefix sh: . +@prefix vann: . +@prefix shex: . +@prefix sosa: . +@prefix dcat: . +@prefix foaf: . +@prefix csvw: . +@prefix prov: . +@prefix dcterms: . +@prefix ssn: . +@prefix org: . +@prefix linkml: . +@prefix oa: . +@prefix dcam: . +@prefix void: . +@prefix idot: . +@prefix schema: . +@prefix schema1: . +@prefix ORCID: . ORCID:1234 a personinfo:Person ; - personinfo:age 33 ; - personinfo:full_name "Clark Kent" ; - personinfo:phone "555-555-5555" . - + personinfo:age 33 ; + personinfo:full_name "Clark Kent" ; + personinfo:phone "555-555-5555" . ORCID:4567 a personinfo:Person ; - personinfo:age 34 ; - personinfo:full_name "Lois Lane" . + personinfo:age 34 ; + personinfo:full_name "Lois Lane" . 
+_:c14n0 a personinfo:Container ; + personinfo:persons ORCID:1234 , ORCID:4567 . -[] a personinfo:Container ; - personinfo:persons ORCID:1234, - ORCID:4567 . diff --git a/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl b/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl index ee2237b062..4925ac8cd6 100644 --- a/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl +++ b/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl @@ -1,39 +1,82 @@ @prefix personinfo: . @prefix rdf: . -@prefix schema1: . -@prefix sh: . +@prefix wgs: . +@prefix brick: . +@prefix geo: . +@prefix rdfs: . +@prefix skos: . +@prefix xml: . +@prefix qb: . @prefix xsd: . - +@prefix dc: . +@prefix owl: . +@prefix doap: . +@prefix prof: . +@prefix dcmitype: . +@prefix odrl: . +@prefix time: . +@prefix vann: . +@prefix sh: . +@prefix dcat: . +@prefix csvw: . +@prefix sosa: . +@prefix foaf: . +@prefix prov: . +@prefix org: . +@prefix ssn: . +@prefix dcterms: . +@prefix linkml: . +@prefix void: . +@prefix dcam: . +@prefix schema: . +@prefix schema1: . +@prefix ORCID: . +schema1:Person a sh:NodeShape ; + sh:closed true ; + sh:ignoredProperties _:c14n7 ; + sh:property _:c14n0 , _:c14n2 , _:c14n3 , _:c14n4 , _:c14n6 ; + sh:targetClass schema1:Person . personinfo:Container a sh:NodeShape ; - sh:closed true ; - sh:ignoredProperties ( rdf:type ) ; - sh:property [ sh:class schema1:Person ; - sh:nodeKind sh:IRI ; - sh:order 0 ; - sh:path personinfo:persons ] ; - sh:targetClass personinfo:Container . + sh:closed true ; + sh:ignoredProperties _:c14n1 ; + sh:property _:c14n5 ; + sh:targetClass personinfo:Container . +_:c14n0 sh:datatype xsd:string ; + sh:description "name of the person" ; + sh:maxCount 1 ; + sh:minCount 1 ; + sh:nodeKind sh:Literal ; + sh:order 1 ; + sh:path schema1:name . +_:c14n1 rdf:first rdf:type ; + rdf:rest rdf:nil . +_:c14n2 sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:order 0 ; + sh:path personinfo:id . 
+_:c14n3 sh:datatype xsd:integer ; + sh:maxCount 1 ; + sh:maxInclusive 200 ; + sh:minInclusive 0 ; + sh:nodeKind sh:Literal ; + sh:order 4 ; + sh:path personinfo:age . +_:c14n4 sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:nodeKind sh:Literal ; + sh:order 3 ; + sh:path schema1:telephone ; + sh:pattern "^[\\d\\(\\)\\-]+$" . +_:c14n5 sh:class schema1:Person ; + sh:nodeKind sh:IRI ; + sh:order 0 ; + sh:path personinfo:persons . +_:c14n6 sh:datatype xsd:string ; + sh:description "other names for the person" ; + sh:nodeKind sh:Literal ; + sh:order 2 ; + sh:path personinfo:aliases . +_:c14n7 rdf:first rdf:type ; + rdf:rest rdf:nil . -schema1:Person a sh:NodeShape ; - sh:closed true ; - sh:ignoredProperties ( rdf:type ) ; - sh:property [ sh:maxCount 1 ; - sh:maxInclusive 200 ; - sh:minInclusive 0 ; - sh:order 4 ; - sh:path personinfo:age ], - [ sh:description "name of the person" ; - sh:maxCount 1 ; - sh:minCount 1 ; - sh:order 1 ; - sh:path schema1:name ], - [ sh:maxCount 1 ; - sh:order 0 ; - sh:path personinfo:id ], - [ sh:description "other names for the person" ; - sh:order 2 ; - sh:path personinfo:aliases ], - [ sh:maxCount 1 ; - sh:order 3 ; - sh:path schema1:telephone ; - sh:pattern "^[\\d\\(\\)\\-]+$" ] ; - sh:targetClass schema1:Person . diff --git a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py index d903850180..20656690ea 100644 --- a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py +++ b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py @@ -55,6 +55,27 @@ _PREFIX_FORMATS = frozenset({ox.RdfFormat.TURTLE, ox.RdfFormat.TRIG, ox.RdfFormat.N3, ox.RdfFormat.RDF_XML}) +def _is_safe_prefix_iri(iri: str) -> bool: + """Check whether a namespace IRI is safe for prefix serialization. + + pyoxigraph rejects IRIs with invalid code-points (e.g. 
double ``#``), + and rdflib's Turtle parser cannot round-trip CURIEs whose namespace + contains query parameters or fragments in unexpected positions. This + function returns ``False`` for such IRIs so they can be skipped during + prefix collection. + """ + # A namespace IRI should end with '/' or '#'. If '#' appears + # *before* the final character, the IRI contains an embedded + # fragment which produces unusable CURIEs. + if "#" in iri[:-1]: + return False + # Query parameters in namespace IRIs produce CURIEs that rdflib + # cannot parse back. + if "?" in iri: + return False + return True + + def canonicalize_rdf_graph( graph: rdflib.Graph, output_format: str = "turtle", @@ -124,11 +145,31 @@ def canonicalize_rdf_graph( # with the @base directive. if base_iri and ns_str == base_iri: continue + # Skip namespace IRIs that pyoxigraph rejects or that produce + # CURIEs rdflib cannot round-trip. Valid namespace IRIs for + # prefix use should end with '/' or '#' and contain no query + # parameters or fragment-like characters in the middle. + if not _is_safe_prefix_iri(ns_str): + continue prefixes[str(prefix)] = ns_str - result_bytes = ox.serialize( - sorted_triples, - format=ox_format, - prefixes=prefixes, - base_iri=base_iri, - ) + try: + result_bytes = ox.serialize( + sorted_triples, + format=ox_format, + prefixes=prefixes, + base_iri=base_iri, + ) + except ValueError: + # pyoxigraph rejects prefixes with invalid IRIs (e.g. containing + # fragment-like characters such as double '#'). Retry without + # the offending prefixes by falling back to no prefixes, which + # still produces valid (if verbose) Turtle. 
+ logger.warning( + "pyoxigraph rejected one or more prefix IRIs; " + "serializing without prefix declarations" + ) + result_bytes = ox.serialize( + sorted_triples, + format=ox_format, + ) return result_bytes.decode("utf-8") diff --git a/tests/linkml/test_generators/rdf/test_rdfgen_binary.py b/tests/linkml/test_generators/rdf/test_rdfgen_binary.py index 7a3d57a51e..47f412a30b 100644 --- a/tests/linkml/test_generators/rdf/test_rdfgen_binary.py +++ b/tests/linkml/test_generators/rdf/test_rdfgen_binary.py @@ -1,7 +1,13 @@ -from pathlib import Path -from typing import Any +"""Tests for RDFGenerator.serialize() with and without output paths. + +After the switch to canonicalize_rdf_graph (pyoxigraph RDFC-1.0), +the old binary fallback (UnicodeDecodeError → destination) no longer +exists. These tests verify the current behaviour: serialize always +returns a str, and when an output path is given the same text is +written to the file. +""" -from rdflib import Graph +from pathlib import Path from linkml.generators.rdfgen import RDFGenerator @@ -21,78 +27,29 @@ def _write_min_schema(p: Path) -> Path: return p -def test_with_output_binary_path_on_decode_error(monkeypatch, tmp_path): - """On UnicodeDecodeError: write via destination, keep stdout empty.""" - calls: dict[str, Any] = {"destination_called": False, "format": None} - - def fake_serialize(self, *args, **kwargs): - if "destination" not in kwargs: - raise UnicodeDecodeError("utf-8", b"\xff", 0, 1, "invalid start byte") - calls["destination_called"] = True - calls["format"] = kwargs.get("format") - dest = kwargs["destination"] - Path(dest).write_bytes(b"\x00\x01\x02BINARY-DATA") - return None - - monkeypatch.setattr(Graph, "serialize", fake_serialize, raising=True) - - schema_path = _write_min_schema(tmp_path / "schema.yaml") - out_path = tmp_path / "out.bin" - +def test_without_output_returns_text(tmp_path): + """Without -o, serialize() returns a non-empty str.""" + schema_path = _write_min_schema(tmp_path / "s.yaml") 
gen = RDFGenerator(str(schema_path), mergeimports=False) - gen.format = "ttl" # maps to 'turtle' - - ret = gen.serialize(output=str(out_path)) - - assert ret.strip() == "" - assert calls["destination_called"] is True - assert calls["format"] == "turtle" - assert out_path.exists() and out_path.stat().st_size > 0 - - data = out_path.read_bytes() - assert data.startswith(b"\x00\x01\x02BINARY-DATA") - - -def test_with_output_text_path_returns_text_and_writes_file(monkeypatch, tmp_path): - """If serialization returns text, write UTF-8 file and return the same text.""" - calls: dict[str, Any] = {"destination_called": False, "format": None} + gen.format = "turtle" - def fake_serialize(self, *args, **kwargs): - if "destination" in kwargs: - calls["destination_called"] = True - calls["format"] = kwargs.get("format") - return None - fmt = kwargs.get("format") - return f"# fake {fmt} content" + ret = gen.serialize() + assert isinstance(ret, str) + assert len(ret.strip()) > 0 - monkeypatch.setattr(Graph, "serialize", fake_serialize, raising=True) +def test_with_output_writes_file_and_returns_text(tmp_path): + """With -o, serialize() writes UTF-8 file and returns the same text.""" schema_path = _write_min_schema(tmp_path / "schema.yaml") out_path = tmp_path / "out.ttl" gen = RDFGenerator(str(schema_path), mergeimports=False) - gen.format = "ttl" # => 'turtle' + gen.format = "ttl" ret = gen.serialize(output=str(out_path)) - assert isinstance(ret, str) and ret.startswith("# fake turtle") - assert calls["destination_called"] is False + assert isinstance(ret, str) + assert len(ret.strip()) > 0 + assert out_path.exists() txt = out_path.read_text(encoding="utf-8") - assert txt.rstrip("\n") == ret.rstrip("\n") - - -def test_without_output_returns_text(monkeypatch, tmp_path): - """Without -o, return text.""" - - def fake_serialize(self, *args, **kwargs): - assert "destination" not in kwargs - return "# fake turtle content" - - monkeypatch.setattr(Graph, "serialize", fake_serialize, 
raising=True) - - schema_path = _write_min_schema(tmp_path / "s.yaml") - gen = RDFGenerator(str(schema_path), mergeimports=False) - gen.format = "turtle" - - ret = gen.serialize() - assert isinstance(ret, str) and ret.startswith("# fake turtle") + assert txt == ret diff --git a/tests/linkml/test_issues/__snapshots__/linkml_issue_384.other.txt b/tests/linkml/test_issues/__snapshots__/linkml_issue_384.other.txt index 49cacbfc1f..6a75839e04 100644 --- a/tests/linkml/test_issues/__snapshots__/linkml_issue_384.other.txt +++ b/tests/linkml/test_issues/__snapshots__/linkml_issue_384.other.txt @@ -605,7 +605,7 @@ generation_date: '2000-01-01T00:00:00' . . "https://w3id.org/linkml/examples/personinfo/GeoAge"^^ . - _:cb87aa37e3ad3dfbc100de41e622ae57a55c01252d953cd7f596f3f617c04a97ed . + _:c14n2 . . . . @@ -615,7 +615,7 @@ generation_date: '2000-01-01T00:00:00' . "https://w3id.org/linkml/examples/personinfo/GeoObject"^^ . . - _:cb6f98b99448d1238734b7af2cc92e23f15ba5c211db30144440eb739460c2cd83 . + _:c14n0 . . . . @@ -625,7 +625,7 @@ generation_date: '2000-01-01T00:00:00' "https://w3id.org/linkml/examples/personinfo"^^ . "https://w3id.org/linkml/examples/personinfo/Organization"^^ . . - _:cbb30ac86ca541a453eee19cdf1b7946df090933188ade517a9677564c8beac5bd . + _:c14n6 . . . . @@ -666,7 +666,7 @@ generation_date: '2000-01-01T00:00:00' . "https://w3id.org/linkml/examples/personinfo/Person"^^ . . - _:cba5af5c45ef581571107e9f40ba038543b617d27dfa8e41f71bfa140c3e5b8c09 . + _:c14n1 . . . . @@ -692,7 +692,7 @@ generation_date: '2000-01-01T00:00:00' "ex:Thing"^^ . "https://w3id.org/linkml/examples/personinfo"^^ . "https://w3id.org/linkml/examples/personinfo/Thing"^^ . - _:cb0 . + _:c14n4 . . . . @@ -919,9 +919,9 @@ generation_date: '2000-01-01T00:00:00' "https://w3id.org/linkml/examples/personinfo/phone"^^ . "https://creativecommons.org/publicdomain/zero/1.0/" . . - _:cb12ea35aa63cc721cd40fd34b4d5d9273803f97d45aa2daf1ba2eaa4b56057c201 . 
- _:cb143d2c4dab8e5bf48fda351a4d8564e15c151870b769a85f734179402d94c77f6 . - _:cb1a41ad3544cb8764d54c45fc982a5303f0bd602841d715a83a410416873890504 . + _:c14n3 . + _:c14n5 . + _:c14n7 . . . . @@ -1023,12 +1023,12 @@ generation_date: '2000-01-01T00:00:00' "http://www.w3.org/2001/XMLSchema#anyURI"^^ . . . -_:cb12ea35aa63cc721cd40fd34b4d5d9273803f97d45aa2daf1ba2eaa4b56057c201 "https://w3id.org/linkml/"^^ . -_:cb12ea35aa63cc721cd40fd34b4d5d9273803f97d45aa2daf1ba2eaa4b56057c201 "linkml" . -_:cb143d2c4dab8e5bf48fda351a4d8564e15c151870b769a85f734179402d94c77f6 "http://schema.org/"^^ . -_:cb143d2c4dab8e5bf48fda351a4d8564e15c151870b769a85f734179402d94c77f6 "sdo" . -_:cb1a41ad3544cb8764d54c45fc982a5303f0bd602841d715a83a410416873890504 "https://w3id.org/linkml/examples/personinfo/"^^ . -_:cb1a41ad3544cb8764d54c45fc982a5303f0bd602841d715a83a410416873890504 "ex" . +_:c14n3 "https://w3id.org/linkml/examples/personinfo/"^^ . +_:c14n3 "ex" . +_:c14n5 "http://schema.org/"^^ . +_:c14n5 "sdo" . +_:c14n7 "https://w3id.org/linkml/"^^ . +_:c14n7 "linkml" . # --- linkml_issue_384.py --- # Auto generated from linkml_issue_384.yaml by pythongen.py version: 0.0.1 diff --git a/tests/linkml/test_issues/__snapshots__/linkml_issue_384.owl.txt b/tests/linkml/test_issues/__snapshots__/linkml_issue_384.owl.txt index c1772f2805..0dfbd8ea82 100644 --- a/tests/linkml/test_issues/__snapshots__/linkml_issue_384.owl.txt +++ b/tests/linkml/test_issues/__snapshots__/linkml_issue_384.owl.txt @@ -1,55 +1,55 @@ # --- linkml_issue_384-False-False.owl --- . "GeoAge" . - _:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 . - _:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda . - _:cb1f07ec28cdb8029f73418d769b10652eadf89c2930f7e5d948ba3665125ffdd3b . - _:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 . - _:cb26e0d87f901e35c7138955124b4f6c3cdc7262aa37e10fdd8e0173f74bfdff98d . - _:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 . 
+ _:c14n10 . + _:c14n13 . + _:c14n18 . + _:c14n21 . + _:c14n7 . + _:c14n9 . . . "GeoObject" . . - _:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 . - _:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf . - _:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . - _:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba . - _:cb2af9ff61cc76b6f2a2dcf571b7a049921d1546c340489657e3942fb0a715a36fe . - _:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 . + _:c14n15 . + _:c14n2 . + _:c14n23 . + _:c14n24 . + _:c14n28 . + _:c14n4 . . . "Organization" . . - _:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e . - _:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . - _:cb27102cb0c73e192ca1d4283cc7f20a32a15988266a2823e5828e304828d03a5c2 . - _:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 . - _:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 . + _:c14n0 . + _:c14n12 . + _:c14n27 . + _:c14n29 . + _:c14n30 . . . "Person" . . - _:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 . - _:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 . - _:cb27e50cad98ec340969db736cbaf4bc5f6bbe583f92bac10dd75e1deb7cf54953f . - _:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 . - _:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 . - _:cb2c3053af06dbc90a2856ed009dcc90fba35d63942c364ccd21e52d8bfb97df1ec . - _:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . - _:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 . - _:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b . - _:cb314417b22ec189a2eaf05c5bff52d506a25055a8a063d0e4dea9b52822b25605f . + _:c14n11 . + _:c14n17 . + _:c14n22 . + _:c14n25 . + _:c14n26 . + _:c14n3 . + _:c14n31 . + _:c14n32 . + _:c14n6 . + _:c14n8 . . . . "Thing" . 
- _:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 . - _:cb1d30783b7a5397864f25ebd7654d631bf6d445fef6ee4733de82582f9b12cb8cd . - _:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 . - _:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a . - _:cb22537d349c147ef9d8f188db844904b96f7ea210acc0858184a31419e78f74c9e . - _:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 . + _:c14n1 . + _:c14n14 . + _:c14n16 . + _:c14n19 . + _:c14n20 . + _:c14n5 . . . . @@ -76,163 +76,163 @@ . . "personinfo" . -_:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 . -_:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 "0"^^ . -_:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 . -_:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 . -_:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 "0"^^ . -_:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 . -_:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 . -_:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 "0"^^ . -_:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 . -_:cb1d30783b7a5397864f25ebd7654d631bf6d445fef6ee4733de82582f9b12cb8cd . -_:cb1d30783b7a5397864f25ebd7654d631bf6d445fef6ee4733de82582f9b12cb8cd . -_:cb1d30783b7a5397864f25ebd7654d631bf6d445fef6ee4733de82582f9b12cb8cd . -_:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf . -_:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf "1"^^ . -_:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf . -_:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 . -_:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 "1"^^ . -_:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 . -_:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda . 
-_:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda "1"^^ . -_:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda . -_:cb1f07ec28cdb8029f73418d769b10652eadf89c2930f7e5d948ba3665125ffdd3b . -_:cb1f07ec28cdb8029f73418d769b10652eadf89c2930f7e5d948ba3665125ffdd3b . -_:cb1f07ec28cdb8029f73418d769b10652eadf89c2930f7e5d948ba3665125ffdd3b . -_:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a . -_:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a "0"^^ . -_:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a . -_:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 . -_:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 "0"^^ . -_:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 . -_:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . -_:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . -_:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . -_:cb22537d349c147ef9d8f188db844904b96f7ea210acc0858184a31419e78f74c9e . -_:cb22537d349c147ef9d8f188db844904b96f7ea210acc0858184a31419e78f74c9e . -_:cb22537d349c147ef9d8f188db844904b96f7ea210acc0858184a31419e78f74c9e . -_:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 . -_:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 "1"^^ . -_:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 . -_:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 . -_:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 "0"^^ . -_:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 . -_:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 . -_:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 "1"^^ . -_:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 . 
-_:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e . -_:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e "0"^^ . -_:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e . -_:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . -_:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . -_:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . -_:cb26e0d87f901e35c7138955124b4f6c3cdc7262aa37e10fdd8e0173f74bfdff98d . -_:cb26e0d87f901e35c7138955124b4f6c3cdc7262aa37e10fdd8e0173f74bfdff98d . -_:cb26e0d87f901e35c7138955124b4f6c3cdc7262aa37e10fdd8e0173f74bfdff98d . -_:cb27102cb0c73e192ca1d4283cc7f20a32a15988266a2823e5828e304828d03a5c2 . -_:cb27102cb0c73e192ca1d4283cc7f20a32a15988266a2823e5828e304828d03a5c2 . -_:cb27102cb0c73e192ca1d4283cc7f20a32a15988266a2823e5828e304828d03a5c2 . -_:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 . -_:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 "0"^^ . -_:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 . -_:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 . -_:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 "1"^^ . -_:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 . -_:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 . -_:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 "1"^^ . -_:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 . -_:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba . -_:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba "0"^^ . -_:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba . -_:cb27e50cad98ec340969db736cbaf4bc5f6bbe583f92bac10dd75e1deb7cf54953f . -_:cb27e50cad98ec340969db736cbaf4bc5f6bbe583f92bac10dd75e1deb7cf54953f . 
-_:cb27e50cad98ec340969db736cbaf4bc5f6bbe583f92bac10dd75e1deb7cf54953f . -_:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 . -_:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 "0"^^ . -_:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 . -_:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 . -_:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 "0"^^ . -_:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 . -_:cb2af9ff61cc76b6f2a2dcf571b7a049921d1546c340489657e3942fb0a715a36fe . -_:cb2af9ff61cc76b6f2a2dcf571b7a049921d1546c340489657e3942fb0a715a36fe . -_:cb2af9ff61cc76b6f2a2dcf571b7a049921d1546c340489657e3942fb0a715a36fe . -_:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 . -_:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 "1"^^ . -_:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 . -_:cb2c3053af06dbc90a2856ed009dcc90fba35d63942c364ccd21e52d8bfb97df1ec . -_:cb2c3053af06dbc90a2856ed009dcc90fba35d63942c364ccd21e52d8bfb97df1ec . -_:cb2c3053af06dbc90a2856ed009dcc90fba35d63942c364ccd21e52d8bfb97df1ec . -_:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . -_:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . -_:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . -_:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 . -_:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 "1"^^ . -_:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 . -_:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b . -_:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b "0"^^ . -_:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b . -_:cb314417b22ec189a2eaf05c5bff52d506a25055a8a063d0e4dea9b52822b25605f . 
-_:cb314417b22ec189a2eaf05c5bff52d506a25055a8a063d0e4dea9b52822b25605f . -_:cb314417b22ec189a2eaf05c5bff52d506a25055a8a063d0e4dea9b52822b25605f . +_:c14n0 . +_:c14n0 . +_:c14n0 . +_:c14n1 . +_:c14n1 . +_:c14n1 . +_:c14n10 . +_:c14n10 . +_:c14n10 . +_:c14n11 . +_:c14n11 "0"^^ . +_:c14n11 . +_:c14n12 . +_:c14n12 "0"^^ . +_:c14n12 . +_:c14n13 . +_:c14n13 . +_:c14n13 . +_:c14n14 . +_:c14n14 . +_:c14n14 . +_:c14n15 . +_:c14n15 "0"^^ . +_:c14n15 . +_:c14n16 . +_:c14n16 "1"^^ . +_:c14n16 . +_:c14n17 . +_:c14n17 . +_:c14n17 . +_:c14n18 . +_:c14n18 "0"^^ . +_:c14n18 . +_:c14n19 . +_:c14n19 "1"^^ . +_:c14n19 . +_:c14n2 . +_:c14n2 "0"^^ . +_:c14n2 . +_:c14n20 . +_:c14n20 "0"^^ . +_:c14n20 . +_:c14n21 . +_:c14n21 "1"^^ . +_:c14n21 . +_:c14n22 . +_:c14n22 "0"^^ . +_:c14n22 . +_:c14n23 . +_:c14n23 . +_:c14n23 . +_:c14n24 . +_:c14n24 . +_:c14n24 . +_:c14n25 . +_:c14n25 "1"^^ . +_:c14n25 . +_:c14n26 . +_:c14n26 . +_:c14n26 . +_:c14n27 . +_:c14n27 "1"^^ . +_:c14n27 . +_:c14n28 . +_:c14n28 "1"^^ . +_:c14n28 . +_:c14n29 . +_:c14n29 "0"^^ . +_:c14n29 . +_:c14n3 . +_:c14n3 . +_:c14n3 . +_:c14n30 . +_:c14n30 . +_:c14n30 . +_:c14n31 . +_:c14n31 "0"^^ . +_:c14n31 . +_:c14n32 . +_:c14n32 . +_:c14n32 . +_:c14n4 . +_:c14n4 "1"^^ . +_:c14n4 . +_:c14n5 . +_:c14n5 "0"^^ . +_:c14n5 . +_:c14n6 . +_:c14n6 "1"^^ . +_:c14n6 . +_:c14n7 . +_:c14n7 "0"^^ . +_:c14n7 . +_:c14n8 . +_:c14n8 "0"^^ . +_:c14n8 . +_:c14n9 . +_:c14n9 "1"^^ . +_:c14n9 . # --- linkml_issue_384-True-True.owl --- . . "GeoAge" . - _:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 . - _:cb1def0490d3233d18bbcdc75ea440636f2bbe33bf0d9fd842e9ac6c85d56782eb1 . - _:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda . - _:cb2251a2cc284802be192c79b4968c5e3e0d0a6310ef22b3c5b7486c1426634bfd4 . - _:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 . - _:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 . + _:c14n10 . + _:c14n17 . + _:c14n20 . + _:c14n23 . + _:c14n26 . 
+ _:c14n7 . . . . "GeoObject" . . - _:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 . - _:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf . - _:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . - _:cb266ac9ae64a083e9a8801a1402dd3b934dad4729f78a3a400cdb27cd817aefd45 . - _:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba . - _:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 . + _:c14n1 . + _:c14n13 . + _:c14n2 . + _:c14n24 . + _:c14n28 . + _:c14n3 . . . . "Organization" . . - _:cb2280f6fd5f67e623a7774cdf132efc33d1f1888d2169c7cdabd52865033586c09 . - _:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e . - _:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . - _:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 . - _:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 . + _:c14n12 . + _:c14n27 . + _:c14n29 . + _:c14n30 . + _:c14n4 . . . . "Person" . . - _:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 . - _:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 . - _:cb2437057497c9a38863099ba911a51798d2811dd43bf7614a1a792873f19481ee8 . - _:cb27a11dfb9f0596012dfa11a2e90982fcd3f563fae377f0b54b2c25a8d5fd2b833 . - _:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 . - _:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 . - _:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . - _:cb2cb4e1fec6eb5699f09380fe4a8fc707d2e8560f57a574cd07f0ad44fd17a26a6 . - _:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 . - _:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b . + _:c14n0 . + _:c14n11 . + _:c14n15 . + _:c14n16 . + _:c14n21 . + _:c14n25 . + _:c14n31 . + _:c14n6 . + _:c14n8 . + _:c14n9 . . . . . "Thing" . - _:cb18a14288127d647d54c91079b08a551d276c4665ae2feb1c07c9504c757817f14 . 
- _:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 . - _:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 . - _:cb1dc44781343e4bf0de94ad7dcf85f6baa016a27764022969adea0c36c1f4c12e5 . - _:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a . - _:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 . + _:c14n14 . + _:c14n18 . + _:c14n19 . + _:c14n22 . + _:c14n32 . + _:c14n5 . . . . @@ -266,103 +266,103 @@ _:cb314417b22ec189a2eaf05c5bff52d506a25055a8a063d0e4dea9b52822b25605f . . "personinfo" . -_:cb18a14288127d647d54c91079b08a551d276c4665ae2feb1c07c9504c757817f14 . -_:cb18a14288127d647d54c91079b08a551d276c4665ae2feb1c07c9504c757817f14 . -_:cb18a14288127d647d54c91079b08a551d276c4665ae2feb1c07c9504c757817f14 . -_:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 . -_:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 "0"^^ . -_:cb19bb3d4e5067478c4b42732ad7814ace5f746c27d7053c41587dd574fcf2f4606 . -_:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 . -_:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 "0"^^ . -_:cb1a0196aabf80e00feb710af29d4b2330953104cd23be4266ef6f0890e1e938a89 . -_:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 . -_:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 "0"^^ . -_:cb1a82d378de24ceee4d880fb97fa60edc01c846e778d2e8849eb068d68cf447011 . -_:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf . -_:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf "1"^^ . -_:cb1d750bf2fa9ce3471746e17effe6f486ba74b3a158ac58c7ec5d5b425f84df5cf . -_:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 . -_:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 "1"^^ . -_:cb1dbb654f69b67bcab7757946c5b0cce8f0314c46a5655eed834e8e5e447b23a52 . -_:cb1dc44781343e4bf0de94ad7dcf85f6baa016a27764022969adea0c36c1f4c12e5 . 
-_:cb1dc44781343e4bf0de94ad7dcf85f6baa016a27764022969adea0c36c1f4c12e5 . -_:cb1dc44781343e4bf0de94ad7dcf85f6baa016a27764022969adea0c36c1f4c12e5 . -_:cb1def0490d3233d18bbcdc75ea440636f2bbe33bf0d9fd842e9ac6c85d56782eb1 . -_:cb1def0490d3233d18bbcdc75ea440636f2bbe33bf0d9fd842e9ac6c85d56782eb1 . -_:cb1def0490d3233d18bbcdc75ea440636f2bbe33bf0d9fd842e9ac6c85d56782eb1 . -_:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda . -_:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda "1"^^ . -_:cb1e3ca21d885a6aa9198c7e0da80bb8945cc88e60fa7a050b328feea3ef8631fda . -_:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a . -_:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a "0"^^ . -_:cb1f249ba3e141c783753ca7f6bc46c4ce0ddb60ded99080b4958fc47b2e65e1e5a . -_:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 . -_:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 "0"^^ . -_:cb2005559eb2b21a3c9355da151f33d642e4af7b0d372076ce53935aec788fa6f67 . -_:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . -_:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . -_:cb223ace63a5d4c89c0969a4be05e62755cf1328abcf1fc6fdf5e09256462f70a2b . -_:cb2251a2cc284802be192c79b4968c5e3e0d0a6310ef22b3c5b7486c1426634bfd4 . -_:cb2251a2cc284802be192c79b4968c5e3e0d0a6310ef22b3c5b7486c1426634bfd4 . -_:cb2251a2cc284802be192c79b4968c5e3e0d0a6310ef22b3c5b7486c1426634bfd4 . -_:cb2280f6fd5f67e623a7774cdf132efc33d1f1888d2169c7cdabd52865033586c09 . -_:cb2280f6fd5f67e623a7774cdf132efc33d1f1888d2169c7cdabd52865033586c09 . -_:cb2280f6fd5f67e623a7774cdf132efc33d1f1888d2169c7cdabd52865033586c09 . -_:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 . -_:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 "1"^^ . -_:cb22de6a488b77633e4141164ae4ac6e8668dba8585b379d3b296f4a4890f7cce23 . -_:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 . 
-_:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 "0"^^ . -_:cb23b1f6eed54b7e50afd4742d834d2c517acf217864b10b109eee245892d46cb49 . -_:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 . -_:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 "1"^^ . -_:cb23bf24435ce7b5f75f5a486947997ffb3fafc286b8c79354e772e0b9db2191f30 . -_:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e . -_:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e "0"^^ . -_:cb23e14b200c6b61b63e1f4757ffefca473fb646f496f81f18937ae0a96fa6a777e . -_:cb2437057497c9a38863099ba911a51798d2811dd43bf7614a1a792873f19481ee8 . -_:cb2437057497c9a38863099ba911a51798d2811dd43bf7614a1a792873f19481ee8 . -_:cb2437057497c9a38863099ba911a51798d2811dd43bf7614a1a792873f19481ee8 . -_:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . -_:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . -_:cb24a85419af6cf0d15e7bae94c471eaf0632069a8e2af85f5833415d38d3e2a7c1 . -_:cb266ac9ae64a083e9a8801a1402dd3b934dad4729f78a3a400cdb27cd817aefd45 . -_:cb266ac9ae64a083e9a8801a1402dd3b934dad4729f78a3a400cdb27cd817aefd45 . -_:cb266ac9ae64a083e9a8801a1402dd3b934dad4729f78a3a400cdb27cd817aefd45 . -_:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 . -_:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 "0"^^ . -_:cb2766e399d6d56b5602562bcdfa8bcade0989eca8ed336f11b67f3c7ad7edf0c62 . -_:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 . -_:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 "1"^^ . -_:cb276bc5937f811a0b7bd8e281abb2d609d5cf68f1e658279732cdaa25f56657b12 . -_:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 . -_:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 "1"^^ . -_:cb279b19c4b6a0fd710a23b5ac285573ff9ab68e6e189f3b9f275a6676d23892747 . 
-_:cb27a11dfb9f0596012dfa11a2e90982fcd3f563fae377f0b54b2c25a8d5fd2b833 . -_:cb27a11dfb9f0596012dfa11a2e90982fcd3f563fae377f0b54b2c25a8d5fd2b833 . -_:cb27a11dfb9f0596012dfa11a2e90982fcd3f563fae377f0b54b2c25a8d5fd2b833 . -_:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba . -_:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba "0"^^ . -_:cb27cb1dd111a3ff7c3f28148cef9e09a6bb7205916d18918af480e011edec108ba . -_:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 . -_:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 "0"^^ . -_:cb2901721e4c091193c4a20c1bd5ca511041ba22625906480032d1dded426e4c3a8 . -_:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 . -_:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 "0"^^ . -_:cb2a3227bee2c818a432f61adb96aa923b9f4dccb58093bf6fc5490945bc59586f7 . -_:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 . -_:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 "1"^^ . -_:cb2b84ec75bbd99b370b2c82e11803b35f16724d0aeebfae11886065df507dfb883 . -_:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . -_:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . -_:cb2ca4bb2d727b7c18cdb247369b15b04322625d0ebc5721da8bb8c6e344edaf78b . -_:cb2cb4e1fec6eb5699f09380fe4a8fc707d2e8560f57a574cd07f0ad44fd17a26a6 . -_:cb2cb4e1fec6eb5699f09380fe4a8fc707d2e8560f57a574cd07f0ad44fd17a26a6 . -_:cb2cb4e1fec6eb5699f09380fe4a8fc707d2e8560f57a574cd07f0ad44fd17a26a6 . -_:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 . -_:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 "1"^^ . -_:cb2cbb40c2f63ead4e90a67a6ffe2ffac89cba69dbdaad6486c6b163baa50037371 . -_:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b . -_:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b "0"^^ . 
-_:cb2e15362173eed22c873b7b773750951b40ad1476cd33cc17ef9665896988c321b . +_:c14n0 . +_:c14n0 . +_:c14n0 . +_:c14n1 . +_:c14n1 . +_:c14n1 . +_:c14n10 . +_:c14n10 "1"^^ . +_:c14n10 . +_:c14n11 . +_:c14n11 "0"^^ . +_:c14n11 . +_:c14n12 . +_:c14n12 "0"^^ . +_:c14n12 . +_:c14n13 . +_:c14n13 "0"^^ . +_:c14n13 . +_:c14n14 . +_:c14n14 "1"^^ . +_:c14n14 . +_:c14n15 . +_:c14n15 . +_:c14n15 . +_:c14n16 . +_:c14n16 . +_:c14n16 . +_:c14n17 . +_:c14n17 "0"^^ . +_:c14n17 . +_:c14n18 . +_:c14n18 "1"^^ . +_:c14n18 . +_:c14n19 . +_:c14n19 "0"^^ . +_:c14n19 . +_:c14n2 . +_:c14n2 "0"^^ . +_:c14n2 . +_:c14n20 . +_:c14n20 "1"^^ . +_:c14n20 . +_:c14n21 . +_:c14n21 "0"^^ . +_:c14n21 . +_:c14n22 . +_:c14n22 . +_:c14n22 . +_:c14n23 . +_:c14n23 . +_:c14n23 . +_:c14n24 . +_:c14n24 . +_:c14n24 . +_:c14n25 . +_:c14n25 "1"^^ . +_:c14n25 . +_:c14n26 . +_:c14n26 . +_:c14n26 . +_:c14n27 . +_:c14n27 "1"^^ . +_:c14n27 . +_:c14n28 . +_:c14n28 "1"^^ . +_:c14n28 . +_:c14n29 . +_:c14n29 "0"^^ . +_:c14n29 . +_:c14n3 . +_:c14n3 "1"^^ . +_:c14n3 . +_:c14n30 . +_:c14n30 . +_:c14n30 . +_:c14n31 . +_:c14n31 "0"^^ . +_:c14n31 . +_:c14n32 . +_:c14n32 . +_:c14n32 . +_:c14n4 . +_:c14n4 . +_:c14n4 . +_:c14n5 . +_:c14n5 "0"^^ . +_:c14n5 . +_:c14n6 . +_:c14n6 "1"^^ . +_:c14n6 . +_:c14n7 . +_:c14n7 "0"^^ . +_:c14n7 . +_:c14n8 . +_:c14n8 "0"^^ . +_:c14n8 . +_:c14n9 . +_:c14n9 . +_:c14n9 . 
From 0265ad1b2ae2184f9164b498966243db56835126 Mon Sep 17 00:00:00 2001 From: Nico Matentzoglu Date: Wed, 29 Apr 2026 23:27:38 +0300 Subject: [PATCH 12/15] handle _expand_trailing_dot_curies --- .../linkml_runtime/utils/rdf_canonicalize.py | 53 ++++++++++++++++++- .../test_utils/test_rdf_canonicalize.py | 21 ++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py index 20656690ea..ed0ec9556f 100644 --- a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py +++ b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py @@ -25,10 +25,17 @@ prefix whose namespace equals the base IRI (e.g. rdflib's auto-bound ``base:`` prefix), pyoxigraph emits CURIEs like ``base:label`` that rdflib rejects. We skip such prefixes during serialization. + +5. **Trailing escaped dot in PN_LOCAL**: pyoxigraph emits CURIEs like + ``prefix:local\\.`` for IRIs whose local part ends with ``.``. This + is valid Turtle (PN_LOCAL_ESC), but rdflib's notation3 parser rejects + it because it conflicts with the statement-terminator dot. We + post-process the output to expand such CURIEs to full ```` form. """ import io import logging +import re import pyoxigraph as ox import rdflib @@ -55,6 +62,45 @@ _PREFIX_FORMATS = frozenset({ox.RdfFormat.TURTLE, ox.RdfFormat.TRIG, ox.RdfFormat.N3, ox.RdfFormat.RDF_XML}) +# Characters that may appear escaped in a Turtle PN_LOCAL via PN_LOCAL_ESC. +_PN_LOCAL_ESC_UNESCAPE = re.compile(r"\\([_~.\-!$&'()*+,;=/?#@%])") + + +def _expand_trailing_dot_curies(turtle_text: str, prefixes: dict[str, str]) -> str: + """Replace CURIEs whose local part ends in ``\\.`` with full ```` form. + + rdflib's notation3 parser rejects PN_LOCAL ending in an escaped dot + even though Turtle permits it (PN_LOCAL_ESC). pyoxigraph emits this + form for IRIs ending in ``.`` (e.g. ``biolink:StrandEnum#.``). 
We + rewrite each such CURIE to its expanded ```` form so the output + round-trips through rdflib. + """ + if not prefixes: + return turtle_text + + # Match: a prefix name, ':', a local part (no whitespace or token + # delimiters), ending in ``\.``, followed by whitespace. Use a + # negative lookbehind to avoid matching inside ``<...>`` or word + # characters that would make this a substring of something else. + pattern = re.compile( + r"(?\"'\[\]]*?\\\.)" + r"(?=\s)" + ) + + def replace(match: re.Match[str]) -> str: + prefix = match.group(1) + local_escaped = match.group(2) + namespace = prefixes.get(prefix) + if namespace is None: + return match.group(0) + local = _PN_LOCAL_ESC_UNESCAPE.sub(r"\1", local_escaped) + return f"<{namespace}{local}>" + + return pattern.sub(replace, turtle_text) + + def _is_safe_prefix_iri(iri: str) -> bool: """Check whether a namespace IRI is safe for prefix serialization. @@ -152,6 +198,7 @@ def canonicalize_rdf_graph( if not _is_safe_prefix_iri(ns_str): continue prefixes[str(prefix)] = ns_str + used_prefixes = prefixes try: result_bytes = ox.serialize( sorted_triples, @@ -172,4 +219,8 @@ def canonicalize_rdf_graph( sorted_triples, format=ox_format, ) - return result_bytes.decode("utf-8") + used_prefixes = None + result = result_bytes.decode("utf-8") + if ox_format in _PREFIX_FORMATS and used_prefixes: + result = _expand_trailing_dot_curies(result, used_prefixes) + return result diff --git a/tests/linkml_runtime/test_utils/test_rdf_canonicalize.py b/tests/linkml_runtime/test_utils/test_rdf_canonicalize.py index dd706c1d82..a0e2ca12a9 100644 --- a/tests/linkml_runtime/test_utils/test_rdf_canonicalize.py +++ b/tests/linkml_runtime/test_utils/test_rdf_canonicalize.py @@ -120,6 +120,27 @@ def test_xsd_string_normalized(): assert str(obj) == "hello" +def test_iri_with_trailing_dot_round_trips(): + """IRIs whose local part ends in '.' are emitted as full form so rdflib can parse them. 
+ + pyoxigraph emits ``prefix:local\\.`` per the Turtle PN_LOCAL_ESC rule, + but rdflib's notation3 parser rejects an escaped dot at the end of a + PN_LOCAL. The serializer rewrites such CURIEs to full IRI form to + preserve round-trip parseability. + """ + g = Graph() + g.bind("ex", "http://example.com/vocab/") + iri = URIRef("http://example.com/vocab/Strand#.") + g.add((iri, RDF.type, URIRef("http://example.com/vocab/Thing"))) + ttl = canonicalize_rdf_graph(g, output_format="turtle") + # CURIE form with trailing escaped dot must not appear; full IRI must + assert "ex:Strand\\#\\." not in ttl + assert "" in ttl + g2 = Graph() + g2.parse(data=ttl, format="turtle") + assert rdflib.compare.isomorphic(g, g2) + + def test_fallback_on_invalid_rdf(): """Graphs with literal predicates fall back to rdflib serializer.""" g = Graph() From a6f98361358fcf4cc1721286dd694a6d2b73df3a Mon Sep 17 00:00:00 2001 From: Carlo van Driesten Date: Mon, 4 May 2026 15:24:10 +0200 Subject: [PATCH 13/15] feat(generators): add --deterministic flag with diff-stable WL hashing Add a --deterministic / --no-deterministic CLI flag (default off) to OWL, SHACL, JSON-LD Context, and JSON-LD generators that produces diff-stable output using Weisfeiler-Lehman structural hashing on top of the RDFC-1.0 canonicalization from upstream (#3407). Three-phase hybrid pipeline (when --deterministic is set): 1. RDFC-1.0 canonicalization (upstream) produces sequential _:c14nN IDs 2. Weisfeiler-Lehman structural hashing replaces sequential IDs with content-based _:b hashes that remain stable when unrelated triples are added/removed 3. rdflib re-serialization recovers idiomatic Turtle (inline blank nodes, collection syntax, filtered prefixes, preserved xsd:string) Without --deterministic, upstream's always-on RDFC-1.0 canonicalization is used directly (via canonicalize_rdf_graph). 
Additional features gated behind --deterministic: - Expression sorting (any_of/all_of/none_of/exactly_one_of) in owlgen - Collection sorting (sh:in, sh:ignoredProperties) in shaclgen - Permissible value sorting in owlgen and shaclgen - JSON-LD deterministic key ordering (deterministic_json) - JSON-LD context structured ordering (jsonldcontextgen) Rebased on top of upstream linkml/linkml#3407 (pyoxigraph RDFC-1.0). Refs: linkml#1847, linkml#3407 Signed-off-by: Carlo van Driesten --- .../src/linkml/generators/jsonldcontextgen.py | 54 ++ .../linkml/src/linkml/generators/jsonldgen.py | 5 + .../linkml/src/linkml/generators/owlgen.py | 71 ++- .../linkml/src/linkml/generators/rdfgen.py | 2 +- .../linkml/src/linkml/generators/shaclgen.py | 19 +- .../linkml/src/linkml/generators/shexgen.py | 2 +- packages/linkml/src/linkml/utils/generator.py | 288 +++++++++++ .../src/linkml/utils/rdf_canonicalize.py | 223 ++++++++ .../test_deterministic_benchmark.py | 356 +++++++++++++ .../test_deterministic_output.py | 481 ++++++++++++++++++ 10 files changed, 1480 insertions(+), 21 deletions(-) create mode 100644 packages/linkml/src/linkml/utils/rdf_canonicalize.py create mode 100644 tests/linkml/test_generators/test_deterministic_benchmark.py create mode 100644 tests/linkml/test_generators/test_deterministic_output.py diff --git a/packages/linkml/src/linkml/generators/jsonldcontextgen.py b/packages/linkml/src/linkml/generators/jsonldcontextgen.py index 38dd938860..bc52c11008 100644 --- a/packages/linkml/src/linkml/generators/jsonldcontextgen.py +++ b/packages/linkml/src/linkml/generators/jsonldcontextgen.py @@ -309,8 +309,62 @@ def end_schema( with open(frame_path, "w", encoding="UTF-8") as f: json.dump(frame, f, indent=2, ensure_ascii=False) + if self.deterministic: + return self._deterministic_context_json(json.loads(str(as_json(context))), indent=3) + "\n" return str(as_json(context)) + "\n" + @staticmethod + def _deterministic_context_json(data: dict, indent: int = 3) -> str: + 
"""Serialize a JSON-LD context with deterministic key ordering. + + Preserves the conventional JSON-LD context structure: + 1. ``comments`` block first (metadata) + 2. ``@context`` block second, with: + a. ``@``-prefixed directives (``@vocab``, ``@base``) first + b. Prefix declarations (string values) second + c. Class/property term entries (object values) last + 3. Each group sorted alphabetically within itself + + Unlike :func:`deterministic_json`, this understands JSON-LD + conventions so that the output remains human-readable while + still being byte-identical across invocations. + """ + from linkml.utils.generator import deterministic_json + + ordered = {} + + # 1. "comments" first (if present) + if "comments" in data: + ordered["comments"] = data["comments"] + + # 2. "@context" with structured internal ordering + if "@context" in data: + ctx = data["@context"] + ordered_ctx = {} + + # 2a. @-prefixed directives (@vocab, @base, etc.) + for k in sorted(k for k in ctx if k.startswith("@")): + ordered_ctx[k] = ctx[k] + + # 2b. Prefix declarations (string values — short namespace URIs) + for k in sorted(k for k in ctx if not k.startswith("@") and isinstance(ctx[k], str)): + ordered_ctx[k] = ctx[k] + + # 2c. Term definitions (object values) — deep-sorted for determinism + term_entries = {k: v for k, v in ctx.items() if not k.startswith("@") and not isinstance(v, str)} + sorted_terms = json.loads(deterministic_json(term_entries)) + for k in sorted(sorted_terms): + ordered_ctx[k] = sorted_terms[k] + + ordered["@context"] = ordered_ctx + + # 3. 
Any remaining top-level keys + for k in sorted(data): + if k not in ordered: + ordered[k] = data[k] + + return json.dumps(ordered, indent=indent, ensure_ascii=False) + def visit_class(self, cls: ClassDefinition) -> bool: if self.exclude_imports and cls.name not in self._local_classes: return False diff --git a/packages/linkml/src/linkml/generators/jsonldgen.py b/packages/linkml/src/linkml/generators/jsonldgen.py index ee2fd0cf4e..c94c74d9dd 100644 --- a/packages/linkml/src/linkml/generators/jsonldgen.py +++ b/packages/linkml/src/linkml/generators/jsonldgen.py @@ -1,5 +1,6 @@ """Generate JSONld from a LinkML schema.""" +import json import os from collections.abc import Sequence from copy import deepcopy @@ -205,6 +206,10 @@ def end_schema(self, context: str | Sequence[str] | None = None, context_kwargs: self.schema["@context"].append({"@base": base_prefix}) # json_obj["@id"] = self.schema.id out = str(as_json(self.schema, indent=" ")) + "\n" + if self.deterministic: + from linkml.utils.generator import deterministic_json + + out = deterministic_json(json.loads(out), indent=2) + "\n" self.schema = self.original_schema return out diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index 51c2c941a4..bf32222370 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -43,7 +43,8 @@ ) from linkml_runtime.utils.formatutils import camelcase, underscore from linkml_runtime.utils.introspection import package_schemaview -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph +from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph +from linkml_runtime.utils.yamlutils import YAMLRoot logger = logging.getLogger(__name__) @@ -56,6 +57,21 @@ SWRLB = rdflib.Namespace("http://www.w3.org/2003/11/swrlb#") +def _expression_sort_key(expr: YAMLRoot) -> str: + """Return a stable sort key for LinkML anonymous expressions. 
+ + Used by ``--deterministic`` to order ``any_of``, ``all_of``, + ``none_of``, and ``exactly_one_of`` members reproducibly. + + This relies on ``YAMLRoot.__repr__()`` which formats objects using + their **field values** (not memory addresses). All anonymous + expression dataclasses in ``linkml_runtime.linkml_model.meta`` + use ``@dataclass(repr=False)`` and inherit this field-based repr, + so the output is deterministic across runs. + """ + return repr(expr) + + @unique class MetadataProfile(Enum): """ @@ -376,6 +392,10 @@ def serialize(self, **kwargs: Any) -> str: """ self.as_graph() fmt = "turtle" if self.format in ["owl", "ttl"] else self.format + if self.deterministic and fmt == "turtle": + from linkml.utils.generator import deterministic_turtle + + return deterministic_turtle(self.graph) return canonicalize_rdf_graph(self.graph, output_format=fmt) def add_metadata(self, e: Definition | PermissibleValue, uri: URIRef) -> None: @@ -673,12 +693,18 @@ def transform_class_expression( own_slots = self.get_own_slots(cls) owl_exprs: list[OWL_EXPRESSION] = [] if cls.any_of: - any_of_expr = self._union_of([self.transform_class_expression(x) for x in cls.any_of]) + members = list(cls.any_of) + if self.deterministic: + members = sorted(members, key=_expression_sort_key) + any_of_expr = self._union_of([self.transform_class_expression(x) for x in members]) if any_of_expr: owl_exprs.append(any_of_expr) if cls.exactly_one_of: + members = list(cls.exactly_one_of) + if self.deterministic: + members = sorted(members, key=_expression_sort_key) sub_exprs: list[OWL_EXPRESSION] = self._present( - self.transform_class_expression(x) for x in cls.exactly_one_of + self.transform_class_expression(x) for x in members ) if isinstance(cls, ClassDefinition): cls_uri = self._class_uri(cls.name) @@ -687,11 +713,11 @@ def transform_class_expression( graph.add((cls_uri, OWL.disjointUnionOf, listnode)) else: sub_sub_exprs: list[OWL_EXPRESSION] = [] - for i, x in enumerate(cls.exactly_one_of): + 
for i, x in enumerate(members): operand_expr = self.transform_class_expression(x) if not operand_expr: continue - rest = cls.exactly_one_of[0:i] + cls.exactly_one_of[i + 1 :] + rest = members[0:i] + members[i + 1 :] neg_expr = self._complement_of_union_of([self.transform_class_expression(nx) for nx in rest]) pos_expr = self._intersection_of([operand_expr, neg_expr]) if pos_expr: @@ -701,11 +727,17 @@ def transform_class_expression( owl_exprs.append(union_expr) # owl_exprs.extend(sub_exprs) if cls.all_of: - all_of_expr = self._intersection_of([self.transform_class_expression(x) for x in cls.all_of]) + members = list(cls.all_of) + if self.deterministic: + members = sorted(members, key=_expression_sort_key) + all_of_expr = self._intersection_of([self.transform_class_expression(x) for x in members]) if all_of_expr: owl_exprs.append(all_of_expr) if cls.none_of: - none_of_expr = self._complement_of_union_of([self.transform_class_expression(x) for x in cls.none_of]) + members = list(cls.none_of) + if self.deterministic: + members = sorted(members, key=_expression_sort_key) + none_of_expr = self._complement_of_union_of([self.transform_class_expression(x) for x in members]) if none_of_expr: owl_exprs.append(none_of_expr) for slot in own_slots: @@ -878,19 +910,29 @@ def _get_slot_nodes( ) return rdflib_nodes or None - if any_of_rdflib_nodes := _get_slot_nodes(slot.any_of): + def _maybe_sort_slots( + slot_definitions: Sequence[SlotDefinition | AnonymousSlotExpression] | None, + ) -> Sequence[SlotDefinition | AnonymousSlotExpression] | None: + if slot_definitions and self.deterministic: + return sorted(slot_definitions, key=_expression_sort_key) + return slot_definitions + + if any_of_rdflib_nodes := _get_slot_nodes(_maybe_sort_slots(slot.any_of)): owl_exprs.append(self._union_of(any_of_rdflib_nodes)) - if all_of_rdflib_nodes := _get_slot_nodes(slot.all_of): + if all_of_rdflib_nodes := _get_slot_nodes(_maybe_sort_slots(slot.all_of)): 
owl_exprs.append(self._intersection_of(all_of_rdflib_nodes)) - if none_of_rdflib_nodes := _get_slot_nodes(slot.none_of): + if none_of_rdflib_nodes := _get_slot_nodes(_maybe_sort_slots(slot.none_of)): owl_exprs.append(self._complement_of_union_of(none_of_rdflib_nodes)) if slot.exactly_one_of: + members = list(slot.exactly_one_of) + if self.deterministic: + members = sorted(members, key=_expression_sort_key) disj_exprs: list[OWL_EXPRESSION] = [] - for i, operand in enumerate(slot.exactly_one_of): + for i, operand in enumerate(members): operand_expr = self.transform_class_slot_expression(cls, operand, main_slot, owl_types) if not operand_expr: continue - rest = slot.exactly_one_of[0:i] + slot.exactly_one_of[i + 1 :] + rest = members[0:i] + members[i + 1 :] neg_expr = self._complement_of_union_of( [self.transform_class_slot_expression(cls, x, main_slot, owl_types) for x in rest], owl_types=owl_types, @@ -1164,7 +1206,10 @@ def add_enum(self, e: EnumDefinition) -> None: owl_types: list[URIRef | None] = [] enum_owl_type = self._get_metatype(e, self.default_permissible_value_type) - for pv in e.permissible_values.values(): + pvs = e.permissible_values.values() + if self.deterministic: + pvs = sorted(pvs, key=lambda x: x.text) + for pv in pvs: pv_owl_type = self._get_metatype(pv, enum_owl_type) owl_types.append(pv_owl_type) if pv_owl_type == RDFS.Literal: diff --git a/packages/linkml/src/linkml/generators/rdfgen.py b/packages/linkml/src/linkml/generators/rdfgen.py index a3fcf6a848..ea2a04fd9b 100644 --- a/packages/linkml/src/linkml/generators/rdfgen.py +++ b/packages/linkml/src/linkml/generators/rdfgen.py @@ -20,7 +20,7 @@ from linkml.generators.jsonldgen import JSONLDGenerator from linkml.utils.generator import Generator, shared_arguments from linkml_runtime.linkml_model import SchemaDefinition -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph +from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph @dataclass diff --git 
a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index 97f25b21da..017bac29f1 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -17,7 +17,7 @@ from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName, PresenceEnum from linkml_runtime.utils.formatutils import underscore -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph +from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, extended_str logger = logging.getLogger(__name__) @@ -184,6 +184,10 @@ def generate_header(self) -> str: def serialize(self, **args) -> str: g = self.as_graph() fmt = "turtle" if self.format in ["owl", "ttl"] else self.format + if self.deterministic and fmt == "turtle": + from linkml.utils.generator import deterministic_turtle + + return deterministic_turtle(g) return canonicalize_rdf_graph(g, output_format=fmt) def as_graph(self) -> Graph: @@ -652,13 +656,13 @@ def _add_enum(self, g: Graph, func: Callable, r: ElementName) -> None: sv = self.schemaview enum = sv.get_enum(r) pv_node = BNode() + pv_items = list(enum.permissible_values.items()) + if self.deterministic: + pv_items = sorted(pv_items, key=lambda x: x[0]) Collection( g, pv_node, - [ - URIRef(sv.expand_curie(pv.meaning)) if pv.meaning else Literal(pv_name) - for pv_name, pv in enum.permissible_values.items() - ], + [URIRef(sv.expand_curie(pv.meaning)) if pv.meaning else Literal(pv_name) for pv_name, pv in pv_items], ) func(SH["in"], pv_node) @@ -817,7 +821,10 @@ def collect_child_properties(class_name: str, output: set) -> None: list_node = BNode() ignored_properties.add(RDF.type) - Collection(g, list_node, list(ignored_properties)) + props = list(ignored_properties) + if self.deterministic: + 
props = sorted(props, key=str) + Collection(g, list_node, props) return list_node diff --git a/packages/linkml/src/linkml/generators/shexgen.py b/packages/linkml/src/linkml/generators/shexgen.py index 40a93ffbc9..2787af0b93 100644 --- a/packages/linkml/src/linkml/generators/shexgen.py +++ b/packages/linkml/src/linkml/generators/shexgen.py @@ -26,7 +26,7 @@ from linkml_runtime.linkml_model.types import SHEX from linkml_runtime.utils.formatutils import camelcase, sfx from linkml_runtime.utils.metamodelcore import URIorCURIE -from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph +from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph @dataclass diff --git a/packages/linkml/src/linkml/utils/generator.py b/packages/linkml/src/linkml/utils/generator.py index 72b977eaa7..efa83aadbd 100644 --- a/packages/linkml/src/linkml/utils/generator.py +++ b/packages/linkml/src/linkml/utils/generator.py @@ -38,6 +38,10 @@ from linkml.utils.schemaloader import SchemaLoader from linkml.utils.typereferences import References from linkml_runtime import SchemaView + +if TYPE_CHECKING: + from rdflib import Graph as RdfGraph + from linkml_runtime.linkml_model.meta import ( ClassDefinition, ClassDefinitionName, @@ -228,6 +232,277 @@ def normalize_graph_prefixes(graph: "Graph", schema_prefixes: dict[str, str]) -> if std_pfx in current_bindings and current_bindings[std_pfx] != ns_str: continue graph.bind(std_pfx, Namespace(ns_str), override=True, replace=True) +def _wl_signatures( + quads: list, + iterations: int = 4, +) -> dict[str, str]: + """Compute Weisfeiler-Lehman structural signatures for blank nodes. + + Uses 1-dimensional WL colour refinement [1]_ to assign each blank + node a deterministic signature derived from its multi-hop + neighbourhood structure. The signature depends only on predicate + IRIs, literal values, and named-node IRIs — **not** on blank-node + identifiers — so it remains stable when unrelated triples are added + or removed. 
+ + Parameters + ---------- + quads : list + Canonical quads from pyoxigraph (after RDFC-1.0). + iterations : int + Number of WL refinement rounds (default 4). + + Returns + ------- + dict[str, str] + Mapping from canonical blank-node ID (e.g. ``c14n42``) to a + truncated SHA-256 hash suitable for use as a stable blank-node + label. + + References + ---------- + .. [1] Weisfeiler, B. & Leman, A. (1968). "The reduction of a graph + to canonical form and the algebra which appears therein." + """ + import hashlib + + import pyoxigraph # guaranteed available — caller (deterministic_turtle) checks + + # Collect all blank node IDs and build adjacency index. + bnode_ids: set[str] = set() + # outgoing[b] = list of (predicate_str, object_str_or_bnode_id, is_bnode) + outgoing: dict[str, list[tuple[str, str, bool]]] = {} + # incoming[b] = list of (subject_str_or_bnode_id, predicate_str, is_bnode) + incoming: dict[str, list[tuple[str, str, bool]]] = {} + + for q in quads: + s, p, o = q.subject, q.predicate, q.object + s_is_bn = isinstance(s, pyoxigraph.BlankNode) + o_is_bn = isinstance(o, pyoxigraph.BlankNode) + p_str = str(p) + + if s_is_bn: + bnode_ids.add(s.value) + outgoing.setdefault(s.value, []).append((p_str, o.value if o_is_bn else str(o), o_is_bn)) + if o_is_bn: + bnode_ids.add(o.value) + incoming.setdefault(o.value, []).append((s.value if s_is_bn else str(s), p_str, s_is_bn)) + + # Initialise signatures: named-node edges only (no bnode IDs). + sig: dict[str, str] = {} + for bid in bnode_ids: + parts = [] + for p_str, o_str, o_is_bn in outgoing.get(bid, []): + if not o_is_bn: + parts.append(f"+{p_str}={o_str}") + for s_str, p_str, s_is_bn in incoming.get(bid, []): + if not s_is_bn: + parts.append(f"-{s_str}={p_str}") + sig[bid] = "|".join(sorted(parts)) + + # Iterative refinement: incorporate neighbour signatures. 
+ for _ in range(iterations): + new_sig: dict[str, str] = {} + for bid in bnode_ids: + parts = [sig[bid]] + for p_str, o_str, o_is_bn in outgoing.get(bid, []): + if o_is_bn: + parts.append(f"+{p_str}={sig.get(o_str, '')}") + for s_str, p_str, s_is_bn in incoming.get(bid, []): + if s_is_bn: + parts.append(f"-{sig.get(s_str, '')}={p_str}") + new_sig[bid] = "|".join(sorted(parts)) + sig = new_sig + + # Convert signatures to truncated SHA-256 hashes. + # Use 12 hex chars (48 bits) — birthday-bound collision probability + # is ~n²/2^49: ~0.002% at 100k nodes. Collisions are handled by + # appending a counter (see below), so correctness is preserved. + hash_map: dict[str, str] = {} + seen_hashes: dict[str, int] = {} + for bid in sorted(bnode_ids): + digest = hashlib.sha256(sig[bid].encode("utf-8")).hexdigest()[:12] + # Handle collisions by appending a counter. + count = seen_hashes.get(digest, 0) + seen_hashes[digest] = count + 1 + label = f"b{digest}" if count == 0 else f"b{digest}_{count}" + hash_map[bid] = label + + return hash_map + + +def deterministic_turtle(graph: "RdfGraph") -> str: + """Serialize an RDF graph to Turtle with deterministic output ordering. + + Uses a three-phase hybrid pipeline for **correctness**, **diff + stability**, and **readability**: + + 1. **RDFC-1.0** [1]_ (via ``pyoxigraph``) canonicalizes the graph, + ensuring isomorphic inputs produce identical triple sets. + 2. **Weisfeiler-Lehman structural hashing** replaces the sequential + ``_:c14nN`` identifiers with content-based hashes derived from + each blank node's multi-hop neighbourhood. These hashes depend + only on predicate IRIs, literal values, and named-node IRIs — + not on blank-node numbering — so adding or removing a triple + only affects the identifiers of directly involved blank nodes. + 3. **Hybrid rdflib re-serialization** parses the canonicalized, + WL-hashed triples back into an rdflib ``Graph`` and serializes + with rdflib's native Turtle writer. 
This recovers idiomatic + Turtle features that pyoxigraph cannot emit: + + - **Inline blank nodes** (``[ … ]``) for singly-referenced + blank nodes (Turtle §2.7 [2]_), instead of verbose named + ``_:bHASH`` syntax. + - **Collection syntax** (``( … )``) for ``rdf:List`` chains + (Turtle §2.8 [2]_). + - **Prefix filtering**: only prefixes actually used in the + graph's IRIs are declared, following the practice of Apache + Jena, Eclipse RDF4J, and Raptor. + + All triples from the source graph are preserved — the hybrid step + only changes syntactic form, never semantic content. + + Parameters + ---------- + graph : rdflib.Graph + An rdflib Graph to serialize. + + Returns + ------- + str + Deterministic Turtle string with ``@prefix`` declarations. + + References + ---------- + .. [1] W3C (2024). "RDF Dataset Canonicalization (RDFC-1.0)." + W3C Recommendation. https://www.w3.org/TR/rdf-canon/ + .. [2] W3C (2014). "RDF 1.1 Turtle — Terse RDF Triple Language." + W3C Recommendation. https://www.w3.org/TR/turtle/ + """ + try: + import pyoxigraph + except ImportError as exc: + raise ImportError( + "pyoxigraph >= 0.4.0 is required for --deterministic output. 
" + "Install it with: pip install 'pyoxigraph>=0.4.0'" + ) from exc + + from rdflib import BNode, Graph, Literal, URIRef + + # ── Phase 1: RDFC-1.0 canonicalization ────────────────────────── + nt_data = graph.serialize(format="nt") + + dataset = pyoxigraph.Dataset(pyoxigraph.parse(nt_data, format=pyoxigraph.RdfFormat.N_TRIPLES)) + dataset.canonicalize(pyoxigraph.CanonicalizationAlgorithm.RDFC_1_0) + + canonical_quads = list(dataset) + + # ── Phase 2: WL structural hashing for diff-stable blank node IDs + wl_map = _wl_signatures(canonical_quads) + + def _remap(term): + if isinstance(term, pyoxigraph.BlankNode) and term.value in wl_map: + return pyoxigraph.BlankNode(wl_map[term.value]) + return term + + remapped = [pyoxigraph.Triple(_remap(q.subject), q.predicate, _remap(q.object)) for q in canonical_quads] + + # ── Phase 3: Hybrid rdflib re-serialization ───────────────────── + # Convert pyoxigraph terms to rdflib terms and populate a clean + # Graph that only carries explicitly-bound prefixes. + def _to_rdflib(term): + """Convert a pyoxigraph term to the equivalent rdflib term.""" + if isinstance(term, pyoxigraph.NamedNode): + return URIRef(term.value) + if isinstance(term, pyoxigraph.BlankNode): + return BNode(term.value) + if isinstance(term, pyoxigraph.Literal): + if term.language: + return Literal(term.value, lang=term.language) + if term.datatype: + dt_iri = term.datatype.value + # In RDF 1.1, simple literals are syntactic sugar for + # xsd:string (Turtle §2.5.1). Preserve the shorter form + # to match the original owlgen output and avoid spurious + # diffs on every string literal. 
+ if dt_iri == "http://www.w3.org/2001/XMLSchema#string": + return Literal(term.value) + return Literal(term.value, datatype=URIRef(dt_iri)) + return Literal(term.value) + raise TypeError(f"Unexpected pyoxigraph term type: {type(term).__name__}: {term}") + + result_graph = Graph(bind_namespaces="none") + for triple in remapped: + result_graph.add( + ( + _to_rdflib(triple.subject), + _to_rdflib(triple.predicate), + _to_rdflib(triple.object), + ) + ) + + # Bind only prefixes whose namespace IRI is actually referenced + # by at least one subject, predicate, or object in the graph. + # This filters out rdflib's ~27 built-in default bindings + # (brick, csvw, doap, …) that leak through Graph() even when + # the schema never declared them. + used_iris: set[str] = set() + for s, p, o in result_graph: + for term in (s, p, o): + if isinstance(term, URIRef): + used_iris.add(str(term)) + + for pfx, ns in sorted(graph.namespaces()): + pfx_s, ns_s = str(pfx), str(ns) + if pfx_s and any(iri.startswith(ns_s) for iri in used_iris): + result_graph.bind(pfx_s, ns_s) + + return result_graph.serialize(format="turtle") + + +def deterministic_json(obj: object, indent: int = 3, preserve_list_order_keys: frozenset[str] | None = None) -> str: + """Serialize a JSON-compatible object with deterministic ordering. + + Recursively sorts all dict keys *and* list elements to produce + stable output across Python versions and process invocations. + + List elements are sorted by their canonical JSON representation + (``json.dumps(item, sort_keys=True)``), which handles lists of + dicts, strings, and mixed types. + + :param obj: A JSON-serializable object (typically parsed from ``as_json``). + :param indent: Number of spaces for indentation. + :param preserve_list_order_keys: Dict keys whose list values must NOT be + sorted (e.g. ``@context``, ``@list`` in JSON-LD where array order is + semantic). Defaults to ``_JSONLD_ORDERED_KEYS``. + :returns: Deterministic JSON string. 
+ """ + import json + + skip = preserve_list_order_keys if preserve_list_order_keys is not None else _JSONLD_ORDERED_KEYS + + def _deep_sort(value: object, parent_key: str = "") -> object: + if isinstance(value, dict): + return {k: _deep_sort(v, parent_key=k) for k, v in sorted(value.items())} + if isinstance(value, list): + sorted_items = [_deep_sort(item) for item in value] + if parent_key in skip: + return sorted_items + try: + return sorted(sorted_items, key=lambda x: json.dumps(x, sort_keys=True, ensure_ascii=False)) + except TypeError: + return sorted_items + return value + + return json.dumps(_deep_sort(obj), indent=indent, ensure_ascii=False) + + +# JSON-LD keys whose array values carry ordering semantics and must not +# be sorted. @context arrays define an override cascade (JSON-LD 1.1 +# §4.1); @list containers are explicitly ordered; @graph and @set are +# included defensively. +_JSONLD_ORDERED_KEYS: frozenset[str] = frozenset({"@context", "@list", "@graph", "@set", "imports"}) + @dataclass @@ -291,6 +566,9 @@ class Generator(metaclass=abc.ABCMeta): mergeimports: bool | None = True """True means merge non-linkml sources into importing package. False means separate packages""" + deterministic: bool = False + """True means produce stable, reproducible output with sorted keys and canonical blank-node ordering""" + source_file_date: str | None = None """Modification date of input source file""" @@ -1144,6 +1422,16 @@ def decorator(f: Command) -> Command: callback=stacktrace_callback, ) ) + f.params.append( + Option( + ("--deterministic/--no-deterministic",), + default=False, + show_default=True, + help="Generate stable, reproducible output with sorted keys and canonical blank-node ordering. " + "Supported by OWL, SHACL, JSON-LD, and JSON-LD Context generators. 
" + "Useful when generated artifacts are stored in version control.", + ) + ) f.params.append( Option( ("--normalize-prefixes/--no-normalize-prefixes",), diff --git a/packages/linkml/src/linkml/utils/rdf_canonicalize.py b/packages/linkml/src/linkml/utils/rdf_canonicalize.py new file mode 100644 index 0000000000..da57f23399 --- /dev/null +++ b/packages/linkml/src/linkml/utils/rdf_canonicalize.py @@ -0,0 +1,223 @@ +"""Deterministic RDF serialization via pyoxigraph RDFC-1.0 canonicalization. + +This module provides a function to canonicalize an rdflib Graph using +pyoxigraph's RDFC-1.0 implementation, producing deterministic output +with stable blank node labels and sorted triples. + +**Known limitations:** + +1. **xsd:string normalization**: pyoxigraph follows RDF 1.1, where plain + string literals and ``"text"^^xsd:string`` are identical. The output + will never contain explicit ``^^xsd:string`` annotations. Code that + re-parses the output with rdflib will see ``Literal("x")`` (datatype + ``None``) rather than ``Literal("x", datatype=XSD.string)``. + +2. **Non-standard RDF**: Graphs with literal predicates (e.g. SHACL + annotation mode) are rejected by pyoxigraph. This function falls + back to rdflib's serializer for such graphs. + +3. **Numeric short forms**: pyoxigraph uses Turtle short forms for + ``xsd:integer`` (``42``), ``xsd:boolean`` (``true``), and + ``xsd:decimal`` (``1.23``). rdflib parses these back with the + correct datatype, so this is lossless. + +4. **Base IRI / prefix collision**: When a graph has ``@base`` and a + prefix whose namespace equals the base IRI (e.g. rdflib's auto-bound + ``base:`` prefix), pyoxigraph emits CURIEs like ``base:label`` that + rdflib rejects. We skip such prefixes during serialization. + +5. **Trailing escaped dot in PN_LOCAL**: pyoxigraph emits CURIEs like + ``prefix:local\\.`` for IRIs whose local part ends with ``.``. 
This + is valid Turtle (PN_LOCAL_ESC), but rdflib's notation3 parser rejects + it because it conflicts with the statement-terminator dot. We + post-process the output to expand such CURIEs to full ```` form. +""" + +import io +import logging +import re + +import pyoxigraph as ox +import rdflib + +logger = logging.getLogger(__name__) + +# Mapping from rdflib/LinkML format strings to pyoxigraph RdfFormat objects. +_FORMAT_MAP: dict[str, ox.RdfFormat] = { + "turtle": ox.RdfFormat.TURTLE, + "ttl": ox.RdfFormat.TURTLE, + "nt": ox.RdfFormat.N_TRIPLES, + "ntriples": ox.RdfFormat.N_TRIPLES, + "n-triples": ox.RdfFormat.N_TRIPLES, + "nt11": ox.RdfFormat.N_TRIPLES, + "nquads": ox.RdfFormat.N_QUADS, + "n-quads": ox.RdfFormat.N_QUADS, + "xml": ox.RdfFormat.RDF_XML, + "rdf/xml": ox.RdfFormat.RDF_XML, + "trig": ox.RdfFormat.TRIG, + "n3": ox.RdfFormat.N3, +} + +# Formats that support prefix declarations. +_PREFIX_FORMATS = frozenset({ox.RdfFormat.TURTLE, ox.RdfFormat.TRIG, ox.RdfFormat.N3, ox.RdfFormat.RDF_XML}) + + +# Characters that may appear escaped in a Turtle PN_LOCAL via PN_LOCAL_ESC. +_PN_LOCAL_ESC_UNESCAPE = re.compile(r"\\([_~.\-!$&'()*+,;=/?#@%])") + + +def _expand_trailing_dot_curies(turtle_text: str, prefixes: dict[str, str]) -> str: + """Replace CURIEs whose local part ends in ``\\.`` with full ```` form. + + rdflib's notation3 parser rejects PN_LOCAL ending in an escaped dot + even though Turtle permits it (PN_LOCAL_ESC). pyoxigraph emits this + form for IRIs ending in ``.`` (e.g. ``biolink:StrandEnum#.``). We + rewrite each such CURIE to its expanded ```` form so the output + round-trips through rdflib. + """ + if not prefixes: + return turtle_text + + # Match: a prefix name, ':', a local part (no whitespace or token + # delimiters), ending in ``\.``, followed by whitespace. Use a + # negative lookbehind to avoid matching inside ``<...>`` or word + # characters that would make this a substring of something else. 
+ pattern = re.compile( + r"(?\"'\[\]]*?\\\.)" + r"(?=\s)" + ) + + def replace(match: re.Match[str]) -> str: + prefix = match.group(1) + local_escaped = match.group(2) + namespace = prefixes.get(prefix) + if namespace is None: + return match.group(0) + local = _PN_LOCAL_ESC_UNESCAPE.sub(r"\1", local_escaped) + return f"<{namespace}{local}>" + + return pattern.sub(replace, turtle_text) + + +def _is_safe_prefix_iri(iri: str) -> bool: + """Check whether a namespace IRI is safe for prefix serialization. + + pyoxigraph rejects IRIs with invalid code-points (e.g. double ``#``), + and rdflib's Turtle parser cannot round-trip CURIEs whose namespace + contains query parameters or fragments in unexpected positions. This + function returns ``False`` for such IRIs so they can be skipped during + prefix collection. + """ + # A namespace IRI should end with '/' or '#'. If '#' appears + # *before* the final character, the IRI contains an embedded + # fragment which produces unusable CURIEs. + if "#" in iri[:-1]: + return False + # Query parameters in namespace IRIs produce CURIEs that rdflib + # cannot parse back. + if "?" in iri: + return False + return True + + +def canonicalize_rdf_graph( + graph: rdflib.Graph, + output_format: str = "turtle", +) -> str: + """Serialize an rdflib Graph deterministically using RDFC-1.0 canonicalization. + + The graph is transferred to pyoxigraph via N-Triples, canonicalized + with RDFC-1.0, sorted, and serialized back to the requested format. + Prefix bindings from the rdflib Graph are preserved in the output + for formats that support them (Turtle, TriG, N3, RDF/XML). + + Falls back to plain rdflib serialization for unsupported formats or + graphs containing non-standard RDF (e.g. literal predicates). + + :param graph: The rdflib Graph to serialize. + :param output_format: Target serialization format (e.g. ``"turtle"``, ``"nt"``). + :return: Deterministic string serialization of the graph. 
+ """ + ox_format = _FORMAT_MAP.get(output_format.lower()) + if ox_format is None: + logger.warning( + "pyoxigraph does not support format %r; falling back to rdflib serializer", + output_format, + ) + return graph.serialize(format=output_format) + + # 1. Transfer rdflib graph to pyoxigraph via N-Triples. + nt_data = graph.serialize(format="nt") + nt_bytes = nt_data.encode("utf-8") if isinstance(nt_data, str) else nt_data + + # 2. Parse into pyoxigraph and build a Dataset for canonicalization. + # Fall back to rdflib if the graph contains non-standard RDF + # (e.g. literal predicates from annotations) that pyoxigraph rejects. + try: + triples = list(ox.parse(io.BytesIO(nt_bytes), format=ox.RdfFormat.N_TRIPLES)) + except SyntaxError: + logger.warning( + "Graph contains non-standard RDF that pyoxigraph cannot parse; falling back to rdflib serializer" + ) + return graph.serialize(format=output_format) + + dataset = ox.Dataset() + for triple in triples: + dataset.add(ox.Quad(triple.subject, triple.predicate, triple.object, ox.DefaultGraph())) + + # 3. Canonicalize blank node labels with RDFC-1.0. + dataset.canonicalize(ox.CanonicalizationAlgorithm.RDFC_1_0) + + # 4. Sort triples for deterministic ordering. + quads = list(dataset) + sorted_triples = sorted( + (ox.Triple(q.subject, q.predicate, q.object) for q in quads), + key=lambda t: (str(t.subject), str(t.predicate), str(t.object)), + ) + + # 5. Collect prefixes for formats that support them. + base_iri = str(graph.base) if graph.base else None + prefixes: dict[str, str] | None = None + if ox_format in _PREFIX_FORMATS: + prefixes = {} + for prefix, namespace in graph.namespace_manager.namespaces(): + if not prefix: # skip empty prefix (base) + continue + ns_str = str(namespace) + # Skip prefixes whose namespace matches the base IRI to avoid + # pyoxigraph emitting CURIEs like `base:label` that conflict + # with the @base directive. 
+ if base_iri and ns_str == base_iri: + continue + # Skip namespace IRIs that pyoxigraph rejects or that produce + # CURIEs rdflib cannot round-trip. Valid namespace IRIs for + # prefix use should end with '/' or '#' and contain no query + # parameters or fragment-like characters in the middle. + if not _is_safe_prefix_iri(ns_str): + continue + prefixes[str(prefix)] = ns_str + used_prefixes = prefixes + try: + result_bytes = ox.serialize( + sorted_triples, + format=ox_format, + prefixes=prefixes, + base_iri=base_iri, + ) + except ValueError: + # pyoxigraph rejects prefixes with invalid IRIs (e.g. containing + # fragment-like characters such as double '#'). Retry without + # the offending prefixes by falling back to no prefixes, which + # still produces valid (if verbose) Turtle. + logger.warning("pyoxigraph rejected one or more prefix IRIs; serializing without prefix declarations") + result_bytes = ox.serialize( + sorted_triples, + format=ox_format, + ) + used_prefixes = None + result = result_bytes.decode("utf-8") + if ox_format in _PREFIX_FORMATS and used_prefixes: + result = _expand_trailing_dot_curies(result, used_prefixes) + return result diff --git a/tests/linkml/test_generators/test_deterministic_benchmark.py b/tests/linkml/test_generators/test_deterministic_benchmark.py new file mode 100644 index 0000000000..b7488a8dda --- /dev/null +++ b/tests/linkml/test_generators/test_deterministic_benchmark.py @@ -0,0 +1,356 @@ +"""Benchmark: deterministic Turtle serializer on real-world ontologies. + +Evaluates the ``--deterministic`` flag against schema.org (~16 000 triples, +~800 classes, ~1 400 properties) and the kitchen_sink LinkML schema to +demonstrate four properties: + +1. **Semantic equivalence** — ``rdflib.compare.isomorphic()`` confirms that + deterministic and non-deterministic outputs encode the same RDF graph. +2. **Byte-level stability** — SHA-256 identity across repeated runs proves + that deterministic output is truly reproducible. +3. 
**Diff quality** — controlled mutations show that small schema changes + produce small, focused diffs (high signal-to-noise ratio). +4. **Performance** — generation time stays within acceptable bounds even + on large real-world graphs. + +Schema.org tests exercise ``deterministic_turtle()`` directly on a +pre-existing OWL ontology. Kitchen_sink tests exercise the full +``OwlSchemaGenerator`` / ``ShaclGenerator`` pipeline with LinkML schemas. + +References +---------- +- W3C RDFC-1.0: https://www.w3.org/TR/rdf-canon/ +- W3C Turtle 1.1: https://www.w3.org/TR/turtle/ +- schema.org: https://schema.org/docs/developers.html +""" + +import difflib +import hashlib +import time +from pathlib import Path + +import pytest +import yaml +from rdflib import Graph +from rdflib.compare import isomorphic + +from linkml.generators.owlgen import OwlSchemaGenerator +from linkml.generators.shaclgen import ShaclGenerator +from linkml.utils.generator import deterministic_turtle + +_has_pyoxigraph = False +try: + import pyoxigraph + + _has_pyoxigraph = hasattr(pyoxigraph, "Dataset") +except ImportError: + pass + +pytestmark = pytest.mark.skipif( + not _has_pyoxigraph, + reason="pyoxigraph >= 0.4.0 required for deterministic benchmarks", +) + +KITCHEN_SINK = str(Path(__file__).parent / "input" / "kitchen_sink.yaml") +SCHEMA_ORG_URL = "https://schema.org/version/latest/schemaorg-current-https.ttl" + + +def _sha256(text: str) -> str: + return hashlib.sha256(text.encode()).hexdigest() + + +def _diff_line_count(a: str, b: str) -> int: + """Count lines present in *b* but not in *a* (unified-diff additions).""" + al = a.strip().splitlines() + bl = b.strip().splitlines() + return sum( + 1 for line in difflib.unified_diff(al, bl, lineterm="") if line.startswith("+") and not line.startswith("+++") + ) + + +# ── Schema.org: direct serializer benchmark ──────────────────────── + + +@pytest.fixture(scope="module") +def schema_org_graph(): + """Download and parse schema.org as an rdflib Graph. 
+ + Cached for the module so the network fetch only happens once. + Skips all dependent tests if the download fails. + """ + try: + import urllib.request + + with urllib.request.urlopen(SCHEMA_ORG_URL, timeout=60) as resp: + data = resp.read().decode("utf-8") + except Exception as exc: + pytest.skip(f"Could not fetch schema.org: {exc}") + + g = Graph() + g.parse(data=data, format="turtle") + return g + + +@pytest.mark.network +class TestSchemaOrgDeterministicSerializer: + """Benchmark ``deterministic_turtle()`` on schema.org OWL ontology.""" + + def test_semantic_equivalence(self, schema_org_graph): + """Deterministic serialization must be isomorphic to the original graph.""" + det_ttl = deterministic_turtle(schema_org_graph) + + g_det = Graph() + g_det.parse(data=det_ttl, format="turtle") + + assert len(g_det) == len(schema_org_graph), ( + f"Triple count mismatch: original={len(schema_org_graph)}, deterministic={len(g_det)}" + ) + assert isomorphic(g_det, schema_org_graph), ( + "Deterministic output is NOT isomorphic to original schema.org graph" + ) + + def test_byte_stability(self, schema_org_graph): + """Two deterministic runs must produce byte-identical output.""" + run1 = deterministic_turtle(schema_org_graph) + run2 = deterministic_turtle(schema_org_graph) + assert _sha256(run1) == _sha256(run2), "Deterministic serializer produced different output across runs" + + def test_prefix_filtering(self, schema_org_graph): + """Only prefixes actually used in the graph should be declared.""" + det_ttl = deterministic_turtle(schema_org_graph) + + # Extract declared prefixes + declared = {} + for line in det_ttl.splitlines(): + if line.startswith("@prefix"): + parts = line.split() + pfx = parts[1].rstrip(":") + ns = parts[2].strip("<>") + declared[pfx] = ns + + # Collect all IRIs in the graph + from rdflib import URIRef + + used_iris = set() + for s, p, o in schema_org_graph: + for term in (s, p, o): + if isinstance(term, URIRef): + used_iris.add(str(term)) + + # Every 
declared prefix must have at least one IRI using it + for pfx, ns in declared.items(): + assert any(iri.startswith(ns) for iri in used_iris), f"Prefix '{pfx}:' <{ns}> declared but no IRI uses it" + + def test_performance(self, schema_org_graph): + """Serialization must complete within 60 seconds for ~16K triples.""" + start = time.time() + det_ttl = deterministic_turtle(schema_org_graph) + elapsed = time.time() - start + triple_count = len(schema_org_graph) + throughput = triple_count / elapsed if elapsed > 0 else float("inf") + + # Log for benchmark visibility (shows with pytest -v) + print(f"\n schema.org: {triple_count} triples in {elapsed:.1f}s ({throughput:.0f} triples/s)") + + assert elapsed < 60.0, f"Serialization took {elapsed:.1f}s (limit: 60s) for {triple_count} triples" + assert len(det_ttl) > 1000, "Output suspiciously short" + + +# ── Kitchen_sink: full pipeline benchmark ─────────────────────────── + + +def _mutate_kitchen_sink(description_suffix: str = "", add_slot: bool = False) -> str: + """Create a mutated copy of kitchen_sink.yaml **in the same directory** and return its path. + + The copy must live alongside the original so that LinkML relative imports + (``linkml:types``, ``core``, etc.) resolve correctly. + + Uses a unique filename (via ``os.getpid()``) to avoid race conditions + when tests run in parallel under pytest-xdist. + + Parameters + ---------- + description_suffix + Text appended to the first class description found. + add_slot + If True, adds a synthetic ``benchmark_notes`` slot to the first class. 
+ """ + import os + + ks_path = Path(KITCHEN_SINK) + schema = yaml.safe_load(ks_path.read_text()) + + if description_suffix or add_slot: + # Find the first class with a description + for cls_name, cls_def in schema.get("classes", {}).items(): + if isinstance(cls_def, dict) and cls_def.get("description"): + if description_suffix: + cls_def["description"] += description_suffix + if add_slot: + slots = cls_def.get("slots", []) + slots.append("benchmark_notes") + cls_def["slots"] = slots + break + + # Define the synthetic slot if adding one + if add_slot: + slots_dict = schema.setdefault("slots", {}) + slots_dict["benchmark_notes"] = { + "description": "Synthetic benchmark slot for diff quality testing.", + "range": "string", + } + + # Write in the same directory so relative imports resolve. + # Use PID to avoid race conditions with pytest-xdist workers. + out_path = ks_path.parent / f"_benchmark_mutated_{os.getpid()}_kitchen_sink.yaml" + out_path.write_text( + yaml.dump(schema, default_flow_style=False, allow_unicode=True), + encoding="utf-8", + ) + return str(out_path) + + +@pytest.mark.parametrize( + "generator_cls", + [OwlSchemaGenerator, ShaclGenerator], + ids=["owl", "shacl"], +) +class TestKitchenSinkDiffQuality: + """Measure diff quality on the kitchen_sink schema with controlled mutations.""" + + def test_mutation_description_change(self, generator_cls): + """A single description change must produce a small, focused diff. + + Deterministic mode should change only the affected line(s) and their + immediate context (e.g. SHACL may repeat descriptions in sh:description). + Non-deterministic mode produces a much larger diff due to blank-node + and property-ordering instability. 
+ """ + base = generator_cls(KITCHEN_SINK, deterministic=True).serialize() + mutated_path = _mutate_kitchen_sink(description_suffix=" (benchmark edit)") + try: + mutated = generator_cls(mutated_path, deterministic=True).serialize() + finally: + Path(mutated_path).unlink(missing_ok=True) + + det_diff = _diff_line_count(base, mutated) + + # Non-deterministic baseline for comparison + non_base = generator_cls(KITCHEN_SINK, deterministic=False).serialize() + non_mutated_path = _mutate_kitchen_sink(description_suffix=" (benchmark edit)") + try: + non_mutated = generator_cls(non_mutated_path, deterministic=False).serialize() + finally: + Path(non_mutated_path).unlink(missing_ok=True) + + non_diff = _diff_line_count(non_base, non_mutated) + + # The deterministic diff must be small (description + any SHACL mirrors) + assert det_diff <= 20, ( + f"Deterministic diff too large for a 1-description change: {det_diff} lines (expected ≤20)" + ) + # Signal-to-noise: deterministic must be at least 5× smaller + if non_diff > 0: + ratio = non_diff / max(det_diff, 1) + assert ratio >= 5, ( + f"Insufficient noise reduction: det={det_diff}, non-det={non_diff}, ratio={ratio:.1f}× (expected ≥5×)" + ) + + print( + f"\n {generator_cls.__name__} description mutation: " + f"det={det_diff} lines, non-det={non_diff} lines, " + f"noise reduction={non_diff / max(det_diff, 1):.0f}×" + ) + + def test_mutation_add_slot(self, generator_cls): + """Adding a new slot must produce a proportionally small diff. + + A new slot adds ~10-20 triples (label, range, domain, restrictions). + The diff should be roughly proportional to the new content, not a + full-file rewrite. 
+ """ + base = generator_cls(KITCHEN_SINK, deterministic=True).serialize() + mutated_path = _mutate_kitchen_sink(add_slot=True) + try: + mutated = generator_cls(mutated_path, deterministic=True).serialize() + finally: + Path(mutated_path).unlink(missing_ok=True) + + det_diff = _diff_line_count(base, mutated) + + # Non-deterministic baseline for comparison + non_base = generator_cls(KITCHEN_SINK, deterministic=False).serialize() + non_mutated_path = _mutate_kitchen_sink(add_slot=True) + try: + non_mutated = generator_cls(non_mutated_path, deterministic=False).serialize() + finally: + Path(non_mutated_path).unlink(missing_ok=True) + + non_diff = _diff_line_count(non_base, non_mutated) + + g_base = Graph() + g_base.parse(data=base, format="turtle") + g_mut = Graph() + g_mut.parse(data=mutated, format="turtle") + new_triples = len(g_mut) - len(g_base) + + # Diff should be proportional to new triples (allow 5× margin) + assert det_diff <= max(new_triples * 5, 40), ( + f"Deterministic diff ({det_diff} lines) disproportionate to new triples ({new_triples})" + ) + # Signal-to-noise: deterministic must be at least 5× smaller + if non_diff > 0: + ratio = non_diff / max(det_diff, 1) + assert ratio >= 5, ( + f"Insufficient noise reduction: det={det_diff}, non-det={non_diff}, ratio={ratio:.1f}× (expected ≥5×)" + ) + + print( + f"\n {generator_cls.__name__} add-slot mutation: " + f"det_diff={det_diff} lines, non-det={non_diff} lines, " + f"new_triples={new_triples}, noise reduction={non_diff / max(det_diff, 1):.0f}×" + ) + + print(f"\n {generator_cls.__name__} add-slot mutation: det_diff={det_diff} lines, new_triples={new_triples}") + + +@pytest.mark.parametrize( + "generator_cls", + [OwlSchemaGenerator, ShaclGenerator], + ids=["owl", "shacl"], +) +class TestKitchenSinkEquivalence: + """Verify semantic equivalence between deterministic and non-deterministic modes.""" + + def test_triple_count_matches(self, generator_cls): + """Both modes must produce the same number of 
triples.""" + det = generator_cls(KITCHEN_SINK, deterministic=True).serialize() + nondet = generator_cls(KITCHEN_SINK, deterministic=False).serialize() + + g_det = Graph() + g_det.parse(data=det, format="turtle") + g_nondet = Graph() + g_nondet.parse(data=nondet, format="turtle") + + assert len(g_det) == len(g_nondet), ( + f"Triple count mismatch: deterministic={len(g_det)}, non-deterministic={len(g_nondet)}" + ) + + def test_byte_stability_across_runs(self, generator_cls): + """Three deterministic runs must produce identical output.""" + runs = [generator_cls(KITCHEN_SINK, deterministic=True).serialize() for _ in range(3)] + hashes = [_sha256(r) for r in runs] + assert hashes[0] == hashes[1] == hashes[2], f"Deterministic output varies across runs: {hashes}" + + def test_non_deterministic_instability(self, generator_cls): + """Non-deterministic output should vary across runs (documents the problem). + + This test is advisory — it passes regardless but logs the instability. + """ + runs = [generator_cls(KITCHEN_SINK, deterministic=False).serialize() for _ in range(3)] + hashes = [_sha256(r) for r in runs] + identical = hashes[0] == hashes[1] == hashes[2] + print( + f"\n {generator_cls.__name__} non-det stable: {identical} " + f"(expected: False for Turtle due to bnode/ordering instability)" + ) diff --git a/tests/linkml/test_generators/test_deterministic_output.py b/tests/linkml/test_generators/test_deterministic_output.py new file mode 100644 index 0000000000..6721c2ac93 --- /dev/null +++ b/tests/linkml/test_generators/test_deterministic_output.py @@ -0,0 +1,481 @@ +"""Tests for deterministic generator output. + +When ``deterministic=True``, generators must produce byte-identical output +across multiple invocations. This ensures version-controlled artifacts don't +show spurious diffs from blank-node relabeling or dict-ordering instability. 
+ +Generators must also produce **isomorphic** output — the deterministic +serialization must encode the same RDF graph as non-deterministic mode. +""" + +import json +import time +from pathlib import Path + +import pytest +from rdflib import Graph +from rdflib.compare import isomorphic + +from linkml.generators.jsonldcontextgen import ContextGenerator +from linkml.generators.jsonldgen import JSONLDGenerator +from linkml.generators.owlgen import OwlSchemaGenerator +from linkml.generators.shaclgen import ShaclGenerator + +# Deterministic Turtle requires pyoxigraph >= 0.4.0 (for Dataset/canonicalize). +# When an older version is present (e.g. pulled in by morph-kgc), skip these tests. +_has_pyoxigraph = False +try: + import pyoxigraph + + _has_pyoxigraph = hasattr(pyoxigraph, "Dataset") +except ImportError: + pass + +pytestmark = pytest.mark.skipif(not _has_pyoxigraph, reason="pyoxigraph >= 0.4.0 required for deterministic tests") + +SCHEMA = str(Path(__file__).parent / "input" / "personinfo.yaml") + + +@pytest.mark.parametrize( + "generator_cls,kwargs", + [ + (OwlSchemaGenerator, {}), + (ShaclGenerator, {}), + (ContextGenerator, {}), + (JSONLDGenerator, {}), + ], + ids=["owl", "shacl", "context", "jsonld"], +) +def test_deterministic_output_is_identical_across_runs(generator_cls, kwargs): + """Generate output twice with deterministic=True and verify identity.""" + out1 = generator_cls(SCHEMA, deterministic=True, **kwargs).serialize() + out2 = generator_cls(SCHEMA, deterministic=True, **kwargs).serialize() + # JSONLDGenerator embeds a generation_date timestamp — normalize it + if generator_cls is JSONLDGenerator: + import re + + ts_re = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}") + out1 = ts_re.sub("TIMESTAMP", out1) + out2 = ts_re.sub("TIMESTAMP", out2) + assert out1 == out2, f"{generator_cls.__name__} produced different output across runs" + assert len(out1) > 100, "Output suspiciously short — generator may have failed silently" + + 
+@pytest.mark.parametrize( + "generator_cls", + [ContextGenerator, JSONLDGenerator], + ids=["context", "jsonld"], +) +def test_deterministic_json_has_sorted_keys(generator_cls): + """When deterministic=True, JSON dict keys should be sorted at all levels. + + For the ContextGenerator, @context keys use grouped ordering (prefixes + before term entries) — each group is sorted, but not globally. + """ + out = generator_cls(SCHEMA, deterministic=True).serialize() + parsed = json.loads(out) + + is_context_gen = generator_cls is ContextGenerator + + def _check_sorted_keys(obj, path="root"): + if isinstance(obj, dict): + keys = list(obj.keys()) + # Context generator groups @context keys: @-directives, prefixes, terms + if is_context_gen and path == "root.@context": + at_keys = [k for k in keys if k.startswith("@")] + prefix_keys = [k for k in keys if not k.startswith("@") and isinstance(obj[k], str)] + term_keys = [k for k in keys if not k.startswith("@") and not isinstance(obj[k], str)] + assert at_keys == sorted(at_keys), f"@-keys not sorted: {at_keys}" + assert prefix_keys == sorted(prefix_keys), f"Prefix keys not sorted: {prefix_keys}" + assert term_keys == sorted(term_keys), f"Term keys not sorted: {term_keys}" + else: + assert keys == sorted(keys), f"Keys not sorted at {path}: {keys}" + for k, v in obj.items(): + _check_sorted_keys(v, f"{path}.{k}") + elif isinstance(obj, list): + for i, item in enumerate(obj): + _check_sorted_keys(item, f"{path}[{i}]") + + _check_sorted_keys(parsed) + + +@pytest.mark.parametrize( + "generator_cls", + [ContextGenerator, JSONLDGenerator], + ids=["context", "jsonld"], +) +def test_deterministic_json_lists_are_sorted(generator_cls): + """When deterministic=True, JSON list elements should be sorted. + + Lists under JSON-LD structural keys (``@context``, ``@list``, ``imports``, + etc.) are exempt because their ordering carries semantic meaning. 
+ """ + out = generator_cls(SCHEMA, deterministic=True).serialize() + parsed = json.loads(out) + + # JSON-LD keys whose array values carry ordering semantics. + _ORDERED_KEYS = {"@context", "@list", "@graph", "@set", "imports"} + + def _check_sorted_lists(obj, path="root", parent_key=""): + if isinstance(obj, dict): + for k, v in obj.items(): + _check_sorted_lists(v, f"{path}.{k}", parent_key=k) + elif isinstance(obj, list): + if parent_key not in _ORDERED_KEYS: + str_items = [json.dumps(item, sort_keys=True, ensure_ascii=False) for item in obj] + assert str_items == sorted(str_items), f"List not sorted at {path}" + for i, item in enumerate(obj): + _check_sorted_lists(item, f"{path}[{i}]") + + _check_sorted_lists(parsed) + + +@pytest.mark.parametrize( + "generator_cls", + [OwlSchemaGenerator, ShaclGenerator], + ids=["owl", "shacl"], +) +def test_deterministic_turtle_preserves_at_prefix(generator_cls): + """deterministic_turtle must produce standard @prefix, not SPARQL PREFIX.""" + out = generator_cls(SCHEMA, deterministic=True).serialize() + assert "@prefix" in out, "Output uses non-standard prefix syntax" + assert "PREFIX " not in out, "Output uses SPARQL PREFIX instead of Turtle @prefix" + + +def test_deterministic_turtle_performance(): + """Deterministic OWL generation must complete within 10 seconds for personinfo. + + The Weisfeiler-Lehman approach is O(n log n), so this should easily pass. + The previous canon=True approach was exponential and failed this test + for graphs above ~250 triples. + """ + start = time.time() + out = OwlSchemaGenerator(SCHEMA, deterministic=True).serialize() + elapsed = time.time() - start + assert elapsed < 10.0, f"Deterministic generation took {elapsed:.1f}s (limit: 10s)" + assert len(out) > 100, "Output suspiciously short" + + +def test_shacl_closed_ignored_properties_deterministic(): + """sh:ignoredProperties in closed shapes must be deterministic. 
+ + ``_build_ignored_properties`` collects inherited slots into a set; without + explicit sorting this produces different ``rdf:first``/``rdf:rest`` chains + on each run. With ``deterministic=True`` (and sorted Collection inputs) + the output must be byte-identical. + """ + runs = [ShaclGenerator(SCHEMA, deterministic=True, closed=True).serialize() for _ in range(3)] + assert runs[0] == runs[1] == runs[2], "sh:ignoredProperties ordering differs across runs" + assert "sh:ignoredProperties" in runs[0], "Expected closed shapes with sh:ignoredProperties" + + +def test_shacl_enum_in_deterministic(): + """sh:in RDF lists for enums must be deterministic. + + ``_build_enum_constraint`` iterates ``enum.permissible_values.items()`` + (dict iteration order) into a ``Collection``. Without sorting, the + ``rdf:first``/``rdf:rest`` chain varies across runs. + """ + runs = [ShaclGenerator(SCHEMA, deterministic=True).serialize() for _ in range(3)] + assert runs[0] == runs[1] == runs[2], "sh:in enum list ordering differs across runs" + assert "sh:in" in runs[0], "Expected sh:in constraints for enums" + + +def test_owl_enum_one_of_deterministic(): + """owl:oneOf RDF lists for enums must be deterministic. + + ``_boolean_expression`` feeds ``pv_uris`` (from ``permissible_values``) + into a ``Collection``. Without sorting, ``owl:oneOf`` list ordering varies. + """ + runs = [OwlSchemaGenerator(SCHEMA, deterministic=True).serialize() for _ in range(3)] + assert runs[0] == runs[1] == runs[2], "owl:oneOf enum list ordering differs across runs" + + +KITCHEN_SINK = str(Path(__file__).parent / "input" / "kitchen_sink.yaml") + + +def test_deterministic_large_schema(): + """End-to-end idempotency on a complex schema (kitchen_sink). + + Exercises many code paths simultaneously: closed shapes, enums, imports, + class hierarchies, and mixed ranges. 
+ """ + owl1 = OwlSchemaGenerator(KITCHEN_SINK, deterministic=True).serialize() + owl2 = OwlSchemaGenerator(KITCHEN_SINK, deterministic=True).serialize() + assert owl1 == owl2, "OWL output differs across runs for kitchen_sink" + assert len(owl1) > 500, "kitchen_sink output suspiciously short" + + shacl1 = ShaclGenerator(KITCHEN_SINK, deterministic=True).serialize() + shacl2 = ShaclGenerator(KITCHEN_SINK, deterministic=True).serialize() + assert shacl1 == shacl2, "SHACL output differs across runs for kitchen_sink" + assert len(shacl1) > 500, "kitchen_sink output suspiciously short" + + +def test_deterministic_context_preserves_jsonld_structure(): + """Deterministic JSON-LD context must preserve conventional structure. + + JSON-LD contexts have a conventional layout: + 1. ``comments`` block first (metadata) + 2. ``@context`` block second, with prefixes grouped before term entries + + ``deterministic_json()`` would scramble this by sorting all keys + uniformly. The context generator must use JSON-LD-aware ordering. 
+ """ + out = ContextGenerator(SCHEMA, deterministic=True, metadata=True).serialize() + parsed = json.loads(out) + + # Top-level key order: "comments" before "@context" + top_keys = list(parsed.keys()) + assert "comments" in top_keys, "Expected 'comments' block with metadata=True" + assert top_keys.index("comments") < top_keys.index("@context"), ( + f"'comments' should precede '@context', got: {top_keys}" + ) + + # Inside @context: @-directives, then prefixes (str values), then terms (dict values) + ctx = parsed["@context"] + ctx_keys = list(ctx.keys()) + + at_keys = [k for k in ctx_keys if k.startswith("@")] + prefix_keys = [k for k in ctx_keys if not k.startswith("@") and isinstance(ctx[k], str)] + term_keys = [k for k in ctx_keys if not k.startswith("@") and not isinstance(ctx[k], str)] + + # Verify grouping: all @-keys before all prefix keys before all term keys + last_at = max(ctx_keys.index(k) for k in at_keys) if at_keys else -1 + first_prefix = min(ctx_keys.index(k) for k in prefix_keys) if prefix_keys else len(ctx_keys) + last_prefix = max(ctx_keys.index(k) for k in prefix_keys) if prefix_keys else -1 + first_term = min(ctx_keys.index(k) for k in term_keys) if term_keys else len(ctx_keys) + + assert last_at < first_prefix, "@-directives must come before prefixes" + assert last_prefix < first_term, "Prefixes must come before term entries" + + # Verify each group is sorted internally + assert at_keys == sorted(at_keys), f"@-directives not sorted: {at_keys}" + assert prefix_keys == sorted(prefix_keys), f"Prefixes not sorted: {prefix_keys}" + assert term_keys == sorted(term_keys), f"Term entries not sorted: {term_keys}" + + +def test_non_deterministic_is_default(): + """Verify that ``deterministic`` defaults to False.""" + gen = OwlSchemaGenerator(SCHEMA) + assert gen.deterministic is False + + +def test_wl_handles_structurally_similar_bnodes(): + """Blank nodes with identical local structure but different named neighbours + must receive different WL 
signatures and thus different stable labels. + + This tests the core WL property: two BNodes that differ only in their + connected named nodes (URIs/literals) must be distinguishable. + """ + from rdflib import BNode, Graph, Namespace, URIRef + + from linkml.utils.generator import deterministic_turtle + + RDF_TYPE = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") + OWL_RESTRICTION = URIRef("http://www.w3.org/2002/07/owl#Restriction") + OWL_ON_PROP = URIRef("http://www.w3.org/2002/07/owl#onProperty") + OWL_ALL_VALUES = URIRef("http://www.w3.org/2002/07/owl#allValuesFrom") + + EX = Namespace("http://example.org/") + g = Graph() + + # Two restrictions with same structure but different property URIs + r1 = BNode() + g.add((r1, RDF_TYPE, OWL_RESTRICTION)) + g.add((r1, OWL_ON_PROP, EX.alpha)) + g.add((r1, OWL_ALL_VALUES, EX.Target1)) + + r2 = BNode() + g.add((r2, RDF_TYPE, OWL_RESTRICTION)) + g.add((r2, OWL_ON_PROP, EX.beta)) + g.add((r2, OWL_ALL_VALUES, EX.Target2)) + + RDFS_SUBCLASS = URIRef("http://www.w3.org/2000/01/rdf-schema#subClassOf") + g.add((EX.MyClass, RDFS_SUBCLASS, r1)) + g.add((EX.MyClass, RDFS_SUBCLASS, r2)) + + # Must be deterministic across runs + out1 = deterministic_turtle(g) + out2 = deterministic_turtle(g) + assert out1 == out2, "WL-based serializer is not deterministic for similar BNodes" + + # Both restrictions must appear (not collapsed) + assert "alpha" in out1 + assert "beta" in out1 + + +def test_deterministic_turtle_no_bnodes(): + """Graphs with no blank nodes should still produce sorted, deterministic output.""" + from rdflib import Graph, Literal, Namespace + from rdflib.namespace import RDFS + + from linkml.utils.generator import deterministic_turtle + + EX = Namespace("http://example.org/") + g = Graph() + g.add((EX.B, RDFS.label, Literal("B"))) + g.add((EX.A, RDFS.label, Literal("A"))) + + out1 = deterministic_turtle(g) + out2 = deterministic_turtle(g) + assert out1 == out2 + + # A should appear before B (sorted) + a_pos = 
out1.find("example.org/A") + b_pos = out1.find("example.org/B") + assert a_pos < b_pos, "Triples should be sorted: A before B" + + +@pytest.mark.xfail( + reason=( + "Collection sorting (owl:oneOf, sh:in) in deterministic mode intentionally " + "reorders RDF list triples for canonical output. The resulting graph is " + "semantically equivalent (OWL/SHACL interpret these as unordered sets) but " + "not RDF-isomorphic because rdf:first/rdf:rest chains encode ordering." + ), + strict=True, +) +@pytest.mark.parametrize( + "generator_cls", + [OwlSchemaGenerator, ShaclGenerator], + ids=["owl", "shacl"], +) +def test_deterministic_turtle_is_isomorphic(generator_cls): + """Deterministic output is NOT RDF-isomorphic to non-deterministic output. + + This documents the trade-off identified in linkml/linkml#3295 review: + deterministic mode sorts Collection inputs (owl:oneOf, sh:in, + sh:ignoredProperties) to produce canonical RDF list ordering. Since RDF + Collections encode order via rdf:first/rdf:rest triples, the sorted graph + is structurally different from the insertion-order graph — even though the + OWL/SHACL semantics are identical (these Collections represent sets). 
+ + The test is marked xfail(strict=True) so that it: + - Documents the known, intentional non-isomorphism + - Alerts maintainers if the behaviour changes (strict xfail fails on pass) + """ + out_det = generator_cls(SCHEMA, deterministic=True).serialize() + out_nondet = generator_cls(SCHEMA, deterministic=False).serialize() + + g_det = Graph() + g_det.parse(data=out_det, format="turtle") + + g_nondet = Graph() + g_nondet.parse(data=out_nondet, format="turtle") + + assert len(g_det) == len(g_nondet), ( + f"Triple count mismatch: deterministic={len(g_det)}, non-deterministic={len(g_nondet)}" + ) + assert isomorphic(g_det, g_nondet), ( + f"{generator_cls.__name__}: deterministic output is NOT isomorphic " + "to non-deterministic output — the serialization changed the graph" + ) + + +@pytest.mark.parametrize( + "generator_cls", + [OwlSchemaGenerator, ShaclGenerator], + ids=["owl", "shacl"], +) +def test_non_deterministic_output_unchanged(generator_cls): + """Non-deterministic output must still produce valid RDF. + + Ensures that changes for deterministic mode don't break default behavior. 
+ """ + out = generator_cls(SCHEMA, deterministic=False).serialize() + assert len(out) > 100, "Output suspiciously short" + g = Graph() + g.parse(data=out, format="turtle") + assert len(g) > 50, f"Graph has too few triples ({len(g)})" + + +@pytest.mark.parametrize( + "generator_cls,kwargs", + [ + (OwlSchemaGenerator, {}), + (ShaclGenerator, {}), + (ContextGenerator, {}), + (JSONLDGenerator, {}), + ], + ids=["owl", "shacl", "context", "jsonld"], +) +def test_non_deterministic_produces_valid_output(generator_cls, kwargs): + """All generators must produce valid output in non-deterministic mode.""" + out = generator_cls(SCHEMA, deterministic=False, **kwargs).serialize() + assert len(out) > 100, f"{generator_cls.__name__} output suspiciously short" + + +@pytest.mark.xfail( + reason=( + "Collection sorting in deterministic mode produces non-isomorphic RDF " + "(different rdf:first/rdf:rest triples). See test_deterministic_turtle_is_isomorphic." + ), + strict=True, +) +@pytest.mark.parametrize( + "generator_cls", + [OwlSchemaGenerator, ShaclGenerator], + ids=["owl", "shacl"], +) +def test_deterministic_kitchen_sink_isomorphic(generator_cls): + """Isomorphism check on the complex kitchen_sink schema. + + Expected to fail for the same reason as test_deterministic_turtle_is_isomorphic: + Collection sorting changes the RDF structure while preserving OWL/SHACL semantics. 
+ """ + out_det = generator_cls(KITCHEN_SINK, deterministic=True).serialize() + out_nondet = generator_cls(KITCHEN_SINK, deterministic=False).serialize() + + g_det = Graph() + g_det.parse(data=out_det, format="turtle") + + g_nondet = Graph() + g_nondet.parse(data=out_nondet, format="turtle") + + assert isomorphic(g_det, g_nondet), ( + f"{generator_cls.__name__}: kitchen_sink deterministic output is NOT isomorphic to non-deterministic output" + ) + + +@pytest.mark.skipif(False, reason="does not require pyoxigraph") +def test_expression_sort_key_is_stable(): + """``_expression_sort_key`` must produce stable, content-based keys. + + LinkML anonymous expressions inherit ``YAMLRoot.__repr__()``, which + formats objects using **field values** (not memory addresses). + The ``_expression_sort_key`` helper relies on this for deterministic + ordering of ``any_of`` / ``all_of`` / ``none_of`` members. + + This test verifies that: + 1. Two distinct objects with identical fields produce the same key. + 2. Objects with different fields produce different keys. + 3. Sorting is stable across repeated calls. 
+ """ + from linkml.generators.owlgen import _expression_sort_key + from linkml_runtime.linkml_model.meta import AnonymousClassExpression, AnonymousSlotExpression + + # Two distinct objects with identical content → same key + a1 = AnonymousClassExpression(is_a="Parent") + a2 = AnonymousClassExpression(is_a="Parent") + assert a1 is not a2 + assert _expression_sort_key(a1) == _expression_sort_key(a2) + + # Different content → different keys + b = AnonymousClassExpression(is_a="Child") + assert _expression_sort_key(a1) != _expression_sort_key(b) + + # Sorting stability: same order every time + items = [b, a1, a2] + for _ in range(5): + result = sorted(items, key=_expression_sort_key) + # "Child" < "Parent" alphabetically, so b comes first + assert _expression_sort_key(result[0]) == _expression_sort_key(b) + assert _expression_sort_key(result[1]) == _expression_sort_key(result[2]) # a1, a2 together + + # Slot expressions work too + s1 = AnonymousSlotExpression(range="string") + s2 = AnonymousSlotExpression(range="integer") + assert _expression_sort_key(s1) != _expression_sort_key(s2) + order1 = sorted([s2, s1], key=_expression_sort_key) + order2 = sorted([s1, s2], key=_expression_sort_key) + assert [_expression_sort_key(x) for x in order1] == [_expression_sort_key(x) for x in order2] From 90b499e5abaf1bcf89442598b98fa56fe6eafaeb Mon Sep 17 00:00:00 2001 From: Carlo van Driesten Date: Tue, 12 May 2026 11:55:43 +0200 Subject: [PATCH 14/15] fix(shaclgen): apply default_language to SPARQL constraint messages When --default-language is set, the sh:message literal on SPARQL constraints (sh:SPARQLConstraint) was emitted without a language tag. Add lang=self._resolve_language() to the Literal() constructor call for SPARQL rule descriptions. 
Signed-off-by: Carlo van Driesten --- examples/tutorial/tutorial01/data.ttl | 1 - examples/tutorial/tutorial04/data-semantic.ttl | 1 - examples/tutorial/tutorial04/data.ttl | 1 - examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl | 1 - packages/linkml/src/linkml/generators/golanggen/template.py | 4 ++-- packages/linkml/src/linkml/generators/owlgen.py | 6 ++---- packages/linkml/src/linkml/generators/rdfgen.py | 2 +- packages/linkml/src/linkml/generators/shaclgen.py | 4 ++-- packages/linkml/src/linkml/generators/shexgen.py | 2 +- packages/linkml/src/linkml/utils/generator.py | 3 ++- .../src/linkml_runtime/dumpers/rdflib_dumper.py | 2 +- .../src/linkml_runtime/utils/rdf_canonicalize.py | 5 +---- tests/linkml/test_generators/test_shaclgen.py | 4 +++- 13 files changed, 15 insertions(+), 21 deletions(-) diff --git a/examples/tutorial/tutorial01/data.ttl b/examples/tutorial/tutorial01/data.ttl index fff62c9b84..7cf6fd3b9b 100644 --- a/examples/tutorial/tutorial01/data.ttl +++ b/examples/tutorial/tutorial01/data.ttl @@ -36,4 +36,3 @@ _:c14n0 a personinfo:Person ; personinfo:full_name "Clark Kent" ; personinfo:id "ORCID:1234" ; personinfo:phone "555-555-5555" . - diff --git a/examples/tutorial/tutorial04/data-semantic.ttl b/examples/tutorial/tutorial04/data-semantic.ttl index 380c50d030..cf1efa1827 100644 --- a/examples/tutorial/tutorial04/data-semantic.ttl +++ b/examples/tutorial/tutorial04/data-semantic.ttl @@ -45,4 +45,3 @@ ORCID:4567 schema1:name "Lois Lane" ; personinfo:age 34 . _:c14n0 a personinfo:Container ; personinfo:persons ORCID:1234 , ORCID:4567 . - diff --git a/examples/tutorial/tutorial04/data.ttl b/examples/tutorial/tutorial04/data.ttl index bc1a04229e..ecc381e3a7 100644 --- a/examples/tutorial/tutorial04/data.ttl +++ b/examples/tutorial/tutorial04/data.ttl @@ -45,4 +45,3 @@ ORCID:4567 a personinfo:Person ; personinfo:full_name "Lois Lane" . _:c14n0 a personinfo:Container ; personinfo:persons ORCID:1234 , ORCID:4567 . 
- diff --git a/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl b/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl index 4925ac8cd6..af0cf2eb5e 100644 --- a/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl +++ b/examples/tutorial/tutorial04/personinfo-semantic.shacl.ttl @@ -79,4 +79,3 @@ _:c14n6 sh:datatype xsd:string ; sh:path personinfo:aliases . _:c14n7 rdf:first rdf:type ; rdf:rest rdf:nil . - diff --git a/packages/linkml/src/linkml/generators/golanggen/template.py b/packages/linkml/src/linkml/generators/golanggen/template.py index 479d53d667..96f906353d 100644 --- a/packages/linkml/src/linkml/generators/golanggen/template.py +++ b/packages/linkml/src/linkml/generators/golanggen/template.py @@ -41,8 +41,8 @@ class GolangTemplateModel(TemplateModel): trim_blocks=True, lstrip_blocks=True, ) - _environment.filters["go_comment"] = lambda text, indent="": ( - "\n".join(f"{indent}// {line}" if line.strip() else f"{indent}//" for line in text.splitlines()) + _environment.filters["go_comment"] = lambda text, indent="": "\n".join( + f"{indent}// {line}" if line.strip() else f"{indent}//" for line in text.splitlines() ) meta_exclude: ClassVar[list[str]] = None diff --git a/packages/linkml/src/linkml/generators/owlgen.py b/packages/linkml/src/linkml/generators/owlgen.py index bf32222370..ab13c9f13d 100644 --- a/packages/linkml/src/linkml/generators/owlgen.py +++ b/packages/linkml/src/linkml/generators/owlgen.py @@ -23,6 +23,7 @@ from linkml.generators.common.subproperty import is_xsd_anyuri_range from linkml.utils.deprecation import deprecation_warning from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments +from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime import SchemaView from linkml_runtime.linkml_model.meta import ( AnonymousClassExpression, @@ -43,7 +44,6 @@ ) from linkml_runtime.utils.formatutils import camelcase, underscore from linkml_runtime.utils.introspection 
import package_schemaview -from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.yamlutils import YAMLRoot logger = logging.getLogger(__name__) @@ -703,9 +703,7 @@ def transform_class_expression( members = list(cls.exactly_one_of) if self.deterministic: members = sorted(members, key=_expression_sort_key) - sub_exprs: list[OWL_EXPRESSION] = self._present( - self.transform_class_expression(x) for x in members - ) + sub_exprs: list[OWL_EXPRESSION] = self._present(self.transform_class_expression(x) for x in members) if isinstance(cls, ClassDefinition): cls_uri = self._class_uri(cls.name) listnode = BNode() diff --git a/packages/linkml/src/linkml/generators/rdfgen.py b/packages/linkml/src/linkml/generators/rdfgen.py index ea2a04fd9b..95d832f2b3 100644 --- a/packages/linkml/src/linkml/generators/rdfgen.py +++ b/packages/linkml/src/linkml/generators/rdfgen.py @@ -19,8 +19,8 @@ from linkml._version import __version__ from linkml.generators.jsonldgen import JSONLDGenerator from linkml.utils.generator import Generator, shared_arguments -from linkml_runtime.linkml_model import SchemaDefinition from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph +from linkml_runtime.linkml_model import SchemaDefinition @dataclass diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py index 017bac29f1..148f702d7c 100644 --- a/packages/linkml/src/linkml/generators/shaclgen.py +++ b/packages/linkml/src/linkml/generators/shaclgen.py @@ -15,9 +15,9 @@ from linkml.generators.shacl.shacl_data_type import ShaclDataType from linkml.generators.shacl.shacl_ifabsent_processor import ShaclIfAbsentProcessor from linkml.utils.generator import Generator, normalize_graph_prefixes, shared_arguments +from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName, PresenceEnum from linkml_runtime.utils.formatutils import 
underscore -from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, extended_str logger = logging.getLogger(__name__) @@ -480,7 +480,7 @@ def _add_rules(self, g: Graph, shape_uri: URIRef, cls: ClassDefinition) -> None: message = getattr(rule, "description", None) if message: - g.add((constraint, SH.message, Literal(message))) + g.add((constraint, SH.message, Literal(message, lang=self._resolve_language()))) g.add((constraint, SH.select, Literal(sparql_query))) diff --git a/packages/linkml/src/linkml/generators/shexgen.py b/packages/linkml/src/linkml/generators/shexgen.py index 2787af0b93..704dd1ae61 100644 --- a/packages/linkml/src/linkml/generators/shexgen.py +++ b/packages/linkml/src/linkml/generators/shexgen.py @@ -15,6 +15,7 @@ from linkml._version import __version__ from linkml.generators.common.subproperty import get_subproperty_values from linkml.utils.generator import Generator, shared_arguments +from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.linkml_model.meta import ( ClassDefinition, ElementName, @@ -26,7 +27,6 @@ from linkml_runtime.linkml_model.types import SHEX from linkml_runtime.utils.formatutils import camelcase, sfx from linkml_runtime.utils.metamodelcore import URIorCURIE -from linkml.utils.rdf_canonicalize import canonicalize_rdf_graph @dataclass diff --git a/packages/linkml/src/linkml/utils/generator.py b/packages/linkml/src/linkml/utils/generator.py index efa83aadbd..0aab3c40dd 100644 --- a/packages/linkml/src/linkml/utils/generator.py +++ b/packages/linkml/src/linkml/utils/generator.py @@ -232,6 +232,8 @@ def normalize_graph_prefixes(graph: "Graph", schema_prefixes: dict[str, str]) -> if std_pfx in current_bindings and current_bindings[std_pfx] != ns_str: continue graph.bind(std_pfx, Namespace(ns_str), override=True, replace=True) + + def _wl_signatures( quads: list, iterations: int = 4, @@ -504,7 +506,6 @@ def 
_deep_sort(value: object, parent_key: str = "") -> object: _JSONLD_ORDERED_KEYS: frozenset[str] = frozenset({"@context", "@list", "@graph", "@set", "imports"}) - @dataclass class Generator(metaclass=abc.ABCMeta): """ diff --git a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py index ed3797f86d..96b064dbbb 100644 --- a/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py +++ b/packages/linkml_runtime/src/linkml_runtime/dumpers/rdflib_dumper.py @@ -9,7 +9,7 @@ from rdflib.term import BNode, Literal, Node from linkml_runtime.dumpers.dumper_root import Dumper -from linkml_runtime.linkml_model import ElementName, PermissibleValue, PermissibleValueText, SlotDefinition +from linkml_runtime.linkml_model import ElementName, PermissibleValue, SlotDefinition from linkml_runtime.utils.rdf_canonicalize import canonicalize_rdf_graph from linkml_runtime.utils.schemaview import SchemaView from linkml_runtime.utils.yamlutils import YAMLRoot diff --git a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py index ed0ec9556f..da57f23399 100644 --- a/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py +++ b/packages/linkml_runtime/src/linkml_runtime/utils/rdf_canonicalize.py @@ -211,10 +211,7 @@ def canonicalize_rdf_graph( # fragment-like characters such as double '#'). Retry without # the offending prefixes by falling back to no prefixes, which # still produces valid (if verbose) Turtle. 
- logger.warning( - "pyoxigraph rejected one or more prefix IRIs; " - "serializing without prefix declarations" - ) + logger.warning("pyoxigraph rejected one or more prefix IRIs; serializing without prefix declarations") result_bytes = ox.serialize( sorted_triples, format=ox_format, diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index ccf477ed22..96d5e23583 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -1303,6 +1303,7 @@ def _build_message_test_schema(): sb.add_defaults() return sb.schema + def _parse_shacl(schema, **kwargs): shacl = ShaclGenerator(schema, mergeimports=False, **kwargs).serialize() g = rdflib.Graph() @@ -1692,6 +1693,8 @@ def test_message_template_with_default_language(): # Verify the message is NOT a plain literal assert Literal("Validation of vehicle_name failed!") not in msgs + + # --------------------------------------------------------------------------- # --emit-rules / sh:sparql tests # --------------------------------------------------------------------------- @@ -2247,7 +2250,6 @@ def test_exclusive_value_sparql_uses_enum_iri(): assert f"<{edge_none_iri}>" in query, f"SPARQL must reference EdgeNone as full IRI <{edge_none_iri}>, got:\n{query}" - def test_exclusive_value_max_card_1_sparql_structure(): """For maximum_cardinality: 1, SPARQL uses FILTER(?other != ). From 3d3a52a3645107d4218ae4d24c1d4c15154656df Mon Sep 17 00:00:00 2001 From: Carlo van Driesten Date: Tue, 12 May 2026 14:17:49 +0200 Subject: [PATCH 15/15] fix(generators): normalize trailing newline in Turtle serialization rdflib's Turtle serializer always emits a trailing double newline. Normalize to single newline in deterministic_turtle() and the rdflib fallback path in canonicalize_rdf_graph() for consistent file endings. Note: CLI print() still adds a newline after serialize()'s trailing newline. 
Callers capturing stdout should strip trailing blank lines (e.g. via sed). Also drop the explicit trailing "\n" that end_schema() in jsonldcontextgen.py appended to its return value. Signed-off-by: Carlo van Driesten --- packages/linkml/src/linkml/generators/jsonldcontextgen.py | 4 ++-- packages/linkml/src/linkml/utils/generator.py | 4 +++- packages/linkml/src/linkml/utils/rdf_canonicalize.py | 5 ++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/packages/linkml/src/linkml/generators/jsonldcontextgen.py b/packages/linkml/src/linkml/generators/jsonldcontextgen.py index bc52c11008..0c81a0edc4 100644 --- a/packages/linkml/src/linkml/generators/jsonldcontextgen.py +++ b/packages/linkml/src/linkml/generators/jsonldcontextgen.py @@ -310,8 +310,8 @@ def end_schema( json.dump(frame, f, indent=2, ensure_ascii=False) if self.deterministic: - return self._deterministic_context_json(json.loads(str(as_json(context))), indent=3) + "\n" - return str(as_json(context)) + "\n" + return self._deterministic_context_json(json.loads(str(as_json(context))), indent=3) + return str(as_json(context)) @staticmethod def _deterministic_context_json(data: dict, indent: int = 3) -> str: diff --git a/packages/linkml/src/linkml/utils/generator.py b/packages/linkml/src/linkml/utils/generator.py index 0aab3c40dd..99121b50e4 100644 --- a/packages/linkml/src/linkml/utils/generator.py +++ b/packages/linkml/src/linkml/utils/generator.py @@ -459,7 +459,9 @@ def _to_rdflib(term): if pfx_s and any(iri.startswith(ns_s) for iri in used_iris): result_graph.bind(pfx_s, ns_s) - return result_graph.serialize(format="turtle") + # rdflib's Turtle serializer always emits a trailing double newline; + # normalize to a single newline for consistent file endings. 
+ return result_graph.serialize(format="turtle").rstrip("\n") + "\n" def deterministic_json(obj: object, indent: int = 3, preserve_list_order_keys: frozenset[str] | None = None) -> str: diff --git a/packages/linkml/src/linkml/utils/rdf_canonicalize.py b/packages/linkml/src/linkml/utils/rdf_canonicalize.py index da57f23399..4b6f093b29 100644 --- a/packages/linkml/src/linkml/utils/rdf_canonicalize.py +++ b/packages/linkml/src/linkml/utils/rdf_canonicalize.py @@ -146,7 +146,10 @@ def canonicalize_rdf_graph( "pyoxigraph does not support format %r; falling back to rdflib serializer", output_format, ) - return graph.serialize(format=output_format) + # rdflib's Turtle serializer emits a trailing double newline; + # normalize to single newline for consistent file endings. + data = graph.serialize(format=output_format) + return data.rstrip("\n") + "\n" if data.endswith("\n") else data # 1. Transfer rdflib graph to pyoxigraph via N-Triples. nt_data = graph.serialize(format="nt")