diff --git a/packages/linkml/src/linkml/generators/shaclgen.py b/packages/linkml/src/linkml/generators/shaclgen.py
index 2f2e6b5d1e..3faed1af15 100644
--- a/packages/linkml/src/linkml/generators/shaclgen.py
+++ b/packages/linkml/src/linkml/generators/shaclgen.py
@@ -14,7 +14,7 @@
 from linkml.generators.shacl.shacl_data_type import ShaclDataType
 from linkml.generators.shacl.shacl_ifabsent_processor import ShaclIfAbsentProcessor
 from linkml.utils.generator import Generator, shared_arguments
-from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName
+from linkml_runtime.linkml_model.meta import ClassDefinition, ElementName, PresenceEnum
 from linkml_runtime.utils.formatutils import underscore
 from linkml_runtime.utils.yamlutils import TypedNode, extended_float, extended_int, extended_str
 
@@ -74,6 +74,23 @@ class ShaclGenerator(Generator):
     """
     expand_subproperty_of: bool = True
     """If True, expand subproperty_of to sh:in constraints with slot descendants"""
+
+    emit_rules: bool = True
+    """Emit ``sh:sparql`` constraints from LinkML ``rules:`` blocks.
+
+    When ``True`` (default), recognised rule patterns are translated into
+    SHACL-SPARQL constraints (``sh:SPARQLConstraint``) on the corresponding
+    ``sh:NodeShape``. Currently two patterns are recognised:
+
+    * *Boolean guard* — a precondition with ``value_presence: PRESENT`` on a
+      value slot and a postcondition with ``equals_string: "true"`` on a
+      boolean flag slot.
+    * *Exclusive value* — a precondition with ``equals_string`` on a slot and
+      a postcondition with ``maximum_cardinality`` on the *same* slot.
+
+    See `W3C SHACL §5 <https://www.w3.org/TR/shacl/#sparql-constraints>`_
+    and `linkml/linkml#2464 <https://github.com/linkml/linkml/issues/2464>`_.
+    """
     generatorname = os.path.basename(__file__)
     generatorversion = "0.0.1"
     valid_formats = ["ttl"]
@@ -283,10 +300,228 @@ def st_node_pv(p, v):
                 if default_value:
                     prop_pv(SH.defaultValue, default_value)
 
+            if self.emit_rules:
+                self._add_rules(g, class_uri_with_suffix, c)
+
         return g
 
     LINKML_ANY_URI = "https://w3id.org/linkml/Any"
 
+    # -------------------------------------------------------------------
+    # Rules → sh:sparql
+    # -------------------------------------------------------------------
+
+    def _add_rules(self, g: Graph, shape_uri: URIRef, cls: ClassDefinition) -> None:
+        """Emit ``sh:sparql`` constraints from LinkML ``rules:`` blocks.
+
+        Each recognised rule is converted into an ``sh:SPARQLConstraint``
+        attached to *shape_uri*. Unrecognised patterns are logged at
+        ``DEBUG`` level and silently skipped.
+
+        Currently recognised patterns:
+
+        * **Boolean guard** — a *precondition* with
+          ``value_presence: PRESENT`` on a value slot and a *postcondition*
+          with ``equals_string: "true"`` on a boolean flag slot.
+
+        * **Exclusive value** — a *precondition* with ``equals_string`` on
+          a slot and a *postcondition* with ``maximum_cardinality`` on the
+          *same* slot. Enforces that when a specific value is present in a
+          multivalued slot, the total number of values must not exceed the
+          given cardinality (typically 1 for mutual exclusion).
+
+        See `W3C SHACL §5 <https://www.w3.org/TR/shacl/#sparql-constraints>`_.
+        """
+        if not cls.rules:
+            return
+
+        sv = self.schemaview
+        for rule in cls.rules:
+            if getattr(rule, "deactivated", False):
+                continue
+
+            if getattr(rule, "bidirectional", False):
+                logger.warning(
+                    "Rule in class %r has bidirectional=true; "
+                    "SHACL-SPARQL generation does not yet support bidirectional rules. "
+                    "Only the forward direction is emitted.",
+                    cls.name,
+                )
+
+            if getattr(rule, "open_world", False):
+                logger.warning(
+                    "Rule in class %r has open_world=true; "
+                    "SHACL operates under closed-world assumption. "
+                    "The constraint is emitted but may not match open-world semantics.",
+                    cls.name,
+                )
+
+            sparql_query = self._rule_to_sparql(sv, cls, rule)
+            if sparql_query is None:
+                logger.debug(
+                    "Skipping unsupported rule pattern in class %r: %s",
+                    cls.name,
+                    getattr(rule, "description", None) or "(no description)",  # attribute may be None
+                )
+                continue
+
+            constraint = BNode()
+            g.add((shape_uri, SH.sparql, constraint))
+            g.add((constraint, RDF.type, SH.SPARQLConstraint))
+
+            message = getattr(rule, "description", None)
+            if message:
+                g.add((constraint, SH.message, Literal(message)))
+
+            g.add((constraint, SH.select, Literal(sparql_query)))
+
+    def _rule_to_sparql(self, sv, cls: ClassDefinition, rule) -> str | None:
+        """Convert a ``ClassRule`` to a SPARQL SELECT query string.
+
+        Returns ``None`` when the rule does not match any supported pattern.
+        """
+        pre = getattr(rule, "preconditions", None)
+        post = getattr(rule, "postconditions", None)
+        if not pre or not post:
+            return None
+
+        pre_slots = getattr(pre, "slot_conditions", None) or {}
+        post_slots = getattr(post, "slot_conditions", None) or {}
+
+        # Pattern: boolean guard
+        # preconditions: exactly one slot with value_presence PRESENT
+        # postconditions: exactly one slot with equals_string "true"
+        if len(pre_slots) == 1 and len(post_slots) == 1:
+            pre_slot_name = next(iter(pre_slots))
+            post_slot_name = next(iter(post_slots))
+
+            pre_cond = pre_slots[pre_slot_name]
+            post_cond = post_slots[post_slot_name]
+
+            is_value_present = getattr(pre_cond, "value_presence", None) == PresenceEnum(PresenceEnum.PRESENT)
+            is_flag_true = getattr(post_cond, "equals_string", None) == "true"
+
+            if is_value_present and is_flag_true:
+                return self._build_boolean_guard_sparql(sv, cls, post_slot_name, pre_slot_name)
+
+            # Pattern: exclusive value
+            # preconditions: slot X has equals_string (a specific enum value)
+            # postconditions: same slot X has maximum_cardinality N
+            # Semantics: "If value V is present in slot X, then X has at most N values."
+            pre_equals = getattr(pre_cond, "equals_string", None)
+            post_max_card = getattr(post_cond, "maximum_cardinality", None)
+
+            if pre_equals is not None and post_max_card is not None and pre_slot_name == post_slot_name:
+                return self._build_exclusive_value_sparql(sv, cls, pre_slot_name, pre_equals, int(post_max_card))
+
+        return None
+
+    def _build_boolean_guard_sparql(self, sv, cls: ClassDefinition, flag_slot_name: str, value_slot_name: str) -> str:
+        """Build a SPARQL SELECT query for the boolean-guard pattern.
+
+        The query detects violations where the value property is present
+        but the boolean flag is absent or not ``true``.
+
+        Conforms to `SHACL §5.3.1
+        <https://www.w3.org/TR/shacl/#sparql-constraints-prebound>`_:
+        ``$this`` is pre-bound to each focus node.
+        """
+        flag_uri = self._slot_uri(sv, flag_slot_name, cls)
+        value_uri = self._slot_uri(sv, value_slot_name, cls)
+
+        return (
+            f"SELECT $this WHERE {{\n"
+            f" OPTIONAL {{ $this <{flag_uri}> ?flag . }}\n"
+            f" OPTIONAL {{ $this <{value_uri}> ?value . }}\n"
+            f" FILTER (\n"
+            f" ( !BOUND(?flag) || ?flag != true ) &&\n"
+            f" BOUND(?value)\n"
+            f" )\n"
+            f"}}"
+        )
+
+    def _build_exclusive_value_sparql(
+        self,
+        sv,
+        cls: ClassDefinition,
+        slot_name: str,
+        value_name: str,
+        max_card: int,
+    ) -> str | None:
+        """Build a SPARQL SELECT query for the exclusive-value pattern.
+
+        Detects violations where a specific value is present in a multivalued
+        slot but the total number of values exceeds *max_card*.
+
+        For the common case ``max_card == 1``, the query checks whether the
+        exclusive value coexists with any other value (simple existence test).
+        For ``max_card > 1``, a subquery counts all values and checks against
+        the limit.
+
+        The exclusive value is resolved to its full IRI via the slot's enum
+        ``meaning`` field. If the slot is not an enum or the value has no
+        ``meaning``, the value is compared as a plain literal.
+
+        Conforms to `SHACL §5.3.1
+        <https://www.w3.org/TR/shacl/#sparql-constraints-prebound>`_:
+        ``$this`` is pre-bound to each focus node.
+        """
+        slot_uri = self._slot_uri(sv, slot_name, cls)
+        value_ref = self._resolve_enum_value_ref(sv, slot_name, value_name)
+
+        if max_card == 1:
+            return (
+                f"SELECT $this WHERE {{\n"
+                f" $this <{slot_uri}> {value_ref} .\n"
+                f" $this <{slot_uri}> ?other .\n"
+                f" FILTER (?other != {value_ref})\n"
+                f"}}"
+            )
+
+        return (
+            f"SELECT $this WHERE {{\n"
+            f" $this <{slot_uri}> {value_ref} .\n"
+            f" {{\n"
+            f" SELECT $this (COUNT(?val) AS ?count)\n"
+            f" WHERE {{ $this <{slot_uri}> ?val . }}\n"
+            f" GROUP BY $this\n"
+            f" HAVING (COUNT(?val) > {max_card})\n"  # the SELECT alias ?count is not in scope inside HAVING
+            f" }}\n"
+            f"}}"
+        )
+
+    def _resolve_enum_value_ref(self, sv, slot_name: str, value_name: str) -> str:
+        """Resolve an enum value name to a SPARQL term (IRI or literal).
+
+        Looks up the slot's range as an enum, finds the permissible value
+        matching *value_name*, and returns its ``meaning`` as a full IRI
+        wrapped in angle brackets. Falls back to a quoted, escaped literal
+        if the slot is not an enum or the value lacks a ``meaning``.
+        """
+        slot = sv.get_slot(slot_name)
+        if slot:
+            range_name = slot.range
+            if range_name and range_name in sv.all_enums():
+                enum = sv.get_enum(range_name)
+                pv = enum.permissible_values.get(value_name)
+                if pv and pv.meaning:
+                    iri = sv.expand_curie(pv.meaning)
+                    return f"<{iri}>"
+        return Literal(str(value_name)).n3()  # n3() quotes and escapes so the query stays valid SPARQL
+
+    def _slot_uri(self, sv, slot_name: str, cls: ClassDefinition) -> str:
+        """Resolve a slot name to a full IRI string for use in SPARQL queries.
+
+        Mirrors the resolution logic used for ``sh:path`` in the main slot loop:
+        prefer ``sv.get_uri()`` for slots registered in the schema map, fall
+        back to ``default_prefix:underscored_name``.
+        """
+        slot = sv.get_slot(slot_name)
+        if slot and slot_name in sv.element_by_schema_map():
+            return sv.get_uri(slot, expand=True)
+        pfx = sv.schema.default_prefix
+        return sv.expand_curie(f"{pfx}:{underscore(slot_name)}")
+
     def _add_class(self, func: Callable, r: ElementName) -> None:
         """Add an sh:class constraint for range class *r*.
 
@@ -526,6 +761,17 @@ def add_simple_data_type(func: Callable, r: ElementName) -> None: help="If --expand-subproperty-of (default), slots with subproperty_of will generate sh:in constraints " "containing all slot descendants. Use --no-expand-subproperty-of to disable this behavior.", ) +@click.option( + "--emit-rules/--no-emit-rules", + default=True, + show_default=True, + help=( + "Emit sh:sparql constraints from LinkML rules: blocks. " + "When enabled (default), recognised rule patterns (e.g. boolean-guard) " + "are translated into SHACL-SPARQL constraints on the corresponding " + "sh:NodeShape. Use --no-emit-rules to suppress rule generation." + ), +) @click.version_option(__version__, "-V", "--version") def cli(yamlfile, **args): """Generate SHACL turtle from a LinkML model""" diff --git a/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml b/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml new file mode 100644 index 0000000000..f56c2eca6a --- /dev/null +++ b/tests/linkml/test_generators/input/shaclgen/boolean_guard_rules.yaml @@ -0,0 +1,70 @@ +id: https://example.org/boolean-guards +name: boolean_guard_rules +description: >- + Test schema for SHACL generation of sh:sparql constraints from LinkML rules. + Models the boolean-guard pattern where a boolean flag must be true if a + corresponding value property is present. + +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/boolean-guards/ + +imports: + - linkml:types + +default_prefix: ex +default_range: string + +slots: + WeatherWind: + description: Whether wind conditions are present. + range: boolean + slot_uri: ex:WeatherWind + weatherWindValue: + description: Wind speed value. + range: decimal + slot_uri: ex:weatherWindValue + WeatherRain: + description: Whether rain conditions are present. + range: boolean + slot_uri: ex:WeatherRain + weatherRainValue: + description: Rain intensity value. 
+ range: decimal + slot_uri: ex:weatherRainValue + Temperature: + description: Ambient temperature. + range: decimal + slot_uri: ex:Temperature + +classes: + Environment: + description: Environmental conditions. + class_uri: ex:Environment + slots: + - WeatherWind + - weatherWindValue + - WeatherRain + - weatherRainValue + - Temperature + rules: + - description: >- + If weatherWindValue is provided, WeatherWind must be true. + preconditions: + slot_conditions: + weatherWindValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherWind: + equals_string: "true" + - description: >- + If weatherRainValue is provided, WeatherRain must be true. + preconditions: + slot_conditions: + weatherRainValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherRain: + equals_string: "true" diff --git a/tests/linkml/test_generators/test_shaclgen.py b/tests/linkml/test_generators/test_shaclgen.py index c99547df7e..3017b49251 100644 --- a/tests/linkml/test_generators/test_shaclgen.py +++ b/tests/linkml/test_generators/test_shaclgen.py @@ -1160,3 +1160,772 @@ def test_nodeidentifier_range_produces_blank_node_or_iri(): uri_ref = props["https://example.org/uriRef"] uri_kinds = list(g.objects(uri_ref, SH.nodeKind)) assert SH.IRI in uri_kinds, f"Expected sh:IRI for uri, got {uri_kinds}" + + +# --------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------- + + +def _parse_shacl(schema, **kwargs): + shacl = ShaclGenerator(schema, mergeimports=False, **kwargs).serialize() + g = rdflib.Graph() + g.parse(data=shacl) + return g + + +# --------------------------------------------------------------------------- +# --emit-rules / sh:sparql tests +# --------------------------------------------------------------------------- + +_RULES_SCHEMA_YAML = """ +id: https://example.org/boolean-guards +name: boolean_guard_rules +prefixes: + linkml: 
https://w3id.org/linkml/ + ex: https://example.org/boolean-guards/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + WeatherWind: + range: boolean + slot_uri: ex:WeatherWind + weatherWindValue: + description: Wind speed value. + range: decimal + slot_uri: ex:weatherWindValue + WeatherRain: + range: boolean + slot_uri: ex:WeatherRain + weatherRainValue: + description: Rain intensity value. + range: decimal + slot_uri: ex:weatherRainValue + Temperature: + range: decimal + slot_uri: ex:Temperature +classes: + Environment: + class_uri: ex:Environment + slots: + - WeatherWind + - weatherWindValue + - WeatherRain + - weatherRainValue + - Temperature + rules: + - description: If weatherWindValue is provided, WeatherWind must be true. + preconditions: + slot_conditions: + weatherWindValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherWind: + equals_string: "true" + - description: If weatherRainValue is provided, WeatherRain must be true. + preconditions: + slot_conditions: + weatherRainValue: + value_presence: PRESENT + postconditions: + slot_conditions: + WeatherRain: + equals_string: "true" +""" + +EX_RULES = rdflib.Namespace("https://example.org/boolean-guards/") + + +def test_rule_boolean_guard_generates_sparql(): + """Boolean-guard rules produce sh:sparql constraints on the NodeShape.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2, f"Expected 2 sh:sparql constraints, got {len(sparql_nodes)}" + + for node in sparql_nodes: + assert (node, RDF.type, SH.SPARQLConstraint) in g + selects = list(g.objects(node, SH.select)) + assert len(selects) == 1, "Each constraint must have exactly one sh:select" + query = str(selects[0]) + assert "$this" in query, "SPARQL must use $this pre-bound variable" + assert "OPTIONAL" in query, "SPARQL must use OPTIONAL for flag/value" + assert "FILTER" in query, "SPARQL must have 
a FILTER clause" + assert "BOUND" in query, "SPARQL must use BOUND()" + + +def test_rule_with_description_generates_message(): + """Rule description is emitted as sh:message on the SPARQLConstraint.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + + messages = set() + for node in sparql_nodes: + for msg in g.objects(node, SH.message): + messages.add(str(msg)) + + assert "If weatherWindValue is provided, WeatherWind must be true." in messages + assert "If weatherRainValue is provided, WeatherRain must be true." in messages + + +def test_rule_sparql_contains_correct_uris(): + """SPARQL queries reference the correct slot URIs.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + all_sparql = "\n".join(queries) + + assert str(EX_RULES.WeatherWind) in all_sparql + assert str(EX_RULES.weatherWindValue) in all_sparql + assert str(EX_RULES.WeatherRain) in all_sparql + assert str(EX_RULES.weatherRainValue) in all_sparql + + +_DEACTIVATED_RULE_SCHEMA_YAML = """ +id: https://example.org/deactivated-test +name: deactivated_rule_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/deactivated-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + Flag: + range: boolean + slot_uri: ex:Flag + flagValue: + range: decimal + slot_uri: ex:flagValue +classes: + TestClass: + class_uri: ex:TestClass + slots: + - Flag + - flagValue + rules: + - description: This rule is deactivated. 
+ deactivated: true + preconditions: + slot_conditions: + flagValue: + value_presence: PRESENT + postconditions: + slot_conditions: + Flag: + equals_string: "true" +""" + + +def test_rule_deactivated_skipped(): + """Deactivated rules do not produce sh:sparql constraints.""" + g = _parse_shacl(_DEACTIVATED_RULE_SCHEMA_YAML) + + shape = URIRef("https://example.org/deactivated-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, f"Deactivated rule should not emit sh:sparql, got {len(sparql_nodes)}" + + +_UNSUPPORTED_RULE_SCHEMA_YAML = """ +id: https://example.org/unsupported-test +name: unsupported_rule_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/unsupported-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + slotA: + range: string + slot_uri: ex:slotA + slotB: + range: string + slot_uri: ex:slotB +classes: + TestClass: + class_uri: ex:TestClass + slots: + - slotA + - slotB + rules: + - description: Rule with no postconditions. 
+ preconditions: + slot_conditions: + slotA: + value_presence: PRESENT +""" + + +def test_rule_unsupported_pattern_skipped(): + """Unrecognised rule patterns are silently skipped (no sh:sparql emitted).""" + g = _parse_shacl(_UNSUPPORTED_RULE_SCHEMA_YAML) + + shape = URIRef("https://example.org/unsupported-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0 + + +def test_rule_no_emit_rules_flag(): + """--no-emit-rules suppresses sh:sparql constraint generation.""" + g = _parse_shacl(_RULES_SCHEMA_YAML, emit_rules=False) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, f"emit_rules=False should suppress rules, got {len(sparql_nodes)}" + + +_NO_RULES_SCHEMA_YAML = """ +id: https://example.org/no-rules +name: no_rules_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/no-rules/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + name: + range: string + slot_uri: ex:name +classes: + SimpleClass: + class_uri: ex:SimpleClass + slots: + - name +""" + + +def test_rule_no_rules_no_sparql(): + """Classes without rules: blocks produce no sh:sparql constraints.""" + g = _parse_shacl(_NO_RULES_SCHEMA_YAML) + + shape = URIRef("https://example.org/no-rules/SimpleClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0 + + +def test_rule_multiple_rules_per_class(): + """Multiple boolean-guard rules on one class produce multiple sh:sparql constraints.""" + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2 + + # Each constraint should reference different slot pairs + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + wind_query = [q for q in queries if "weatherWindValue" in q] + rain_query = [q for q in queries if "weatherRainValue" in q] + assert 
len(wind_query) == 1, "Expected exactly one wind query" + assert len(rain_query) == 1, "Expected exactly one rain query" + + +# --------------------------------------------------------------------------- +# Tests for URI resolution without explicit slot_uri +# --------------------------------------------------------------------------- + +_NO_SLOT_URI_SCHEMA_YAML = """ +id: https://example.org/no-slot-uri +name: no_slot_uri_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/no-slot-uri/ +imports: + - linkml:types +default_prefix: ex +default_range: string +slots: + is_active: + range: boolean + measured_value: + range: decimal +classes: + Reading: + class_uri: ex:Reading + slots: + - is_active + - measured_value + rules: + - description: If measured_value is provided, is_active must be true. + preconditions: + slot_conditions: + measured_value: + value_presence: PRESENT + postconditions: + slot_conditions: + is_active: + equals_string: "true" +""" + + +def test_rule_no_explicit_slot_uri(): + """Slots without explicit slot_uri resolve via default_prefix + underscore(name).""" + g = _parse_shacl(_NO_SLOT_URI_SCHEMA_YAML) + + shape = URIRef("https://example.org/no-slot-uri/Reading") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 1 + + query = str(list(g.objects(sparql_nodes[0], SH.select))[0]) + # URIs should be default_prefix:underscore(name) + assert "https://example.org/no-slot-uri/is_active" in query + assert "https://example.org/no-slot-uri/measured_value" in query + + +# --------------------------------------------------------------------------- +# Tests for elseconditions rejection +# --------------------------------------------------------------------------- + +_ELSE_COND_SCHEMA_YAML = """ +id: https://example.org/else-test +name: else_cond_test +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/else-test/ +imports: + - linkml:types +default_prefix: ex +default_range: string 
+slots: + Flag: + range: boolean + slot_uri: ex:Flag + flagValue: + range: decimal + slot_uri: ex:flagValue + fallbackValue: + range: string + slot_uri: ex:fallbackValue +classes: + TestClass: + class_uri: ex:TestClass + slots: + - Flag + - flagValue + - fallbackValue + rules: + - description: Rule with elseconditions should be skipped. + preconditions: + slot_conditions: + flagValue: + value_presence: PRESENT + postconditions: + slot_conditions: + Flag: + equals_string: "true" + elseconditions: + slot_conditions: + fallbackValue: + value_presence: PRESENT +""" + + +def test_rule_with_elseconditions_emitted(): + """Rules with elseconditions now emit the forward (if/then) branch as sh:sparql.""" + g = _parse_shacl(_ELSE_COND_SCHEMA_YAML) + + shape = URIRef("https://example.org/else-test/TestClass") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) >= 1, "Rule with elseconditions should emit sh:sparql for the forward branch" + + +# --------------------------------------------------------------------------- +# SPARQL syntax validation +# --------------------------------------------------------------------------- + + +def test_rule_sparql_syntax_valid(): + """Generated SPARQL queries must be syntactically valid.""" + from rdflib.plugins.sparql import prepareQuery + + g = _parse_shacl(_RULES_SCHEMA_YAML) + + shape = EX_RULES.Environment + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) >= 1 + + for node in sparql_nodes: + query_text = str(list(g.objects(node, SH.select))[0]) + # prepareQuery validates SPARQL syntax; $this is a valid variable name + prepareQuery(query_text) + + +# =========================================================================== +# Exclusive-value pattern tests (SHACL §5 SPARQL constraints) +# =========================================================================== +# +# The "exclusive value" pattern translates a LinkML rule where: +# - preconditions: slot X has equals_string (a 
specific enum value name) +# - postconditions: same slot X has maximum_cardinality N +# +# Semantics: "If value V is present in multivalued slot X, then X has at most +# N values total." For N=1 this means V must be the sole value (mutual +# exclusion with other enum members). +# +# Generated SHACL: sh:SPARQLConstraint per W3C SHACL §5.3.1, using $this +# pre-bound to each focus node. +# +# References: +# - W3C SHACL §5 +# - W3C SHACL §5.3.1 +# - ISO 34503:2023, 9.3.6 (motivating use case: EdgeNone exclusivity) +# =========================================================================== + +_EXCLUSIVE_VALUE_SCHEMA_YAML = """ +id: https://example.org/exclusive-value +name: exclusive_value_rules +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/exclusive-value/ +imports: + - linkml:types +default_prefix: ex +default_range: string + +enums: + EdgeTypeEnum: + permissible_values: + EdgeNone: + meaning: ex:EdgeNone + EdgeBarriers: + meaning: ex:EdgeBarriers + EdgeMarkers: + meaning: ex:EdgeMarkers + + PriorityEnum: + permissible_values: + High: + description: High priority (no meaning IRI). + Medium: + description: Medium priority (no meaning IRI). + Low: + description: Low priority (no meaning IRI). + +slots: + edgeType: + range: EdgeTypeEnum + multivalued: true + slot_uri: ex:edgeType + priority: + range: PriorityEnum + multivalued: true + slot_uri: ex:priority + otherSlot: + range: string + slot_uri: ex:otherSlot + +classes: + Road: + class_uri: ex:Road + slots: + - edgeType + - otherSlot + rules: + - description: >- + EdgeNone is mutually exclusive with other edge types. + preconditions: + slot_conditions: + edgeType: + equals_string: "EdgeNone" + postconditions: + slot_conditions: + edgeType: + maximum_cardinality: 1 + + Intersection: + class_uri: ex:Intersection + slots: + - edgeType + rules: + - description: >- + EdgeNone allows at most 2 total edge values. 
+        preconditions:
+          slot_conditions:
+            edgeType:
+              equals_string: "EdgeNone"
+        postconditions:
+          slot_conditions:
+            edgeType:
+              maximum_cardinality: 2
+
+  Task:
+    class_uri: ex:Task
+    slots:
+      - priority
+    rules:
+      - description: >-
+          High priority is exclusive (literal fallback test).
+        preconditions:
+          slot_conditions:
+            priority:
+              equals_string: "High"
+        postconditions:
+          slot_conditions:
+            priority:
+              maximum_cardinality: 1
+
+  MismatchedSlots:
+    class_uri: ex:MismatchedSlots
+    slots:
+      - edgeType
+      - otherSlot
+    rules:
+      - description: >-
+          Different slots in pre/post — not an exclusive-value pattern.
+        preconditions:
+          slot_conditions:
+            edgeType:
+              equals_string: "EdgeNone"
+        postconditions:
+          slot_conditions:
+            otherSlot:
+              maximum_cardinality: 1
+"""
+
+EX_EXCL = rdflib.Namespace("https://example.org/exclusive-value/")
+
+
+def test_exclusive_value_generates_sparql():
+    """Exclusive-value rules produce sh:sparql constraints on the NodeShape."""
+    g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML)
+
+    shape = EX_EXCL.Road
+    sparql_nodes = list(g.objects(shape, SH.sparql))
+    assert len(sparql_nodes) == 1, f"Expected 1 sh:sparql constraint, got {len(sparql_nodes)}"
+
+    node = sparql_nodes[0]
+    assert (node, RDF.type, SH.SPARQLConstraint) in g
+    selects = list(g.objects(node, SH.select))
+    assert len(selects) == 1, "Constraint must have exactly one sh:select"
+
+
+def test_exclusive_value_sparql_uses_enum_iri():
+    """SPARQL references the enum value's meaning IRI, not a string literal.
+
+    Per the enum definition, EdgeNone has meaning: ex:EdgeNone which expands
+    to <https://example.org/exclusive-value/EdgeNone>. The generated SPARQL
+    must use this full IRI in angle brackets.
+    """
+    g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML)
+
+    shape = EX_EXCL.Road
+    sparql_nodes = list(g.objects(shape, SH.sparql))
+    query = str(list(g.objects(sparql_nodes[0], SH.select))[0])
+
+    edge_none_iri = str(EX_EXCL.EdgeNone)
+    assert f"<{edge_none_iri}>" in query, f"SPARQL must reference EdgeNone as full IRI <{edge_none_iri}>, got:\n{query}"
+
+
+def test_exclusive_value_max_card_1_sparql_structure():
+    """For maximum_cardinality: 1, SPARQL uses FILTER(?other != <value>).
+
+    The query pattern for N=1 is:
+        SELECT $this WHERE {
+          $this <slot> <value> .
+          $this <slot> ?other .
+          FILTER (?other != <value>)
+        }
+
+    This is more efficient than the COUNT-based approach for the common
+    singleton exclusion case.
+    """
+    g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML)
+
+    shape = EX_EXCL.Road
+    sparql_nodes = list(g.objects(shape, SH.sparql))
+    query = str(list(g.objects(sparql_nodes[0], SH.select))[0])
+
+    assert "$this" in query, "SPARQL must use $this pre-bound variable (SHACL §5.3.1)"
+    assert "FILTER" in query, "N=1 pattern must use FILTER for exclusion check"
+    assert "?other" in query, "N=1 pattern must bind ?other for comparison"
+    # Must NOT use COUNT for the N=1 case (simpler pattern)
+    assert "COUNT" not in query, "N=1 pattern should use FILTER, not COUNT"
+    # The slot URI must appear (property path)
+    assert str(EX_EXCL.edgeType) in query, "SPARQL must reference the slot URI"
+
+
+def test_exclusive_value_max_card_gt1_sparql_structure():
+    """For maximum_cardinality > 1, SPARQL uses COUNT-based subquery.
+
+    The query pattern for N>1 is:
+        SELECT $this WHERE {
+          $this <slot> <value> .
+          {
+            SELECT $this (COUNT(?val) AS ?count)
+            WHERE { $this <slot> ?val . }
+            GROUP BY $this
+            HAVING (?count > N)
+          }
+        }
+    """
+    g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML)
+
+    shape = EX_EXCL.Intersection
+    sparql_nodes = list(g.objects(shape, SH.sparql))
+    assert len(sparql_nodes) == 1, f"Expected 1 sh:sparql constraint, got {len(sparql_nodes)}"
+
+    query = str(list(g.objects(sparql_nodes[0], SH.select))[0])
+
+    assert "$this" in query, "SPARQL must use $this pre-bound variable"
+    assert "COUNT" in query, "N>1 pattern must use COUNT"
+    assert "GROUP BY" in query, "N>1 pattern must GROUP BY $this"
+    assert "HAVING" in query, "N>1 pattern must use HAVING for count check"
+    assert "> 2" in query, "HAVING must check count > maximum_cardinality (2)"
+
+
+def test_exclusive_value_no_meaning_falls_back_to_literal():
+    """When enum values lack a meaning IRI, the value is compared as a literal.
+
+    PriorityEnum values have no meaning field, so 'High' is used as a
+    quoted string in the SPARQL rather than an IRI in angle brackets.
+    """
+    g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML)
+
+    shape = EX_EXCL.Task
+    sparql_nodes = list(g.objects(shape, SH.sparql))
+    assert len(sparql_nodes) == 1, f"Expected 1 sh:sparql constraint, got {len(sparql_nodes)}"
+
+    query = str(list(g.objects(sparql_nodes[0], SH.select))[0])
+
+    # Should use quoted literal, not angle-bracket IRI
+    assert '"High"' in query, f"No-meaning enum should use literal '\"High\"', got:\n{query}"
+    assert f"<{EX_EXCL.High}>" not in query, "Should not emit as IRI when meaning is absent"
+
+
+def test_exclusive_value_different_slots_not_recognised():
+    """Rules where pre/post reference different slots are NOT exclusive-value.
+
+    The pattern requires the SAME slot in both preconditions and
+    postconditions. When they differ, the rule is unrecognised and
+    silently skipped (no sh:sparql emitted).
+ """ + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.MismatchedSlots + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 0, ( + f"Mismatched slots should not trigger exclusive-value pattern, got {len(sparql_nodes)}" + ) + + +def test_exclusive_value_message_from_description(): + """Rule description is emitted as sh:message on the SPARQLConstraint.""" + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + shape = EX_EXCL.Road + sparql_nodes = list(g.objects(shape, SH.sparql)) + messages = [str(m) for node in sparql_nodes for m in g.objects(node, SH.message)] + + assert any("EdgeNone is mutually exclusive" in m for m in messages), ( + f"Expected message about EdgeNone exclusivity, got: {messages}" + ) + + +def test_exclusive_value_sparql_syntax_valid(): + """Generated SPARQL for exclusive-value rules must be syntactically valid. + + Uses rdflib's prepareQuery() which validates SPARQL syntax. + $this is a valid SPARQL variable name per the grammar. + """ + from rdflib.plugins.sparql import prepareQuery + + g = _parse_shacl(_EXCLUSIVE_VALUE_SCHEMA_YAML) + + for shape in (EX_EXCL.Road, EX_EXCL.Intersection, EX_EXCL.Task): + sparql_nodes = list(g.objects(shape, SH.sparql)) + for node in sparql_nodes: + query_text = str(list(g.objects(node, SH.select))[0]) + # prepareQuery validates SPARQL syntax + prepareQuery(query_text) + + +def test_exclusive_value_coexists_with_boolean_guard(): + """Exclusive-value and boolean-guard rules can coexist on the same class. + + When a class has both pattern types, both produce sh:sparql constraints. 
+ """ + schema = """ +id: https://example.org/mixed-rules +name: mixed_rules +prefixes: + linkml: https://w3id.org/linkml/ + ex: https://example.org/mixed-rules/ +imports: + - linkml:types +default_prefix: ex +default_range: string + +enums: + StatusEnum: + permissible_values: + None: + meaning: ex:None + Active: + meaning: ex:Active + +slots: + status: + range: StatusEnum + multivalued: true + slot_uri: ex:status + Flag: + range: boolean + slot_uri: ex:Flag + flagValue: + range: decimal + slot_uri: ex:flagValue + +classes: + Widget: + class_uri: ex:Widget + slots: + - status + - Flag + - flagValue + rules: + - description: None is exclusive. + preconditions: + slot_conditions: + status: + equals_string: "None" + postconditions: + slot_conditions: + status: + maximum_cardinality: 1 + - description: If flagValue present, Flag must be true. + preconditions: + slot_conditions: + flagValue: + value_presence: PRESENT + postconditions: + slot_conditions: + Flag: + equals_string: "true" +""" + g = _parse_shacl(schema) + + shape = URIRef("https://example.org/mixed-rules/Widget") + sparql_nodes = list(g.objects(shape, SH.sparql)) + assert len(sparql_nodes) == 2, ( + f"Expected 2 sh:sparql constraints (1 exclusive + 1 boolean guard), got {len(sparql_nodes)}" + ) + + queries = [str(list(g.objects(n, SH.select))[0]) for n in sparql_nodes] + # One should have FILTER(?other != ...) pattern, the other BOUND pattern + has_exclusive = any("?other" in q for q in queries) + has_boolean = any("BOUND" in q for q in queries) + assert has_exclusive, "Expected one exclusive-value SPARQL constraint" + assert has_boolean, "Expected one boolean-guard SPARQL constraint"