From b139c26b915cd991013c6267906191a330b10543 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Mon, 1 Sep 2025 18:34:52 +0100 Subject: [PATCH 1/7] Add method to infer minimum compatible SSSOM version. Add a new method to the MappingSetDataFrame class to automatically determine the minimum version of the SSSOM specification the set is compatible with -- that is, the earliest version that defines all the slots and all the enum values present in the set. --- src/sssom/constants.py | 22 +++++++++++++++++++ src/sssom/util.py | 50 ++++++++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 37 +++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/src/sssom/constants.py b/src/sssom/constants.py index b858405c..d7cfa808 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -10,6 +10,7 @@ import yaml from linkml_runtime.utils.schema_as_dict import schema_as_dict from linkml_runtime.utils.schemaview import SchemaView +from sssom_schema.datamodel.sssom_schema import SssomVersionEnum HERE = pathlib.Path(__file__).parent.resolve() @@ -278,6 +279,27 @@ def propagatable_slots(self) -> List[str]: slots.append(slot_name) return slots + def get_minimum_version(self, slot_name: str, class_name: str = "mapping"): + """Get the minimum version of SSSOM required for a given slot. + + :param slot_name: The queried slot. + :param class_name: The class the slot belongs to. This is needed + because a slot may have been added to a class + in a later version than the version in which + it was first introduced in the schema. + :return: A SssomVersionEnum value representing the earliest + version of SSSOM that defines the given slot in the + given class. May be None if the requested slot name + is not a valid slot name. 
+ """ + try: + slot = self.view.induced_slot(slot_name, class_name) + return SssomVersionEnum(slot.annotations.added_in.value) + except AttributeError: # No added_in annotation, defaults to 1.0 + return SssomVersionEnum("1.0") + except ValueError: # No such slot + return None + @lru_cache(1) def _get_sssom_schema_object() -> SSSOMSchemaView: diff --git a/src/sssom/util.py b/src/sssom/util.py index b49d08fd..8425f9e7 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -393,6 +393,56 @@ def condense(self) -> List[str]: self.df.drop(columns=condensed, inplace=True) return condensed + def get_compatible_version(self): + """Get the minimum version of SSSOM this set is compatible with.""" + schema = SSSOMSchemaView() + versions = set() + + # First get the minimum versions required by the slots present + # in the set; this is entirely provided by the SSSOM model. + for slot in self.metadata.keys(): + version = schema.get_minimum_version(slot, "mapping set") + if version is not None: + versions.add(str(version)) + for slot in self.df.columns: + version = schema.get_minimum_version(slot, "mapping") + if version is not None: + versions.add(str(version)) + + # Then take care of enum values; we cannot use the SSSOM model + # for that (enum values are not tagged with an "added_in" + # annotation the way slots are), so this has to be handled + # "manually" based on the informations provided in + # . 
+ if ( + self.metadata.get("subject_type") == "composed entity expression" + or self.metadata.get("subject_type") == "composed entity expression" + or ( + "subject_type" in self.df.columns + and "composed entity expression" in self.df["subject_type"].values + ) + or ( + "object_type" in self.df.columns + and "composed entity expression" in self.df["object_type"].values + ) + ): + versions.add("1.1") + + if ( + "mapping_cardinality" in self.df.columns + and "0:0" in self.df["mapping_cardinality"].values + ): + versions.add("1.1") + + # Get the highest of the accumulated versions. We do a numerical + # sort, so that version 1.10 (if we ever get that far in the 1.x + # branch) does not get sorted before version 1.9. + def _version_to_compare_key(version): + major, minor = [int(s) for s in version.split(".")] + return (major * 100) + minor + + return sorted(versions, key=_version_to_compare_key)[-1] + def _standardize_curie_or_iri(curie_or_iri: str, *, converter: Converter) -> str: """Standardize a CURIE or IRI, returning the original if not possible. 
diff --git a/tests/test_utils.py b/tests/test_utils.py index 91e187d0..206deffc 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -595,3 +595,40 @@ def test_propagation_fill_empty_mode(self) -> None: self.assertIn("mapping_tool", propagated_slots) self.assertNotIn("mapping_tool", msdf.metadata) self.assertEqual(2, len(msdf.df["mapping_tool"].unique())) + + def test_inferring_compatible_version(self) -> None: + """Test that we can correctly infer the version a set is compatible with.""" + msdf10 = parse_sssom_table(f"{data_dir}/basic.tsv") + + # Nothing in that set requires 1.1 + self.assertEqual("1.0", msdf10.get_compatible_version()) + + def _clone(msdf): + return MappingSetDataFrame(df=msdf.df.copy(), metadata=msdf.metadata.copy()) + + # Inject a 1.1-specific mapping set slot + msdf11 = _clone(msdf10) + msdf11.metadata["cardinality_scope"] = "predicate_id" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Inject a 1.1-specific mapping slot + msdf11 = _clone(msdf10) + msdf11.df["predicate_type"] = "owl object property" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Inject a 1.1-specific entity_type_enum value + msdf11 = _clone(msdf10) + msdf11.metadata["subject_type"] = "composed entity expression" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Same, but on a single mapping record + msdf11 = _clone(msdf10) + msdf11.df["object_type"] = "owl class" + msdf11.df.loc[2, "object_type"] = "composed entity expression" + self.assertEqual("1.1", msdf11.get_compatible_version()) + + # Inject the 1.1-specific "0:0" cardinality value + msdf11 = _clone(msdf10) + msdf11.df["mapping_cardinality"] = "1:1" + msdf11.df.loc[9, "mapping_cardinality"] = "0:0" + self.assertEqual("1.1", msdf11.get_compatible_version()) From 90066071a432c7470ec758f9d36a42b7fdeef5d2 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 2 Sep 2025 20:53:22 +0100 Subject: [PATCH 2/7] Add missing return type hints. 
--- src/sssom/constants.py | 2 +- src/sssom/util.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sssom/constants.py b/src/sssom/constants.py index d7cfa808..701200e8 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -279,7 +279,7 @@ def propagatable_slots(self) -> List[str]: slots.append(slot_name) return slots - def get_minimum_version(self, slot_name: str, class_name: str = "mapping"): + def get_minimum_version(self, slot_name: str, class_name: str = "mapping") -> SssomVersionEnum: """Get the minimum version of SSSOM required for a given slot. :param slot_name: The queried slot. diff --git a/src/sssom/util.py b/src/sssom/util.py index 8425f9e7..4d6819c3 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -393,7 +393,7 @@ def condense(self) -> List[str]: self.df.drop(columns=condensed, inplace=True) return condensed - def get_compatible_version(self): + def get_compatible_version(self) -> str: """Get the minimum version of SSSOM this set is compatible with.""" schema = SSSOMSchemaView() versions = set() From 20ea4e8311a7797100290e75c9ac793a5acb11f4 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 2 Sep 2025 21:37:02 +0100 Subject: [PATCH 3/7] Misc fixes. Fix wrong slot name when looking for "composed entity expression". Let Python compare version numbers as tuples of integers. Use `max(list)` instead of `sorted(list)[-1]`. --- src/sssom/util.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sssom/util.py b/src/sssom/util.py index 4d6819c3..8d02d3ab 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -416,7 +416,7 @@ def get_compatible_version(self) -> str: # . 
if ( self.metadata.get("subject_type") == "composed entity expression" - or self.metadata.get("subject_type") == "composed entity expression" + or self.metadata.get("object_type") == "composed entity expression" or ( "subject_type" in self.df.columns and "composed entity expression" in self.df["subject_type"].values @@ -438,10 +438,9 @@ def get_compatible_version(self) -> str: # sort, so that version 1.10 (if we ever get that far in the 1.x # branch) does not get sorted before version 1.9. def _version_to_compare_key(version): - major, minor = [int(s) for s in version.split(".")] - return (major * 100) + minor + return tuple(int(s) for s in version.split(".")) - return sorted(versions, key=_version_to_compare_key)[-1] + return max(versions, key=_version_to_compare_key) def _standardize_curie_or_iri(curie_or_iri: str, *, converter: Converter) -> str: From 588bf4ff0d568572b6f84cf26e75df6d08ec1f7a Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 2 Sep 2025 22:46:09 +0100 Subject: [PATCH 4/7] Use constants to refer to SSSOM slot names. 
--- src/sssom/constants.py | 1 + src/sssom/util.py | 20 ++++++++++---------- tests/test_utils.py | 19 ++++++++++++------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/sssom/constants.py b/src/sssom/constants.py index 701200e8..65cf4afb 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -90,6 +90,7 @@ MAPPING_SET_SOURCE = "mapping_set_source" MAPPING_SOURCE = "mapping_source" MAPPING_CARDINALITY = "mapping_cardinality" +CARDINALITY_SCOPE = "cardinality_scope" MAPPING_TOOL = "mapping_tool" MAPPING_TOOL_VERSION = "mapping_tool_version" MAPPING_DATE = "mapping_date" diff --git a/src/sssom/util.py b/src/sssom/util.py index 8d02d3ab..a60aa1dd 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -26,6 +26,7 @@ COLUMN_INVERT_DICTIONARY, COMMENT, CONFIDENCE, + MAPPING_CARDINALITY, MAPPING_JUSTIFICATION, MAPPING_SET_ID, MAPPING_SET_SOURCE, @@ -33,6 +34,7 @@ OBJECT_ID, OBJECT_LABEL, OBJECT_SOURCE, + OBJECT_TYPE, OBO_HAS_DB_XREF, OWL_DIFFERENT_FROM, OWL_EQUIVALENT_CLASS, @@ -55,6 +57,7 @@ SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, + SUBJECT_TYPE, UNKNOWN_IRI, MetadataType, PathOrIO, @@ -415,23 +418,20 @@ def get_compatible_version(self) -> str: # "manually" based on the informations provided in # . 
if ( - self.metadata.get("subject_type") == "composed entity expression" - or self.metadata.get("object_type") == "composed entity expression" + self.metadata.get(SUBJECT_TYPE) == "composed entity expression" + or self.metadata.get(OBJECT_TYPE) == "composed entity expression" or ( - "subject_type" in self.df.columns - and "composed entity expression" in self.df["subject_type"].values + SUBJECT_TYPE in self.df.columns + and "composed entity expression" in self.df[SUBJECT_TYPE].values ) or ( - "object_type" in self.df.columns - and "composed entity expression" in self.df["object_type"].values + OBJECT_TYPE in self.df.columns + and "composed entity expression" in self.df[OBJECT_TYPE].values ) ): versions.add("1.1") - if ( - "mapping_cardinality" in self.df.columns - and "0:0" in self.df["mapping_cardinality"].values - ): + if MAPPING_CARDINALITY in self.df.columns and "0:0" in self.df[MAPPING_CARDINALITY].values: versions.add("1.1") # Get the highest of the accumulated versions. We do a numerical diff --git a/tests/test_utils.py b/tests/test_utils.py index 206deffc..ce8f7473 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,13 +12,18 @@ from sssom_schema import slots as SSSOM_Slots from sssom.constants import ( + CARDINALITY_SCOPE, CREATOR_ID, + MAPPING_CARDINALITY, OBJECT_ID, OBJECT_LABEL, + OBJECT_TYPE, PREDICATE_ID, + PREDICATE_TYPE, SEMAPV, SUBJECT_ID, SUBJECT_LABEL, + SUBJECT_TYPE, ) from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter from sssom.io import extract_iris @@ -608,27 +613,27 @@ def _clone(msdf): # Inject a 1.1-specific mapping set slot msdf11 = _clone(msdf10) - msdf11.metadata["cardinality_scope"] = "predicate_id" + msdf11.metadata[CARDINALITY_SCOPE] = "predicate_id" self.assertEqual("1.1", msdf11.get_compatible_version()) # Inject a 1.1-specific mapping slot msdf11 = _clone(msdf10) - msdf11.df["predicate_type"] = "owl object property" + msdf11.df[PREDICATE_TYPE] = "owl object property" self.assertEqual("1.1", 
msdf11.get_compatible_version()) # Inject a 1.1-specific entity_type_enum value msdf11 = _clone(msdf10) - msdf11.metadata["subject_type"] = "composed entity expression" + msdf11.metadata[SUBJECT_TYPE] = "composed entity expression" self.assertEqual("1.1", msdf11.get_compatible_version()) # Same, but on a single mapping record msdf11 = _clone(msdf10) - msdf11.df["object_type"] = "owl class" - msdf11.df.loc[2, "object_type"] = "composed entity expression" + msdf11.df[OBJECT_TYPE] = "owl class" + msdf11.df.loc[2, OBJECT_TYPE] = "composed entity expression" self.assertEqual("1.1", msdf11.get_compatible_version()) # Inject the 1.1-specific "0:0" cardinality value msdf11 = _clone(msdf10) - msdf11.df["mapping_cardinality"] = "1:1" - msdf11.df.loc[9, "mapping_cardinality"] = "0:0" + msdf11.df[MAPPING_CARDINALITY] = "1:1" + msdf11.df.loc[9, MAPPING_CARDINALITY] = "0:0" self.assertEqual("1.1", msdf11.get_compatible_version()) From 28a23545a4cf55acb8041803b1e243a15b5ccc32 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 9 Sep 2025 14:11:40 +0100 Subject: [PATCH 5/7] Add missing type annotation. --- src/sssom/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sssom/util.py b/src/sssom/util.py index 8ed05a58..5f4cdfb3 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -501,7 +501,7 @@ def _to_string(row: dict[str, Any], side: str) -> str: def get_compatible_version(self) -> str: """Get the minimum version of SSSOM this set is compatible with.""" schema = SSSOMSchemaView() - versions = set() + versions: Set[str] = set() # First get the minimum versions required by the slots present # in the set; this is entirely provided by the SSSOM model. From f419fba8530a8c15d20fa5cfdac807f35d90ce27 Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Tue, 9 Sep 2025 19:46:28 +0100 Subject: [PATCH 6/7] Manipulate version numbers as tuples. 
Amend the SSSOMSchemaView#get_minimum_version() method to return a (major, minor) tuple, rather than a SssomVersionEnum object. The SssomVersionEnum (which is automatically generated from the LinkML schema) is cumbersome to use, for at least two reasons: 1) obtaining the actual value of the enum requires accessing two levels of attributes (SssomVersionEnum.code.text); 2) SssomVersionEnum values cannot be meaningfully compared (e.g. to check that a given version number is higher than another given version), we must (a) obtain the text value, (b) split that value over the middle dot, (c) convert the strings to integers, (d) put the integers into a tuple. OK, this can be done in one line of code, but this is cumbersome all the same, and it's best if that kind of thing is not left to client code. --- src/sssom/constants.py | 23 ++++++++++++++--------- src/sssom/util.py | 17 +++++++---------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/sssom/constants.py b/src/sssom/constants.py index 1ac162ef..33fd6588 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -4,13 +4,12 @@ import uuid from enum import Enum from functools import cached_property, lru_cache -from typing import Any, Dict, List, Literal, Set, TextIO, Union +from typing import Any, Dict, List, Literal, Optional, Set, TextIO, Tuple, Union import importlib_resources import yaml from linkml_runtime.utils.schema_as_dict import schema_as_dict from linkml_runtime.utils.schemaview import SchemaView -from sssom_schema.datamodel.sssom_schema import SssomVersionEnum HERE = pathlib.Path(__file__).parent.resolve() @@ -284,7 +283,9 @@ def propagatable_slots(self) -> List[str]: slots.append(slot_name) return slots - def get_minimum_version(self, slot_name: str, class_name: str = "mapping") -> SssomVersionEnum: + def get_minimum_version( + self, slot_name: str, class_name: str = "mapping" + ) -> Optional[Tuple[int, int]]: """Get the minimum version of SSSOM required for a given slot. 
:param slot_name: The queried slot. @@ -292,16 +293,20 @@ def get_minimum_version(self, slot_name: str, class_name: str = "mapping") -> Ss because a slot may have been added to a class in a later version than the version in which it was first introduced in the schema. - :return: A SssomVersionEnum value representing the earliest - version of SSSOM that defines the given slot in the - given class. May be None if the requested slot name - is not a valid slot name. + :return: A tuple containing the major and minor numbers of the + earliest version of SSSOM that defines the given slot + in the given class. May be None if the requested slot + name is not a valid slot name. """ try: slot = self.view.induced_slot(slot_name, class_name) - return SssomVersionEnum(slot.annotations.added_in.value) + version = [int(s) for s in slot.annotations.added_in.value.split(".")] + if len(version) != 2: + # Should never happen, schema is incorrect + return None + return (version[0], version[1]) except AttributeError: # No added_in annotation, defaults to 1.0 - return SssomVersionEnum("1.0") + return (1, 0) except ValueError: # No such slot return None diff --git a/src/sssom/util.py b/src/sssom/util.py index 5f4cdfb3..20ce423c 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -501,18 +501,18 @@ def _to_string(row: dict[str, Any], side: str) -> str: def get_compatible_version(self) -> str: """Get the minimum version of SSSOM this set is compatible with.""" schema = SSSOMSchemaView() - versions: Set[str] = set() + versions: Set[Tuple[int, int]] = set() # First get the minimum versions required by the slots present # in the set; this is entirely provided by the SSSOM model. 
for slot in self.metadata.keys(): version = schema.get_minimum_version(slot, "mapping set") if version is not None: - versions.add(str(version)) + versions.add(version) for slot in self.df.columns: version = schema.get_minimum_version(slot, "mapping") if version is not None: - versions.add(str(version)) + versions.add(version) # Then take care of enum values; we cannot use the SSSOM model # for that (enum values are not tagged with an "added_in" @@ -531,16 +531,13 @@ def get_compatible_version(self) -> str: and "composed entity expression" in self.df[OBJECT_TYPE].values ) ): - versions.add("1.1") + versions.add((1, 1)) if MAPPING_CARDINALITY in self.df.columns and "0:0" in self.df[MAPPING_CARDINALITY].values: - versions.add("1.1") + versions.add((1, 1)) - # Get the highest of the accumulated versions. We do a numerical - # sort, so that version 1.10 (if we ever get that far in the 1.x - # branch) does not get sorted before version 1.9. - def _version_to_compare_key(version): - return tuple(int(s) for s in version.split(".")) + # Get the highest of the accumulated versions. + return ".".join([str(i) for i in max(versions)]) return max(versions, key=_version_to_compare_key) From b690800653783138904563c6afddbf2aa6e984bf Mon Sep 17 00:00:00 2001 From: Damien Goutte-Gattat Date: Wed, 10 Sep 2025 00:38:04 +0100 Subject: [PATCH 7/7] Remove dead code. --- src/sssom/util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/sssom/util.py b/src/sssom/util.py index 20ce423c..27d3242a 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -539,8 +539,6 @@ def get_compatible_version(self) -> str: # Get the highest of the accumulated versions. return ".".join([str(i) for i in max(versions)]) - return max(versions, key=_version_to_compare_key) - def _standardize_curie_or_iri(curie_or_iri: str, *, converter: Converter) -> str: """Standardize a CURIE or IRI, returning the original if not possible.