diff --git a/src/sssom/constants.py b/src/sssom/constants.py
index a200096f..33fd6588 100644
--- a/src/sssom/constants.py
+++ b/src/sssom/constants.py
@@ -4,7 +4,7 @@
 import uuid
 from enum import Enum
 from functools import cached_property, lru_cache
-from typing import Any, Dict, List, Literal, Set, TextIO, Union
+from typing import Any, Dict, List, Literal, Optional, Set, TextIO, Tuple, Union
 
 import importlib_resources
 import yaml
@@ -283,6 +283,36 @@ def propagatable_slots(self) -> List[str]:
                 slots.append(slot_name)
         return slots
 
+    def get_minimum_version(
+        self, slot_name: str, class_name: str = "mapping"
+    ) -> Optional[Tuple[int, int]]:
+        """Get the minimum version of SSSOM required for a given slot.
+
+        :param slot_name: The queried slot.
+        :param class_name: The class the slot belongs to. This is needed
+                           because a slot may have been added to a class
+                           in a later version than the version in which
+                           it was first introduced in the schema.
+        :return: A tuple containing the major and minor numbers of the
+                 earliest version of SSSOM that defines the given slot
+                 in the given class. Slots that carry no "added_in"
+                 annotation are reported as (1, 0). May be None if the
+                 requested slot name is not a valid slot name, or if
+                 its "added_in" annotation does not consist of exactly
+                 two dot-separated integers.
+        """
+        try:
+            slot = self.view.induced_slot(slot_name, class_name)
+            version = [int(s) for s in slot.annotations.added_in.value.split(".")]
+            if len(version) != 2:
+                # Should never happen, schema is incorrect
+                return None
+            return (version[0], version[1])
+        except AttributeError:  # No added_in annotation, defaults to 1.0
+            return (1, 0)
+        except ValueError:  # No such slot
+            return None
+
 
 @lru_cache(1)
 def _get_sssom_schema_object() -> SSSOMSchemaView:
diff --git a/src/sssom/util.py b/src/sssom/util.py
index 0ae599a7..27d3242a 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -36,6 +36,7 @@
     OBJECT_ID,
     OBJECT_LABEL,
     OBJECT_SOURCE,
+    OBJECT_TYPE,
     OBO_HAS_DB_XREF,
     OWL_DIFFERENT_FROM,
     OWL_EQUIVALENT_CLASS,
@@ -58,6 +59,7 @@
     SUBJECT_ID,
     SUBJECT_LABEL,
     SUBJECT_SOURCE,
+    SUBJECT_TYPE,
     UNKNOWN_IRI,
     MetadataType,
     PathOrIO,
@@ -496,6 +498,54 @@ def _to_string(row: dict[str, Any], side: str) -> str:
             # No scope, so remove any pre-existing "cardinality_scope" column
             self.df.drop(columns=CARDINALITY_SCOPE, inplace=True, errors="ignore")
 
+    def get_compatible_version(self) -> str:
+        """Get the minimum version of SSSOM this set is compatible with.
+
+        :return: The version as a "major.minor" string. This is "1.0"
+                 when nothing in the set requires a later version.
+        """
+        schema = SSSOMSchemaView()
+        # Every set is compatible with at least 1.0. Seeding the set with
+        # that baseline also keeps max() below from raising ValueError
+        # when no slot of the set is known to the schema.
+        versions: Set[Tuple[int, int]] = {(1, 0)}
+
+        # First get the minimum versions required by the slots present
+        # in the set; this is entirely provided by the SSSOM model.
+        for slot in self.metadata:
+            version = schema.get_minimum_version(slot, "mapping set")
+            if version is not None:
+                versions.add(version)
+        for slot in self.df.columns:
+            version = schema.get_minimum_version(slot, "mapping")
+            if version is not None:
+                versions.add(version)
+
+        # Then take care of enum values; we cannot use the SSSOM model
+        # for that (enum values are not tagged with an "added_in"
+        # annotation the way slots are), so this has to be handled
+        # "manually" based on the information provided in the SSSOM
+        # documentation (TODO: restore the reference URL missing here).
+        if (
+            self.metadata.get(SUBJECT_TYPE) == "composed entity expression"
+            or self.metadata.get(OBJECT_TYPE) == "composed entity expression"
+            or (
+                SUBJECT_TYPE in self.df.columns
+                and "composed entity expression" in self.df[SUBJECT_TYPE].values
+            )
+            or (
+                OBJECT_TYPE in self.df.columns
+                and "composed entity expression" in self.df[OBJECT_TYPE].values
+            )
+        ):
+            versions.add((1, 1))
+
+        if MAPPING_CARDINALITY in self.df.columns and "0:0" in self.df[MAPPING_CARDINALITY].values:
+            versions.add((1, 1))
+
+        # Get the highest of the accumulated versions.
+        return ".".join(str(i) for i in max(versions))
+
 
 def _standardize_curie_or_iri(curie_or_iri: str, *, converter: Converter) -> str:
     """Standardize a CURIE or IRI, returning the original if not possible.
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 4fdb0369..3ff69e81 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -17,10 +17,13 @@
     MAPPING_CARDINALITY,
     OBJECT_ID,
     OBJECT_LABEL,
+    OBJECT_TYPE,
     PREDICATE_ID,
+    PREDICATE_TYPE,
     SEMAPV,
     SUBJECT_ID,
     SUBJECT_LABEL,
+    SUBJECT_TYPE,
 )
 from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter
 from sssom.io import extract_iris
@@ -634,3 +637,40 @@ def test_infer_scoped_cardinality(self) -> None:
         expected = ["1:n", "1:n", "1:n", "1:n", "1:n", "1:n"]
         self.assertEqual(expected, list(msdf.df[MAPPING_CARDINALITY].values))
         self.assertNotIn(CARDINALITY_SCOPE, msdf.df.columns)
+
+    def test_inferring_compatible_version(self) -> None:
+        """Test that we can correctly infer the version a set is compatible with."""
+        msdf10 = parse_sssom_table(f"{data_dir}/basic.tsv")
+
+        # Nothing in that set requires 1.1
+        self.assertEqual("1.0", msdf10.get_compatible_version())
+
+        def _clone(msdf):
+            return MappingSetDataFrame(df=msdf.df.copy(), metadata=msdf.metadata.copy())
+
+        # Inject a 1.1-specific mapping set slot
+        msdf11 = _clone(msdf10)
+        msdf11.metadata[CARDINALITY_SCOPE] = "predicate_id"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Inject a 1.1-specific mapping slot
+        msdf11 = _clone(msdf10)
+        msdf11.df[PREDICATE_TYPE] = "owl object property"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Inject a 1.1-specific entity_type_enum value
+        msdf11 = _clone(msdf10)
+        msdf11.metadata[SUBJECT_TYPE] = "composed entity expression"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Same, but on a single mapping record
+        msdf11 = _clone(msdf10)
+        msdf11.df[OBJECT_TYPE] = "owl class"
+        msdf11.df.loc[2, OBJECT_TYPE] = "composed entity expression"
+        self.assertEqual("1.1", msdf11.get_compatible_version())
+
+        # Inject the 1.1-specific "0:0" cardinality value
+        msdf11 = _clone(msdf10)
+        msdf11.df[MAPPING_CARDINALITY] = "1:1"
+        msdf11.df.loc[9, MAPPING_CARDINALITY] = "0:0"
+        self.assertEqual("1.1", msdf11.get_compatible_version())