xopr.util

  1import itertools
  2import json
  3from typing import Any, Dict, List, Optional, Sequence, TypeVar
  4
  5import numpy as np
  6import pandas as pd
  7import requests
  8
  9T = TypeVar("T")
 10
 11def dict_equiv(first: dict, second: dict) -> bool:
 12    """Compare two dictionaries for equivalence (identity or equality).
 13
 14    Parameters
 15    ----------
 16    first : dict
 17        First dictionary to compare.
 18    second : dict
 19        Second dictionary to compare.
 20
 21    Returns
 22    -------
 23    bool
 24        True if dictionaries are identical or have the same keys with
 25        equivalent values (as determined by the equivalent function), False otherwise.
 26    """
 27    if first is second:
 28        return True
 29    if len(first) != len(second):
 30        return False
 31    for key in first:
 32        if key not in second:
 33            return False
 34        if not equivalent(first[key], second[key]):
 35            return False
 36    return True
 37
 38def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
 39    """Compare two sequences for element-wise equivalence.
 40
 41    Parameters
 42    ----------
 43    first : Sequence[T]
 44        First sequence to compare.
 45    second : Sequence[T]
 46        Second sequence to compare.
 47
 48    Returns
 49    -------
 50    bool
 51        True if sequences have equal length and all corresponding elements
 52        are equivalent (as determined by the equivalent function), False otherwise.
 53    """
 54    if len(first) != len(second):
 55        return False
 56    return all(itertools.starmap(equivalent, zip(first, second, strict=True)))
 57
 58def equivalent(first, second) -> bool:
 59    """Compare two objects for equivalence (identity or equality).
 60
 61    Handles different data types:
 62
 63    * Arrays: Uses numpy.array_equal for comparison
 64    * Lists/sequences: Recursively compares all elements
 65    * Dictionaries: Compares keys and values recursively
 66    * Other types: Uses equality operator or pandas null checking
 67
 68    Parameters
 69    ----------
 70    first : Any
 71        First object to compare.
 72    second : Any
 73        Second object to compare.
 74
 75    Returns
 76    -------
 77    bool
 78        True if objects are identical, equal, or both are null/NaN, False otherwise.
 79    """
 80
 81    if first is second:
 82        return True
 83    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
 84        try:
 85            return np.array_equal(first, second)
 86        except Exception:
 87            return False
 88    if isinstance(first, list) or isinstance(second, list):
 89        return list_equiv(first, second)  # type: ignore[arg-type]
 90    if isinstance(first, dict) or isinstance(second, dict): # Added: Also supports dictionaries
 91        return dict_equiv(first, second)
 92    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
 93
 94def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
 95    """Merge a list of dictionaries, dropping conflicting keys.
 96
 97    This function is designed to be passed to xarray's combine_attrs parameter.
 98    It merges dictionaries by keeping only keys where all values are equivalent.
 99    For nested dictionaries, merging is applied recursively.
100
101    Parameters
102    ----------
103    dicts : List[Dict[str, Any]]
104        List of dictionaries to merge.
105    context : Any, optional
106        Optional context parameter (unused but included for xarray compatibility).
107
108    Returns
109    -------
110    Dict[str, Any]
111        Dictionary containing only non-conflicting key-value pairs from input dictionaries.
112        Keys with conflicting values across dictionaries are dropped.
113
114    Examples
115    --------
116    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
117    >>> merge_dicts_no_conflicts(dicts)
118    {'a': 1}  # 'b' dropped due to conflict
119    """
120    merged = {}
121    # Create set of all keys across dictionaries
122    all_keys = set().union(*(d.keys() for d in dicts))
123    for key in all_keys:
124        # Collect values for the current key from all dictionaries
125        values = [d.get(key) for d in dicts if key in d]
126        if len(values) == 1:
127            merged[key] = values[0]  # Only one value, no conflict
128        else:
129            # Check if all values have the same type
130            types = set(type(v) for v in values if v is not None)
131            if len(types) > 1:
132                continue  # Skip conflicting keys
133            if isinstance(values[0], dict):
134                # If values are dictionaries, merge them recursively
135                merged_dict = merge_dicts_no_conflicts(values)
136                if len(merged_dict) > 0:
137                    merged[key] = merged_dict
138            else:
139                all_equiv = True
140                for idx in range(1, len(values)):
141                    if not equivalent(values[0], values[idx]):
142                        all_equiv = False
143                        break
144                if all_equiv:
145                    merged[key] = values[0]
146    return merged
147
def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """
    Query the ROR API and return the display name for a given ROR ID.

    Args:
        ror_id (str): The ROR identifier (e.g., "https://ror.org/02jx3x895" or just "02jx3x895")
        timeout (float): Seconds to wait for the ROR API before giving up.
            Defaults to 10.

    Returns:
        Optional[str]: The value of the name entry whose types include
        'ror_display'. If there is no such entry, the top-level 'name' field
        of the response (None when absent). None if the request fails or the
        response cannot be parsed.
    """
    # Strip only a leading URL prefix; str.replace would also remove the
    # prefix if it appeared later in the string.
    ror_id = ror_id.removeprefix('https://ror.org/')

    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(
            f"https://api.ror.org/organizations/{ror_id}", timeout=timeout
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None

    try:
        data = response.json()
    except ValueError as e:
        # JSON decode errors (json.JSONDecodeError is a ValueError) are
        # handled separately so they are reported as parse failures.
        print(f"Error parsing ROR API response: {e}")
        return None

    try:
        for name_entry in data.get('names', []):
            if 'ror_display' in (name_entry.get('types') or ()):
                return name_entry.get('value')

        # Fallback to primary name if no ror_display entry is found
        return data.get('name')
    except (AttributeError, TypeError, KeyError) as e:
        # The payload was valid JSON but not shaped as expected
        # (e.g. not an object, or 'names' is null).
        print(f"Error parsing ROR API response: {e}")
        return None
def dict_equiv(first: dict, second: dict) -> bool:
def dict_equiv(first: dict, second: dict) -> bool:
    """Check whether two dictionaries hold equivalent contents.

    Parameters
    ----------
    first : dict
        Left-hand dictionary.
    second : dict
        Right-hand dictionary.

    Returns
    -------
    bool
        True when both arguments are the same object, or when they have the
        same number of entries and every key of ``first`` is present in
        ``second`` with a value accepted by the equivalent function;
        False otherwise.
    """
    # Same object: nothing to compare.
    if first is second:
        return True
    # With equal sizes, checking every key of `first` against `second`
    # is sufficient to establish that the key sets match.
    if len(first) != len(second):
        return False
    return all(
        key in second and equivalent(first[key], second[key]) for key in first
    )

Compare two dictionaries for equivalence (identity or equality).

Parameters
  • first (dict): First dictionary to compare.
  • second (dict): Second dictionary to compare.
Returns
  • bool: True if dictionaries are identical or have the same keys with equivalent values (as determined by the equivalent function), False otherwise.
def list_equiv(first: Sequence[~T], second: Sequence[~T]) -> bool:
def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
    """Check two sequences for position-by-position equivalence.

    Parameters
    ----------
    first : Sequence[T]
        Left-hand sequence.
    second : Sequence[T]
        Right-hand sequence.

    Returns
    -------
    bool
        True when both sequences have the same length and each pair of
        elements at the same position is accepted by the equivalent
        function; False otherwise.
    """
    # Different lengths can never be equivalent; bail out before pairing.
    if len(first) != len(second):
        return False
    for left, right in zip(first, second, strict=True):
        if not equivalent(left, right):
            return False
    return True

Compare two sequences for element-wise equivalence.

Parameters
  • first (Sequence[T]): First sequence to compare.
  • second (Sequence[T]): Second sequence to compare.
Returns
  • bool: True if sequences have equal length and all corresponding elements are equivalent (as determined by the equivalent function), False otherwise.
def equivalent(first, second) -> bool:
def equivalent(first, second) -> bool:
    """Compare two objects for equivalence (identity or equality).

    Handles different data types:

    * Arrays: Uses numpy.array_equal for comparison
    * Lists/sequences: Recursively compares all elements
    * Dictionaries: Compares keys and values recursively
    * Other types: Uses equality operator or pandas null checking

    Mismatched container kinds are reported as not equivalent instead of
    raising: a list never matches an object that is not a sized iterable
    (e.g. ``None`` or a number) nor a mapping, and a dictionary never
    matches a non-mapping.

    Parameters
    ----------
    first : Any
        First object to compare.
    second : Any
        Second object to compare.

    Returns
    -------
    bool
        True if objects are identical, equal, or both are null/NaN, False otherwise.
    """
    # Local import keeps this function self-contained; it only costs a
    # sys.modules lookup after the first call.
    from collections.abc import Iterable, Mapping, Sized

    if first is second:
        return True
    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
        try:
            return bool(np.array_equal(first, second))
        except Exception:
            # Inputs that numpy cannot compare are simply not equivalent.
            return False
    if isinstance(first, list) or isinstance(second, list):
        for obj in (first, second):
            # list_equiv needs len() and iteration on both sides; without
            # this guard a None/scalar partner raises TypeError.
            if not (isinstance(obj, Sized) and isinstance(obj, Iterable)):
                return False
            # Iterating a mapping yields its keys, which would let
            # ['a'] match {'a': 1}.
            if isinstance(obj, Mapping):
                return False
        return list_equiv(first, second)  # type: ignore[arg-type]
    if isinstance(first, dict) or isinstance(second, dict):
        # dict_equiv needs key lookup on both sides.
        if not (isinstance(first, Mapping) and isinstance(second, Mapping)):
            return False
        return dict_equiv(first, second)
    equal = first == second
    # pd.NA propagates through ``==`` and has no truth value; treat the
    # comparison as "not equal" and let the null check below decide.
    if equal is pd.NA:
        equal = False
    return bool(equal or (pd.isnull(first) and pd.isnull(second)))  # type: ignore[call-overload]

Compare two objects for equivalence (identity or equality).

Handles different data types:

  • Arrays: Uses numpy.array_equal for comparison
  • Lists/sequences: Recursively compares all elements
  • Dictionaries: Compares keys and values recursively
  • Other types: Uses equality operator or pandas null checking
Parameters
  • first (Any): First object to compare.
  • second (Any): Second object to compare.
Returns
  • bool: True if objects are identical, equal, or both are null/NaN, False otherwise.
def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
 95def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
 96    """Merge a list of dictionaries, dropping conflicting keys.
 97
 98    This function is designed to be passed to xarray's combine_attrs parameter.
 99    It merges dictionaries by keeping only keys where all values are equivalent.
100    For nested dictionaries, merging is applied recursively.
101
102    Parameters
103    ----------
104    dicts : List[Dict[str, Any]]
105        List of dictionaries to merge.
106    context : Any, optional
107        Optional context parameter (unused but included for xarray compatibility).
108
109    Returns
110    -------
111    Dict[str, Any]
112        Dictionary containing only non-conflicting key-value pairs from input dictionaries.
113        Keys with conflicting values across dictionaries are dropped.
114
115    Examples
116    --------
117    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
118    >>> merge_dicts_no_conflicts(dicts)
119    {'a': 1}  # 'b' dropped due to conflict
120    """
121    merged = {}
122    # Create set of all keys across dictionaries
123    all_keys = set().union(*(d.keys() for d in dicts))
124    for key in all_keys:
125        # Collect values for the current key from all dictionaries
126        values = [d.get(key) for d in dicts if key in d]
127        if len(values) == 1:
128            merged[key] = values[0]  # Only one value, no conflict
129        else:
130            # Check if all values have the same type
131            types = set(type(v) for v in values if v is not None)
132            if len(types) > 1:
133                continue  # Skip conflicting keys
134            if isinstance(values[0], dict):
135                # If values are dictionaries, merge them recursively
136                merged_dict = merge_dicts_no_conflicts(values)
137                if len(merged_dict) > 0:
138                    merged[key] = merged_dict
139            else:
140                all_equiv = True
141                for idx in range(1, len(values)):
142                    if not equivalent(values[0], values[idx]):
143                        all_equiv = False
144                        break
145                if all_equiv:
146                    merged[key] = values[0]
147    return merged

Merge a list of dictionaries, dropping conflicting keys.

This function is designed to be passed to xarray's combine_attrs parameter. It merges dictionaries by keeping only keys where all values are equivalent. For nested dictionaries, merging is applied recursively.

Parameters
  • dicts (List[Dict[str, Any]]): List of dictionaries to merge.
  • context (Any, optional): Optional context parameter (unused but included for xarray compatibility).
Returns
  • Dict[str, Any]: Dictionary containing only non-conflicting key-value pairs from input dictionaries. Keys with conflicting values across dictionaries are dropped.
Examples
>>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
>>> merge_dicts_no_conflicts(dicts)
{'a': 1}  # 'b' dropped due to conflict
def get_ror_display_name(ror_id: str) -> Optional[str]:
def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """
    Query the ROR API and return the display name for a given ROR ID.

    Args:
        ror_id (str): The ROR identifier (e.g., "https://ror.org/02jx3x895" or just "02jx3x895")
        timeout (float): Seconds to wait for the ROR API before giving up.
            Defaults to 10.

    Returns:
        Optional[str]: The value of the name entry whose types include
        'ror_display'. If there is no such entry, the top-level 'name' field
        of the response (None when absent). None if the request fails or the
        response cannot be parsed.
    """
    # Strip only a leading URL prefix; str.replace would also remove the
    # prefix if it appeared later in the string.
    ror_id = ror_id.removeprefix('https://ror.org/')

    try:
        # Without a timeout, requests can block forever on a stalled server.
        response = requests.get(
            f"https://api.ror.org/organizations/{ror_id}", timeout=timeout
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None

    try:
        data = response.json()
    except ValueError as e:
        # JSON decode errors (json.JSONDecodeError is a ValueError) are
        # handled separately so they are reported as parse failures.
        print(f"Error parsing ROR API response: {e}")
        return None

    try:
        for name_entry in data.get('names', []):
            if 'ror_display' in (name_entry.get('types') or ()):
                return name_entry.get('value')

        # Fallback to primary name if no ror_display entry is found
        return data.get('name')
    except (AttributeError, TypeError, KeyError) as e:
        # The payload was valid JSON but not shaped as expected
        # (e.g. not an object, or 'names' is null).
        print(f"Error parsing ROR API response: {e}")
        return None

Query the ROR API and return the display name (the name entry typed `ror_display`) for a given ROR ID.

Args: ror_id (str): The ROR identifier (e.g., "https://ror.org/02jx3x895" or just "02jx3x895")

Returns: Optional[str]: The `ror_display` name if found, otherwise the record's top-level `name` field; None if the request or response parsing fails