xopr.util

General utility functions for xOPR

This module provides helper functions for data comparison, dictionary merging, and metadata retrieval that are used throughout xOPR.

  1"""
  2General utility functions for xOPR
  3
  4This module provides helper functions for data comparison, dictionary merging,
  5and metadata retrieval that are used throughout xOPR.
  6"""
  7
  8import itertools
  9import json
 10from typing import Any, Dict, List, Optional, Sequence, TypeVar
 11
 12import numpy as np
 13import pandas as pd
 14import requests
 15
 16T = TypeVar("T")
 17
 18def dict_equiv(first: dict, second: dict) -> bool:
 19    """Compare two dictionaries for equivalence (identity or equality).
 20
 21    Parameters
 22    ----------
 23    first : dict
 24        First dictionary to compare.
 25    second : dict
 26        Second dictionary to compare.
 27
 28    Returns
 29    -------
 30    bool
 31        True if dictionaries are identical or have the same keys with
 32        equivalent values (as determined by the equivalent function), False otherwise.
 33    """
 34    if first is second:
 35        return True
 36    if len(first) != len(second):
 37        return False
 38    for key in first:
 39        if key not in second:
 40            return False
 41        if not equivalent(first[key], second[key]):
 42            return False
 43    return True
 44
 45def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
 46    """Compare two sequences for element-wise equivalence.
 47
 48    Parameters
 49    ----------
 50    first : Sequence[T]
 51        First sequence to compare.
 52    second : Sequence[T]
 53        Second sequence to compare.
 54
 55    Returns
 56    -------
 57    bool
 58        True if sequences have equal length and all corresponding elements
 59        are equivalent (as determined by the equivalent function), False otherwise.
 60    """
 61    if len(first) != len(second):
 62        return False
 63    return all(itertools.starmap(equivalent, zip(first, second, strict=True)))
 64
 65def equivalent(first, second) -> bool:
 66    """Compare two objects for equivalence (identity or equality).
 67
 68    Handles different data types:
 69
 70    * Arrays: Uses numpy.array_equal for comparison
 71    * Lists/sequences: Recursively compares all elements
 72    * Dictionaries: Compares keys and values recursively
 73    * Other types: Uses equality operator or pandas null checking
 74
 75    Parameters
 76    ----------
 77    first : Any
 78        First object to compare.
 79    second : Any
 80        Second object to compare.
 81
 82    Returns
 83    -------
 84    bool
 85        True if objects are identical, equal, or both are null/NaN, False otherwise.
 86    """
 87
 88    if first is second:
 89        return True
 90    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
 91        try:
 92            return np.array_equal(first, second)
 93        except Exception:
 94            return False
 95    if isinstance(first, list) or isinstance(second, list):
 96        return list_equiv(first, second)  # type: ignore[arg-type]
 97    if isinstance(first, dict) or isinstance(second, dict): # Added: Also supports dictionaries
 98        return dict_equiv(first, second)
 99    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
100
def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
    """Merge a list of dictionaries, dropping conflicting keys.

    This function is designed to be passed to xarray's combine_attrs parameter.
    It merges dictionaries by keeping only keys where all values are equivalent.
    For nested dictionaries, merging is applied recursively.

    Parameters
    ----------
    dicts : List[Dict[str, Any]]
        List of dictionaries to merge.
    context : Any, optional
        Optional context parameter (unused but included for xarray compatibility).

    Returns
    -------
    Dict[str, Any]
        Dictionary containing only non-conflicting key-value pairs from input dictionaries.
        Keys with conflicting values across dictionaries are dropped, as are
        nested dictionaries whose recursive merge ends up empty. Keys appear
        in the order in which they are first encountered in ``dicts``.

    Examples
    --------
    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
    >>> merge_dicts_no_conflicts(dicts)
    {'a': 1}  # 'b' dropped due to conflict
    """
    merged: Dict[str, Any] = {}
    # dict.fromkeys de-duplicates while keeping first-seen order, which makes
    # the key order of the result deterministic (a set would not).
    all_keys = dict.fromkeys(key for d in dicts for key in d)
    for key in all_keys:
        # Collect values for the current key from every dictionary that has it
        values = [d[key] for d in dicts if key in d]
        if len(values) == 1:
            merged[key] = values[0]  # Only one value, no conflict
            continue

        # None is ignored by the type check, so it can coexist with exactly
        # one other type.
        present = [v for v in values if v is not None]
        if len({type(v) for v in present}) > 1:
            continue  # Mixed types: conflict

        # None next to a container is a conflict. Checking it here keeps
        # None out of the recursive merge and out of the container
        # comparison, neither of which accepts it.
        has_none = len(present) != len(values)
        if has_none and present and isinstance(present[0], (dict, list)):
            continue

        if isinstance(values[0], dict):
            # All values are dictionaries of the same type: merge recursively
            nested = merge_dicts_no_conflicts(values)
            if nested:
                merged[key] = nested
        elif all(equivalent(values[0], other) for other in values[1:]):
            merged[key] = values[0]
    return merged
154
def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """
    Fetch institution display name from ROR API.

    Parameters
    ----------
    ror_id : str
        ROR identifier (full URL or just ID, e.g., "02jx3x895").
    timeout : float, optional
        Seconds to wait for the ROR API before giving up. Defaults to 10.

    Returns
    -------
    str or None
        Institution display name. None if the request fails, the response
        cannot be parsed, or the record carries no usable name.
    """
    # Clean the ROR ID - keep just the identifier part if a full URL is provided
    ror_id = ror_id.removeprefix('https://ror.org/')

    try:
        # Always pass a timeout: without one, requests may block indefinitely
        # on an unresponsive server.
        url = f"https://api.ror.org/organizations/{ror_id}"
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse JSON response
        data = response.json()

        # Prefer the name entry tagged with the 'ror_display' type
        for name_entry in data.get('names', []):
            if 'ror_display' in (name_entry.get('types') or ()):
                return name_entry.get('value')

        # Fall back to the top-level 'name' field if no such entry exists
        return data.get('name')

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None
    except (json.JSONDecodeError, KeyError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover payloads that are not shaped as
        # expected (e.g. a JSON list, or name entries that are not objects).
        print(f"Error parsing ROR API response: {e}")
        return None
def dict_equiv(first: dict, second: dict) -> bool:
def dict_equiv(first: dict, second: dict) -> bool:
    """Compare two dictionaries for equivalence (identity or equality).

    Parameters
    ----------
    first : dict
        First dictionary to compare.
    second : dict
        Second dictionary to compare.

    Returns
    -------
    bool
        True if both arguments are the same object, or if both are mappings
        with the same keys and pairwise-equivalent values (as determined by
        the equivalent function). False otherwise, including when either
        argument is not a mapping.
    """
    from collections.abc import Mapping

    if first is second:
        return True
    # `equivalent` routes here as soon as ONE side is a dict, so the other
    # side can be None, a scalar, a list, ...  That is a mismatch, not an
    # error: report it as "not equivalent" instead of raising TypeError.
    if not (isinstance(first, Mapping) and isinstance(second, Mapping)):
        return False
    if len(first) != len(second):
        return False
    # Equal sizes plus "every key of first is in second" implies identical
    # key sets, so a single pass over `first` is sufficient.
    return all(
        key in second and equivalent(value, second[key])
        for key, value in first.items()
    )

Compare two dictionaries for equivalence (identity or equality).

Parameters
  • first (dict): First dictionary to compare.
  • second (dict): Second dictionary to compare.
Returns
  • bool: True if dictionaries are identical or have the same keys with equivalent values (as determined by the equivalent function), False otherwise.
def list_equiv(first: Sequence[~T], second: Sequence[~T]) -> bool:
46def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
47    """Compare two sequences for element-wise equivalence.
48
49    Parameters
50    ----------
51    first : Sequence[T]
52        First sequence to compare.
53    second : Sequence[T]
54        Second sequence to compare.
55
56    Returns
57    -------
58    bool
59        True if sequences have equal length and all corresponding elements
60        are equivalent (as determined by the equivalent function), False otherwise.
61    """
62    if len(first) != len(second):
63        return False
64    return all(itertools.starmap(equivalent, zip(first, second, strict=True)))

Compare two sequences for element-wise equivalence.

Parameters
  • first (Sequence[T]): First sequence to compare.
  • second (Sequence[T]): Second sequence to compare.
Returns
  • bool: True if sequences have equal length and all corresponding elements are equivalent (as determined by the equivalent function), False otherwise.
def equivalent(first, second) -> bool:
 66def equivalent(first, second) -> bool:
 67    """Compare two objects for equivalence (identity or equality).
 68
 69    Handles different data types:
 70
 71    * Arrays: Uses numpy.array_equal for comparison
 72    * Lists/sequences: Recursively compares all elements
 73    * Dictionaries: Compares keys and values recursively
 74    * Other types: Uses equality operator or pandas null checking
 75
 76    Parameters
 77    ----------
 78    first : Any
 79        First object to compare.
 80    second : Any
 81        Second object to compare.
 82
 83    Returns
 84    -------
 85    bool
 86        True if objects are identical, equal, or both are null/NaN, False otherwise.
 87    """
 88
 89    if first is second:
 90        return True
 91    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
 92        try:
 93            return np.array_equal(first, second)
 94        except Exception:
 95            return False
 96    if isinstance(first, list) or isinstance(second, list):
 97        return list_equiv(first, second)  # type: ignore[arg-type]
 98    if isinstance(first, dict) or isinstance(second, dict): # Added: Also supports dictionaries
 99        return dict_equiv(first, second)
100    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]

Compare two objects for equivalence (identity or equality).

Handles different data types:

  • Arrays: Uses numpy.array_equal for comparison
  • Lists/sequences: Recursively compares all elements
  • Dictionaries: Compares keys and values recursively
  • Other types: Uses equality operator or pandas null checking
Parameters
  • first (Any): First object to compare.
  • second (Any): Second object to compare.
Returns
  • bool: True if objects are identical, equal, or both are null/NaN, False otherwise.
def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
    """Merge a list of dictionaries, dropping conflicting keys.

    This function is designed to be passed to xarray's combine_attrs parameter.
    It merges dictionaries by keeping only keys where all values are equivalent.
    For nested dictionaries, merging is applied recursively.

    Parameters
    ----------
    dicts : List[Dict[str, Any]]
        List of dictionaries to merge.
    context : Any, optional
        Optional context parameter (unused but included for xarray compatibility).

    Returns
    -------
    Dict[str, Any]
        Dictionary containing only non-conflicting key-value pairs from input dictionaries.
        Keys with conflicting values across dictionaries are dropped, as are
        nested dictionaries whose recursive merge ends up empty. Keys appear
        in the order in which they are first encountered in ``dicts``.

    Examples
    --------
    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
    >>> merge_dicts_no_conflicts(dicts)
    {'a': 1}  # 'b' dropped due to conflict
    """
    merged: Dict[str, Any] = {}
    # dict.fromkeys de-duplicates while keeping first-seen order, which makes
    # the key order of the result deterministic (a set would not).
    all_keys = dict.fromkeys(key for d in dicts for key in d)
    for key in all_keys:
        # Collect values for the current key from every dictionary that has it
        values = [d[key] for d in dicts if key in d]
        if len(values) == 1:
            merged[key] = values[0]  # Only one value, no conflict
            continue

        # None is ignored by the type check, so it can coexist with exactly
        # one other type.
        present = [v for v in values if v is not None]
        if len({type(v) for v in present}) > 1:
            continue  # Mixed types: conflict

        # None next to a container is a conflict. Checking it here keeps
        # None out of the recursive merge and out of the container
        # comparison, neither of which accepts it.
        has_none = len(present) != len(values)
        if has_none and present and isinstance(present[0], (dict, list)):
            continue

        if isinstance(values[0], dict):
            # All values are dictionaries of the same type: merge recursively
            nested = merge_dicts_no_conflicts(values)
            if nested:
                merged[key] = nested
        elif all(equivalent(values[0], other) for other in values[1:]):
            merged[key] = values[0]
    return merged

Merge a list of dictionaries, dropping conflicting keys.

This function is designed to be passed to xarray's combine_attrs parameter. It merges dictionaries by keeping only keys where all values are equivalent. For nested dictionaries, merging is applied recursively.

Parameters
  • dicts (List[Dict[str, Any]]): List of dictionaries to merge.
  • context (Any, optional): Optional context parameter (unused but included for xarray compatibility).
Returns
  • Dict[str, Any]: Dictionary containing only non-conflicting key-value pairs from input dictionaries. Keys with conflicting values across dictionaries are dropped.
Examples
>>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
>>> merge_dicts_no_conflicts(dicts)
{'a': 1}  # 'b' dropped due to conflict
def get_ror_display_name(ror_id: str) -> Optional[str]:
def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """
    Fetch institution display name from ROR API.

    Parameters
    ----------
    ror_id : str
        ROR identifier (full URL or just ID, e.g., "02jx3x895").
    timeout : float, optional
        Seconds to wait for the ROR API before giving up. Defaults to 10.

    Returns
    -------
    str or None
        Institution display name. None if the request fails, the response
        cannot be parsed, or the record carries no usable name.
    """
    # Clean the ROR ID - keep just the identifier part if a full URL is provided
    ror_id = ror_id.removeprefix('https://ror.org/')

    try:
        # Always pass a timeout: without one, requests may block indefinitely
        # on an unresponsive server.
        url = f"https://api.ror.org/organizations/{ror_id}"
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse JSON response
        data = response.json()

        # Prefer the name entry tagged with the 'ror_display' type
        for name_entry in data.get('names', []):
            if 'ror_display' in (name_entry.get('types') or ()):
                return name_entry.get('value')

        # Fall back to the top-level 'name' field if no such entry exists
        return data.get('name')

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None
    except (json.JSONDecodeError, KeyError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover payloads that are not shaped as
        # expected (e.g. a JSON list, or name entries that are not objects).
        print(f"Error parsing ROR API response: {e}")
        return None

Fetch institution display name from ROR API.

Parameters
  • ror_id (str): ROR identifier (full URL or just ID, e.g., "02jx3x895").
Returns
  • str or None: Institution display name, or None if request fails.