xopr.util
General utility functions for xOPR
This module provides helper functions for data comparison, dictionary merging, and metadata retrieval that are used throughout xOPR.
"""
General utility functions for xOPR

This module provides helper functions for data comparison, dictionary merging,
and metadata retrieval that are used throughout xOPR.
"""

import itertools
import json
from typing import Any, Dict, List, Optional, Sequence, TypeVar

import numpy as np
import pandas as pd
import requests

T = TypeVar("T")


def dict_equiv(first: dict, second: dict) -> bool:
    """Compare two dictionaries for equivalence (identity or equality).

    Parameters
    ----------
    first : dict
        First dictionary to compare.
    second : dict
        Second dictionary to compare.

    Returns
    -------
    bool
        True if dictionaries are identical or have the same keys with
        equivalent values (as determined by the equivalent function), False otherwise.
    """
    if first is second:
        return True
    # Equal sizes plus "every key of first is in second" implies equal key sets.
    if len(first) != len(second):
        return False
    return all(
        key in second and equivalent(first[key], second[key])
        for key in first
    )


def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
    """Compare two sequences for element-wise equivalence.

    Parameters
    ----------
    first : Sequence[T]
        First sequence to compare.
    second : Sequence[T]
        Second sequence to compare.

    Returns
    -------
    bool
        True if sequences have equal length and all corresponding elements
        are equivalent (as determined by the equivalent function), False otherwise.
    """
    if len(first) != len(second):
        return False
    return all(itertools.starmap(equivalent, zip(first, second, strict=True)))


def equivalent(first, second) -> bool:
    """Compare two objects for equivalence (identity or equality).

    Handles different data types:

    * Arrays: Uses numpy.array_equal for comparison
    * Lists/sequences: Recursively compares all elements
    * Dictionaries: Compares keys and values recursively
    * Other types: Uses equality operator or pandas null checking

    A list is only compared element-wise against another sequence, and a
    dictionary only against another dictionary. Any other pairing (for
    example a list and ``None``) is reported as not equivalent.

    Parameters
    ----------
    first : Any
        First object to compare.
    second : Any
        Second object to compare.

    Returns
    -------
    bool
        True if objects are identical, equal, or both are null/NaN, False otherwise.
    """
    if first is second:
        return True
    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
        try:
            return np.array_equal(first, second)
        except Exception:
            # Operands that numpy cannot compare are treated as different.
            return False
    if isinstance(first, list) or isinstance(second, list):
        # list_equiv needs len() and iteration on both operands; a list paired
        # with a scalar, None or a dict can never be equivalent.
        if not (isinstance(first, Sequence) and isinstance(second, Sequence)):
            return False
        return list_equiv(first, second)
    if isinstance(first, dict) or isinstance(second, dict):
        # dict_equiv needs mapping access on both operands.
        if not (isinstance(first, dict) and isinstance(second, dict)):
            return False
        return dict_equiv(first, second)
    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]


def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
    """Merge a list of dictionaries, dropping conflicting keys.

    This function is designed to be passed to xarray's combine_attrs parameter.
    It merges dictionaries by keeping only keys where all values are equivalent.
    For nested dictionaries, merging is applied recursively.

    Parameters
    ----------
    dicts : List[Dict[str, Any]]
        List of dictionaries to merge.
    context : Any, optional
        Optional context parameter (unused but included for xarray compatibility).

    Returns
    -------
    Dict[str, Any]
        Dictionary containing only non-conflicting key-value pairs from input dictionaries.
        Keys with conflicting values across dictionaries are dropped. Keys appear
        in the order in which they are first seen across ``dicts``.

    Examples
    --------
    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
    >>> merge_dicts_no_conflicts(dicts)
    {'a': 1}  # 'b' dropped due to conflict
    """
    merged: Dict[str, Any] = {}
    # dict.fromkeys de-duplicates while keeping first-seen order, so the key
    # order of the result is deterministic (iterating a set is not).
    all_keys = dict.fromkeys(key for d in dicts for key in d)
    for key in all_keys:
        # Collect values for the current key from all dictionaries that have it
        values = [d[key] for d in dicts if key in d]
        if len(values) == 1:
            merged[key] = values[0]  # Only one value, no conflict
            continue

        # None is left out of the type check so that None and NaN can still be
        # judged by the equivalence test below.
        non_null = [v for v in values if v is not None]
        if len({type(v) for v in non_null}) > 1:
            continue  # Mixed types: conflict
        has_null = len(non_null) != len(values)
        if has_null and non_null and isinstance(non_null[0], (dict, list)):
            # A dict or list can never be equivalent to None; treat the mix as
            # a conflict instead of recursing into / comparing against None.
            continue

        if isinstance(values[0], dict):
            # All values are dictionaries here: merge them recursively
            merged_dict = merge_dicts_no_conflicts(values)
            if merged_dict:
                merged[key] = merged_dict
        elif all(equivalent(values[0], other) for other in values[1:]):
            merged[key] = values[0]
    return merged


def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """
    Fetch institution display name from ROR API.

    Parameters
    ----------
    ror_id : str
        ROR identifier (full URL or just ID, e.g., "02jx3x895").
    timeout : float, optional
        Seconds to wait for the ROR API before giving up (default 10).

    Returns
    -------
    str or None
        Institution display name, or None if the identifier is empty, the
        request fails, or the response cannot be parsed. Failures are
        reported with ``print``.
    """
    # Reduce a full ROR URL to the bare identifier; tolerate surrounding
    # whitespace and a trailing slash.
    ror_id = ror_id.strip()
    for prefix in ('https://ror.org/', 'http://ror.org/'):
        if ror_id.startswith(prefix):
            ror_id = ror_id.removeprefix(prefix)
            break
    ror_id = ror_id.strip('/')
    if not ror_id:
        # An empty ID would query the organization listing endpoint instead.
        return None

    try:
        # Make request to ROR API; the timeout keeps a stalled connection
        # from blocking the caller indefinitely.
        url = f"https://api.ror.org/organizations/{ror_id}"
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse JSON response
        data = response.json()

        # Prefer the name entry tagged 'ror_display'
        names = data.get('names', [])
        for name_entry in names:
            if name_entry.get('types') and 'ror_display' in name_entry['types']:
                return name_entry.get('value')

        # Fallback to the top-level 'name' field if no 'ror_display' entry exists
        return data.get('name')

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None
    except (json.JSONDecodeError, KeyError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover payloads that are not shaped as
        # expected (e.g. a list where a dict was expected).
        print(f"Error parsing ROR API response: {e}")
        return None
def dict_equiv(first: dict, second: dict) -> bool:
def dict_equiv(first: dict, second: dict) -> bool:
    """Report whether two dictionaries are equivalent.

    Parameters
    ----------
    first : dict
        Left-hand dictionary.
    second : dict
        Right-hand dictionary.

    Returns
    -------
    bool
        True when both arguments are the same object, or when they have the
        same number of entries, every key of ``first`` is present in
        ``second``, and each pair of values passes ``equivalent``.
        False otherwise.
    """
    # Same object: nothing to compare.
    if first is second:
        return True
    # Equal sizes plus "every key of first is in second" implies equal key sets.
    if len(first) != len(second):
        return False
    return all(
        key in second and equivalent(first[key], second[key])
        for key in first
    )
Compare two dictionaries for equivalence (identity or equality).
Parameters
- first (dict): First dictionary to compare.
- second (dict): Second dictionary to compare.
Returns
- bool: True if dictionaries are identical or have the same keys with equivalent values (as determined by the equivalent function), False otherwise.
def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
    """Report whether two sequences are element-wise equivalent.

    Parameters
    ----------
    first : Sequence[T]
        Left-hand sequence.
    second : Sequence[T]
        Right-hand sequence.

    Returns
    -------
    bool
        True when both sequences have the same length and every pair of
        elements at the same position passes ``equivalent``; False otherwise.
    """
    # Different lengths can never match; this also guarantees the strict zip
    # below will not raise.
    if len(first) != len(second):
        return False
    for left, right in zip(first, second, strict=True):
        if not equivalent(left, right):
            return False
    return True
Compare two sequences for element-wise equivalence.
Parameters
- first (Sequence[T]): First sequence to compare.
- second (Sequence[T]): Second sequence to compare.
Returns
- bool: True if sequences have equal length and all corresponding elements are equivalent (as determined by the equivalent function), False otherwise.
def equivalent(first, second) -> bool:
def equivalent(first, second) -> bool:
    """Compare two objects for equivalence (identity or equality).

    Handles different data types:

    * Arrays: Uses numpy.array_equal for comparison
    * Lists/sequences: Recursively compares all elements
    * Dictionaries: Compares keys and values recursively
    * Other types: Uses equality operator or pandas null checking

    A list is only compared element-wise against another sequence, and a
    dictionary only against another dictionary. Any other pairing (for
    example a list and ``None``) is reported as not equivalent.

    Parameters
    ----------
    first : Any
        First object to compare.
    second : Any
        Second object to compare.

    Returns
    -------
    bool
        True if objects are identical, equal, or both are null/NaN, False otherwise.
    """
    if first is second:
        return True
    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
        try:
            return np.array_equal(first, second)
        except Exception:
            # Operands that numpy cannot compare are treated as different.
            return False
    if isinstance(first, list) or isinstance(second, list):
        # list_equiv needs len() and iteration on both operands; a list paired
        # with a scalar, None or a dict can never be equivalent.
        if not (isinstance(first, Sequence) and isinstance(second, Sequence)):
            return False
        return list_equiv(first, second)
    if isinstance(first, dict) or isinstance(second, dict):
        # dict_equiv needs mapping access on both operands.
        if not (isinstance(first, dict) and isinstance(second, dict)):
            return False
        return dict_equiv(first, second)
    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
Compare two objects for equivalence (identity or equality).
Handles different data types:
- Arrays: Uses numpy.array_equal for comparison
- Lists/sequences: Recursively compares all elements
- Dictionaries: Compares keys and values recursively
- Other types: Uses equality operator or pandas null checking
Parameters
- first (Any): First object to compare.
- second (Any): Second object to compare.
Returns
- bool: True if objects are identical, equal, or both are null/NaN, False otherwise.
def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
    """Merge a list of dictionaries, dropping conflicting keys.

    This function is designed to be passed to xarray's combine_attrs parameter.
    It merges dictionaries by keeping only keys where all values are equivalent.
    For nested dictionaries, merging is applied recursively.

    Parameters
    ----------
    dicts : List[Dict[str, Any]]
        List of dictionaries to merge.
    context : Any, optional
        Optional context parameter (unused but included for xarray compatibility).

    Returns
    -------
    Dict[str, Any]
        Dictionary containing only non-conflicting key-value pairs from input dictionaries.
        Keys with conflicting values across dictionaries are dropped. Keys appear
        in the order in which they are first seen across ``dicts``.

    Examples
    --------
    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
    >>> merge_dicts_no_conflicts(dicts)
    {'a': 1}  # 'b' dropped due to conflict
    """
    merged: Dict[str, Any] = {}
    # dict.fromkeys de-duplicates while keeping first-seen order, so the key
    # order of the result is deterministic (iterating a set is not).
    all_keys = dict.fromkeys(key for d in dicts for key in d)
    for key in all_keys:
        # Collect values for the current key from all dictionaries that have it
        values = [d[key] for d in dicts if key in d]
        if len(values) == 1:
            merged[key] = values[0]  # Only one value, no conflict
            continue

        # None is left out of the type check so that None and NaN can still be
        # judged by the equivalence test below.
        non_null = [v for v in values if v is not None]
        if len({type(v) for v in non_null}) > 1:
            continue  # Mixed types: conflict
        has_null = len(non_null) != len(values)
        if has_null and non_null and isinstance(non_null[0], (dict, list)):
            # A dict or list can never be equivalent to None; treat the mix as
            # a conflict instead of recursing into / comparing against None.
            continue

        if isinstance(values[0], dict):
            # All values are dictionaries here: merge them recursively
            merged_dict = merge_dicts_no_conflicts(values)
            if merged_dict:
                merged[key] = merged_dict
        elif all(equivalent(values[0], other) for other in values[1:]):
            merged[key] = values[0]
    return merged
Merge a list of dictionaries, dropping conflicting keys.
This function is designed to be passed to xarray's combine_attrs parameter. It merges dictionaries by keeping only keys where all values are equivalent. For nested dictionaries, merging is applied recursively.
Parameters
- dicts (List[Dict[str, Any]]): List of dictionaries to merge.
- context (Any, optional): Optional context parameter (unused but included for xarray compatibility).
Returns
- Dict[str, Any]: Dictionary containing only non-conflicting key-value pairs from input dictionaries. Keys with conflicting values across dictionaries are dropped.
Examples
>>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
>>> merge_dicts_no_conflicts(dicts)
{'a': 1} # 'b' dropped due to conflict
def get_ror_display_name(ror_id: str) -> Optional[str]:
def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """
    Fetch institution display name from ROR API.

    Parameters
    ----------
    ror_id : str
        ROR identifier (full URL or just ID, e.g., "02jx3x895").
    timeout : float, optional
        Seconds to wait for the ROR API before giving up (default 10).

    Returns
    -------
    str or None
        Institution display name, or None if the identifier is empty, the
        request fails, or the response cannot be parsed. Failures are
        reported with ``print``.
    """
    # Reduce a full ROR URL to the bare identifier; tolerate surrounding
    # whitespace and a trailing slash.
    ror_id = ror_id.strip()
    for prefix in ('https://ror.org/', 'http://ror.org/'):
        if ror_id.startswith(prefix):
            ror_id = ror_id.removeprefix(prefix)
            break
    ror_id = ror_id.strip('/')
    if not ror_id:
        # An empty ID would query the organization listing endpoint instead.
        return None

    try:
        # Make request to ROR API; the timeout keeps a stalled connection
        # from blocking the caller indefinitely.
        url = f"https://api.ror.org/organizations/{ror_id}"
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse JSON response
        data = response.json()

        # Prefer the name entry tagged 'ror_display'
        names = data.get('names', [])
        for name_entry in names:
            if name_entry.get('types') and 'ror_display' in name_entry['types']:
                return name_entry.get('value')

        # Fallback to the top-level 'name' field if no 'ror_display' entry exists
        return data.get('name')

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None
    except (json.JSONDecodeError, KeyError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover payloads that are not shaped as
        # expected (e.g. a list where a dict was expected).
        print(f"Error parsing ROR API response: {e}")
        return None
Fetch institution display name from ROR API.
Parameters
- ror_id (str): ROR identifier (full URL or just ID, e.g., "02jx3x895").
Returns
- str or None: Institution display name, or None if request fails.