xopr.util
import itertools
import json
from typing import Any, Dict, List, Optional, Sequence, TypeVar

import numpy as np
import pandas as pd
import requests

T = TypeVar("T")


def dict_equiv(first: dict, second: dict) -> bool:
    """Compare two dictionaries for equivalence (identity or equality).

    Parameters
    ----------
    first : dict
        First dictionary to compare.
    second : dict
        Second dictionary to compare.

    Returns
    -------
    bool
        True if both arguments are the same object, or are dictionaries with
        the same keys and equivalent values (as determined by the
        ``equivalent`` function). False otherwise, including when either
        argument is not a dictionary.
    """
    if first is second:
        return True
    # A dict can never be equivalent to a non-dict; answering False here
    # avoids calling len() on None or a scalar.
    if not (isinstance(first, dict) and isinstance(second, dict)):
        return False
    if len(first) != len(second):
        return False
    # Equal sizes plus "every key of first is in second" implies equal key sets.
    return all(
        key in second and equivalent(value, second[key])
        for key, value in first.items()
    )


def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
    """Compare two sequences for element-wise equivalence.

    Parameters
    ----------
    first : Sequence[T]
        First sequence to compare.
    second : Sequence[T]
        Second sequence to compare.

    Returns
    -------
    bool
        True if both arguments are sequences of equal length whose
        corresponding elements are equivalent (as determined by the
        ``equivalent`` function). False otherwise, including when either
        argument is not a sequence or when exactly one of them is a
        string/bytes object.
    """
    # Non-sequences (None, scalars, dicts, sets) have no positional elements
    # to compare, so they are never equivalent to a sequence.
    if not (isinstance(first, Sequence) and isinstance(second, Sequence)):
        return False
    # A string iterates as 1-character strings, so ["a"] would otherwise
    # compare equal to "a".
    if isinstance(first, (str, bytes)) != isinstance(second, (str, bytes)):
        return False
    if len(first) != len(second):
        return False
    return all(itertools.starmap(equivalent, zip(first, second, strict=True)))


def equivalent(first, second) -> bool:
    """Compare two objects for equivalence (identity or equality).

    Handles different data types:

    * Arrays: Uses numpy.array_equal for comparison
    * Lists: Recursively compares all elements against another list or tuple
    * Dictionaries: Compares keys and values recursively
    * Other types: Uses equality operator or pandas null checking

    Parameters
    ----------
    first : Any
        First object to compare.
    second : Any
        Second object to compare.

    Returns
    -------
    bool
        True if objects are identical, equal, or both are null/NaN, False
        otherwise. A list is never equivalent to a non-list/tuple, and a
        dictionary is never equivalent to a non-dictionary.
    """
    if first is second:
        return True
    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
        # Best effort: operands numpy cannot compare count as "not equivalent".
        try:
            return bool(np.array_equal(first, second))
        except Exception:
            return False
    if isinstance(first, list) or isinstance(second, list):
        # Only one side is guaranteed to be a list; the other must be a
        # list or tuple for an element-wise comparison to make sense.
        if not (
            isinstance(first, (list, tuple)) and isinstance(second, (list, tuple))
        ):
            return False
        return list_equiv(first, second)
    if isinstance(first, dict) or isinstance(second, dict):
        # Same reasoning: a dict is only ever equivalent to another dict.
        if not (isinstance(first, dict) and isinstance(second, dict)):
            return False
        return dict_equiv(first, second)
    # Two nulls (None, NaN, NaT) count as equivalent even though NaN != NaN.
    return bool(
        (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
    )


def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
    """Merge a list of dictionaries, dropping conflicting keys.

    This function is designed to be passed to xarray's combine_attrs parameter.
    It merges dictionaries by keeping only keys where all values are equivalent.
    For nested dictionaries, merging is applied recursively.

    Parameters
    ----------
    dicts : List[Dict[str, Any]]
        List of dictionaries to merge.
    context : Any, optional
        Optional context parameter (unused but included for xarray compatibility).

    Returns
    -------
    Dict[str, Any]
        Dictionary containing only non-conflicting key-value pairs from input
        dictionaries, with keys in first-seen order. Keys with conflicting
        values across dictionaries are dropped, as are nested dictionaries
        whose recursive merge is empty.

    Examples
    --------
    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
    >>> merge_dicts_no_conflicts(dicts)
    {'a': 1}  # 'b' dropped due to conflict
    """
    merged: Dict[str, Any] = {}
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result's key order is deterministic (a set would not guarantee that).
    all_keys = dict.fromkeys(key for d in dicts for key in d)
    for key in all_keys:
        # Collect values for the current key from every dictionary that has it.
        values = [d[key] for d in dicts if key in d]
        if len(values) == 1:
            merged[key] = values[0]  # Only one value, no conflict
            continue

        # None is left out of the type check so that None and NaN can still
        # be recognised as equivalent nulls further down.
        present = [v for v in values if v is not None]
        if len({type(v) for v in present}) > 1:
            continue  # Mixed types: conflict

        # None next to a dict or list is a conflict; there is nothing to
        # recurse into or compare element-wise.
        if present and len(present) < len(values) and isinstance(present[0], (dict, list)):
            continue

        if present and isinstance(present[0], dict):
            # All values are dictionaries: merge them recursively.
            merged_dict = merge_dicts_no_conflicts(present)
            if merged_dict:
                merged[key] = merged_dict
        elif all(equivalent(values[0], other) for other in values[1:]):
            merged[key] = values[0]
    return merged


def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """Look up the display name of an organization via the ROR API.

    Parameters
    ----------
    ror_id : str
        The ROR identifier, either as a full URL
        (e.g. "https://ror.org/02jx3x895") or bare (e.g. "02jx3x895").
    timeout : float, optional
        Seconds to wait for the ROR API before giving up. Defaults to 10.

    Returns
    -------
    Optional[str]
        The value of the name entry whose types include "ror_display"; if
        there is none, the record's top-level "name" field. None if the
        identifier is empty, the request fails, or the response cannot be
        parsed.
    """
    # Reduce a full ROR URL to the bare identifier.
    ror_id = ror_id.strip().removeprefix('https://ror.org/').strip('/')
    if not ror_id:
        # An empty id would address the organizations collection URL rather
        # than a single record.
        return None

    try:
        url = f"https://api.ror.org/organizations/{ror_id}"
        # Without a timeout, requests may block indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        data = response.json()

        # Prefer the name entry explicitly flagged for display.
        for name_entry in data.get('names') or []:
            if 'ror_display' in (name_entry.get('types') or []):
                return name_entry.get('value')

        # Fallback to primary name if no display name is flagged.
        return data.get('name')

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None
    except (ValueError, KeyError, AttributeError, TypeError) as e:
        # ValueError covers json.JSONDecodeError; the others cover a payload
        # whose shape is not the expected mapping of mappings.
        print(f"Error parsing ROR API response: {e}")
        return None
def
dict_equiv(first: dict, second: dict) -> bool:
def dict_equiv(first: dict, second: dict) -> bool:
    """Compare two dictionaries for equivalence (identity or equality).

    Parameters
    ----------
    first : dict
        First dictionary to compare.
    second : dict
        Second dictionary to compare.

    Returns
    -------
    bool
        True if both arguments are the same object, or are dictionaries with
        the same keys and equivalent values (as determined by the
        ``equivalent`` function). False otherwise, including when either
        argument is not a dictionary.
    """
    if first is second:
        return True
    # ``equivalent`` dispatches here as soon as ONE side is a dict, so the
    # other side may be None, a scalar or a list. A dict is never equivalent
    # to a non-dict; answering False avoids calling len() on such values.
    if not (isinstance(first, dict) and isinstance(second, dict)):
        return False
    if len(first) != len(second):
        return False
    # Equal sizes plus "every key of first is in second" implies equal key sets.
    return all(
        key in second and equivalent(value, second[key])
        for key, value in first.items()
    )
Compare two dictionaries for equivalence (identity or equality).
Parameters
- first (dict): First dictionary to compare.
- second (dict): Second dictionary to compare.
Returns
- bool: True if dictionaries are identical or have the same keys with equivalent values (as determined by the equivalent function), False otherwise.
def
list_equiv(first: Sequence[~T], second: Sequence[~T]) -> bool:
39def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool: 40 """Compare two sequences for element-wise equivalence. 41 42 Parameters 43 ---------- 44 first : Sequence[T] 45 First sequence to compare. 46 second : Sequence[T] 47 Second sequence to compare. 48 49 Returns 50 ------- 51 bool 52 True if sequences have equal length and all corresponding elements 53 are equivalent (as determined by the equivalent function), False otherwise. 54 """ 55 if len(first) != len(second): 56 return False 57 return all(itertools.starmap(equivalent, zip(first, second, strict=True)))
Compare two sequences for element-wise equivalence.
Parameters
- first (Sequence[T]): First sequence to compare.
- second (Sequence[T]): Second sequence to compare.
Returns
- bool: True if sequences have equal length and all corresponding elements are equivalent (as determined by the equivalent function), False otherwise.
def
equivalent(first, second) -> bool:
def equivalent(first, second) -> bool:
    """Compare two objects for equivalence (identity or equality).

    Handles different data types:

    * Arrays: Uses numpy.array_equal for comparison
    * Lists: Recursively compares all elements against another list or tuple
    * Dictionaries: Compares keys and values recursively
    * Other types: Uses equality operator or pandas null checking

    Parameters
    ----------
    first : Any
        First object to compare.
    second : Any
        Second object to compare.

    Returns
    -------
    bool
        True if objects are identical, equal, or both are null/NaN, False
        otherwise. A list is never equivalent to a non-list/tuple, and a
        dictionary is never equivalent to a non-dictionary.
    """
    if first is second:
        return True
    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
        # Best effort: operands numpy cannot compare count as "not equivalent".
        try:
            return bool(np.array_equal(first, second))
        except Exception:
            return False
    if isinstance(first, list) or isinstance(second, list):
        # Only one side is guaranteed to be a list; the other must be a
        # list or tuple for an element-wise comparison to make sense.
        # Without this check, None or a scalar fails on len() and a string
        # can match a list of its characters.
        if not (
            isinstance(first, (list, tuple)) and isinstance(second, (list, tuple))
        ):
            return False
        return list_equiv(first, second)
    if isinstance(first, dict) or isinstance(second, dict):
        # Same reasoning: a dict is only ever equivalent to another dict.
        if not (isinstance(first, dict) and isinstance(second, dict)):
            return False
        return dict_equiv(first, second)
    # Two nulls (None, NaN, NaT) count as equivalent even though NaN != NaN.
    return bool(
        (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
    )
Compare two objects for equivalence (identity or equality).
Handles different data types:
- Arrays: Uses numpy.array_equal for comparison
- Lists/sequences: Recursively compares all elements
- Dictionaries: Compares keys and values recursively
- Other types: Uses equality operator or pandas null checking
Parameters
- first (Any): First object to compare.
- second (Any): Second object to compare.
Returns
- bool: True if objects are identical, equal, or both are null/NaN, False otherwise.
def
merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
def merge_dicts_no_conflicts(dicts: List[Dict[str, Any]], context=None) -> Dict[str, Any]:
    """Merge a list of dictionaries, dropping conflicting keys.

    This function is designed to be passed to xarray's combine_attrs parameter.
    It merges dictionaries by keeping only keys where all values are equivalent.
    For nested dictionaries, merging is applied recursively.

    Parameters
    ----------
    dicts : List[Dict[str, Any]]
        List of dictionaries to merge.
    context : Any, optional
        Optional context parameter (unused but included for xarray compatibility).

    Returns
    -------
    Dict[str, Any]
        Dictionary containing only non-conflicting key-value pairs from input
        dictionaries, with keys in first-seen order. Keys with conflicting
        values across dictionaries are dropped, as are nested dictionaries
        whose recursive merge is empty.

    Examples
    --------
    >>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
    >>> merge_dicts_no_conflicts(dicts)
    {'a': 1}  # 'b' dropped due to conflict
    """
    merged: Dict[str, Any] = {}
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result's key order is deterministic (a set would not guarantee that).
    all_keys = dict.fromkeys(key for d in dicts for key in d)
    for key in all_keys:
        # Collect values for the current key from every dictionary that has it.
        values = [d[key] for d in dicts if key in d]
        if len(values) == 1:
            merged[key] = values[0]  # Only one value, no conflict
            continue

        # None is left out of the type check so that None and NaN can still
        # be recognised as equivalent nulls further down.
        present = [v for v in values if v is not None]
        if len({type(v) for v in present}) > 1:
            continue  # Mixed types: conflict

        # None next to a dict or list is a conflict. Recursing into None or
        # comparing it element-wise would raise instead of dropping the key.
        if present and len(present) < len(values) and isinstance(present[0], (dict, list)):
            continue

        if present and isinstance(present[0], dict):
            # All values are dictionaries: merge them recursively.
            merged_dict = merge_dicts_no_conflicts(present)
            if merged_dict:
                merged[key] = merged_dict
        elif all(equivalent(values[0], other) for other in values[1:]):
            merged[key] = values[0]
    return merged
Merge a list of dictionaries, dropping conflicting keys.
This function is designed to be passed to xarray's combine_attrs parameter. It merges dictionaries by keeping only keys where all values are equivalent. For nested dictionaries, merging is applied recursively.
Parameters
- dicts (List[Dict[str, Any]]): List of dictionaries to merge.
- context (Any, optional): Optional context parameter (unused but included for xarray compatibility).
Returns
- Dict[str, Any]: Dictionary containing only non-conflicting key-value pairs from input dictionaries. Keys with conflicting values across dictionaries are dropped.
Examples
>>> dicts = [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}]
>>> merge_dicts_no_conflicts(dicts)
{'a': 1} # 'b' dropped due to conflict
def
get_ror_display_name(ror_id: str) -> Optional[str]:
def get_ror_display_name(ror_id: str, timeout: float = 10.0) -> Optional[str]:
    """Look up the display name of an organization via the ROR API.

    Parameters
    ----------
    ror_id : str
        The ROR identifier, either as a full URL
        (e.g. "https://ror.org/02jx3x895") or bare (e.g. "02jx3x895").
    timeout : float, optional
        Seconds to wait for the ROR API before giving up. Defaults to 10.

    Returns
    -------
    Optional[str]
        The value of the name entry whose types include "ror_display"; if
        there is none, the record's top-level "name" field. None if the
        identifier is empty, the request fails, or the response cannot be
        parsed.
    """
    # Reduce a full ROR URL to the bare identifier. removeprefix only touches
    # the start of the string, unlike str.replace.
    ror_id = ror_id.strip().removeprefix('https://ror.org/').strip('/')
    if not ror_id:
        # An empty id would address the organizations collection URL rather
        # than a single record.
        return None

    try:
        url = f"https://api.ror.org/organizations/{ror_id}"
        # Without a timeout, requests may block indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        data = response.json()

        # Prefer the name entry explicitly flagged for display. ``or []``
        # tolerates a missing or null "names"/"types" field.
        for name_entry in data.get('names') or []:
            if 'ror_display' in (name_entry.get('types') or []):
                return name_entry.get('value')

        # Fallback to primary name if no display name is flagged.
        return data.get('name')

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from ROR API: {e}")
        return None
    except (ValueError, KeyError, AttributeError, TypeError) as e:
        # ValueError covers json.JSONDecodeError; the others cover a payload
        # whose shape is not the expected mapping of mappings.
        print(f"Error parsing ROR API response: {e}")
        return None
Query the ROR API and return the display name (the name entry typed ror_display) for a given ROR ID.
Args: ror_id (str): The ROR identifier (e.g., "https://ror.org/02jx3x895" or just "02jx3x895")
Returns: Optional[str]: The ror_display name if found, otherwise the record's primary name; None if neither is present or the request fails