xopr.stac_cache
STAC catalog caching utilities for xopr.
This module provides functions to cache STAC GeoParquet catalogs locally, reducing network latency for repeated queries.
"""
STAC catalog caching utilities for xopr.

This module provides functions to cache STAC GeoParquet catalogs locally,
reducing network latency for repeated queries.
"""

import os
from pathlib import Path
from typing import Optional

import requests
from platformdirs import user_cache_dir

# Cloud URLs for bedmap catalogs
BEDMAP_CATALOG_BASE_URL = "https://storage.googleapis.com/opr_stac/bedmap"
BEDMAP_CATALOG_FILES = ["bedmap1.parquet", "bedmap2.parquet", "bedmap3.parquet"]


def get_cache_dir() -> Path:
    """
    Get the xopr cache directory.

    Checks $XOPR_CACHE_DIR environment variable first, otherwise uses
    platform-specific user cache directory.

    Returns
    -------
    Path
        Path to xopr cache directory
    """
    env_cache = os.environ.get("XOPR_CACHE_DIR")
    if env_cache:
        cache_path = Path(env_cache)
    else:
        cache_path = Path(user_cache_dir("xopr", "englacial"))

    return cache_path


def get_bedmap_catalog_dir() -> Path:
    """
    Get the bedmap catalog cache directory.

    Returns
    -------
    Path
        Path to bedmap catalog directory within cache
    """
    return get_cache_dir() / "catalogs" / "bedmap"


def _download_file(url: str, dest: Path) -> bool:
    """
    Download a file from URL to destination path.

    The payload is streamed into a temporary ``<name>.part`` file next to
    ``dest`` and only moved into place once the download has completed, so
    a failed or interrupted transfer never leaves a truncated file at
    ``dest`` (and never clobbers a previously cached copy).

    Parameters
    ----------
    url : str
        URL to download from
    dest : Path
        Destination file path

    Returns
    -------
    bool
        True if download succeeded, False otherwise
    """
    partial = dest.with_name(dest.name + ".part")
    try:
        # Using the response as a context manager releases the connection
        # even when streaming is aborted part-way through.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            dest.parent.mkdir(parents=True, exist_ok=True)

            with open(partial, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Promote the finished download in a single step.
        os.replace(partial, dest)
        return True
    except Exception as e:
        # Deliberately broad: downloading is best-effort and callers fall
        # back to other sources when this returns False.
        print(f"Warning: Failed to download {url}: {e}")
        try:
            partial.unlink()
        except OSError:
            # Nothing to clean up (or cleanup itself is not possible).
            pass
        return False


def ensure_bedmap_catalogs(force_download: bool = False) -> Optional[Path]:
    """
    Ensure bedmap catalogs are cached locally, downloading if needed.

    Parameters
    ----------
    force_download : bool, default False
        If True, re-download catalogs even if they exist

    Returns
    -------
    Path or None
        Path to the catalog directory if at least one catalog file is
        available locally (even when some downloads failed); None if the
        cache directory cannot be created or no catalog file is present
    """
    catalog_dir = get_bedmap_catalog_dir()
    targets = [catalog_dir / name for name in BEDMAP_CATALOG_FILES]

    # Nothing to do when every catalog is already cached
    if not force_download and all(path.exists() for path in targets):
        return catalog_dir

    # Download missing catalogs
    print(f"Downloading bedmap catalogs to {catalog_dir}...")
    try:
        catalog_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        # An unusable cache location must not abort the caller; returning
        # None lets it fall back to the cloud catalogs.
        print(f"Warning: Cannot create cache directory {catalog_dir}: {e}")
        return None

    success = True
    for dest in targets:
        if dest.exists() and not force_download:
            continue

        url = f"{BEDMAP_CATALOG_BASE_URL}/{dest.name}"
        if not _download_file(url, dest):
            success = False

    if success:
        print("Bedmap catalogs cached successfully")
        return catalog_dir

    print("Warning: Some catalogs failed to download")
    # Return catalog_dir anyway - partial cache may still be useful
    return catalog_dir if any(path.exists() for path in targets) else None


def get_bedmap_catalog_path() -> str:
    """
    Get the path pattern for bedmap catalogs, downloading if needed.

    This is the main entry point for query functions. It ensures catalogs
    are cached locally and returns the glob pattern for querying.

    Returns
    -------
    str
        Glob pattern to local bedmap catalog files, or cloud URL as fallback
    """
    catalog_dir = ensure_bedmap_catalogs()

    if catalog_dir and any((catalog_dir / f).exists() for f in BEDMAP_CATALOG_FILES):
        return str(catalog_dir / "bedmap*.parquet")
    else:
        # Fallback to cloud URL if local cache failed
        print("Warning: Using cloud catalogs (local cache unavailable)")
        return f"{BEDMAP_CATALOG_BASE_URL}/bedmap*.parquet"


def clear_bedmap_cache() -> None:
    """
    Clear cached bedmap catalogs.

    Useful for forcing a fresh download of catalogs.
    """
    catalog_dir = get_bedmap_catalog_dir()
    if catalog_dir.exists():
        for f in BEDMAP_CATALOG_FILES:
            path = catalog_dir / f
            if path.exists():
                path.unlink()
        print(f"Cleared bedmap catalog cache at {catalog_dir}")
BEDMAP_CATALOG_BASE_URL =
'https://storage.googleapis.com/opr_stac/bedmap'
BEDMAP_CATALOG_FILES =
['bedmap1.parquet', 'bedmap2.parquet', 'bedmap3.parquet']
def
get_cache_dir() -> pathlib.Path:
def get_cache_dir() -> Path:
    """
    Resolve the directory xopr uses for cached data.

    A non-empty $XOPR_CACHE_DIR environment variable takes precedence;
    otherwise the platform-specific user cache directory is used.

    Returns
    -------
    Path
        Path to xopr cache directory
    """
    # An unset or empty variable falls through to the platform default.
    override = os.environ.get("XOPR_CACHE_DIR")
    return Path(override or user_cache_dir("xopr", "englacial"))
Get the xopr cache directory.
Checks $XOPR_CACHE_DIR environment variable first, otherwise uses platform-specific user cache directory.
Returns
- Path: Path to xopr cache directory
def
get_bedmap_catalog_dir() -> pathlib.Path:
def get_bedmap_catalog_dir() -> Path:
    """
    Locate the directory that holds cached bedmap catalogs.

    Returns
    -------
    Path
        The ``catalogs/bedmap`` subdirectory of the xopr cache directory
    """
    cache_root = get_cache_dir()
    return cache_root.joinpath("catalogs", "bedmap")
Get the bedmap catalog cache directory.
Returns
- Path: Path to bedmap catalog directory within cache
def
ensure_bedmap_catalogs(force_download: bool = False) -> Optional[pathlib.Path]:
def ensure_bedmap_catalogs(force_download: bool = False) -> Optional[Path]:
    """
    Ensure bedmap catalogs are cached locally, downloading if needed.

    Parameters
    ----------
    force_download : bool, default False
        If True, re-download catalogs even if they exist

    Returns
    -------
    Path or None
        Path to the catalog directory if at least one catalog file is
        available locally (even when some downloads failed); None if the
        cache directory cannot be created or no catalog file is present
    """
    catalog_dir = get_bedmap_catalog_dir()
    targets = [catalog_dir / name for name in BEDMAP_CATALOG_FILES]

    # Nothing to do when every catalog is already cached
    if not force_download and all(path.exists() for path in targets):
        return catalog_dir

    # Download missing catalogs
    print(f"Downloading bedmap catalogs to {catalog_dir}...")
    try:
        catalog_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        # An unusable cache location must not abort the caller; returning
        # None lets it fall back to the cloud catalogs.
        print(f"Warning: Cannot create cache directory {catalog_dir}: {e}")
        return None

    success = True
    for dest in targets:
        if dest.exists() and not force_download:
            continue

        url = f"{BEDMAP_CATALOG_BASE_URL}/{dest.name}"
        if not _download_file(url, dest):
            success = False

    if success:
        print("Bedmap catalogs cached successfully")
        return catalog_dir

    print("Warning: Some catalogs failed to download")
    # Return catalog_dir anyway - partial cache may still be useful
    return catalog_dir if any(path.exists() for path in targets) else None
Ensure bedmap catalogs are cached locally, downloading if needed.
Parameters
- force_download (bool, default False): If True, re-download catalogs even if they exist
Returns
- Path or None: Path to the catalog directory if at least one catalog file is available locally (even if some downloads failed); None if no catalog file could be cached
def
get_bedmap_catalog_path() -> str:
def get_bedmap_catalog_path() -> str:
    """
    Return the location pattern that query functions should read bedmap catalogs from.

    The local cache is populated first (downloading when necessary). If at
    least one catalog file is present locally, a glob pattern over the cache
    directory is returned; otherwise the cloud location is used instead.

    Returns
    -------
    str
        Glob pattern to local bedmap catalog files, or cloud URL as fallback
    """
    local_dir = ensure_bedmap_catalogs()

    have_local = bool(local_dir) and any(
        (local_dir / name).exists() for name in BEDMAP_CATALOG_FILES
    )
    if not have_local:
        # No usable local copy: point callers at the cloud catalogs instead.
        print("Warning: Using cloud catalogs (local cache unavailable)")
        return f"{BEDMAP_CATALOG_BASE_URL}/bedmap*.parquet"

    return str(local_dir / "bedmap*.parquet")
Get the path pattern for bedmap catalogs, downloading if needed.
This is the main entry point for query functions. It ensures catalogs are cached locally and returns the glob pattern for querying.
Returns
- str: Glob pattern to local bedmap catalog files, or cloud URL as fallback
def
clear_bedmap_cache() -> None:
def clear_bedmap_cache() -> None:
    """
    Delete the locally cached bedmap catalog files.

    Only the files listed in BEDMAP_CATALOG_FILES are removed; the cache
    directory itself is left in place. Does nothing (and prints nothing)
    when the cache directory does not exist.
    """
    cache_root = get_bedmap_catalog_dir()
    if not cache_root.exists():
        return

    # Lazily built so each file is checked right before it is removed.
    candidates = (cache_root / name for name in BEDMAP_CATALOG_FILES)
    for cached in candidates:
        if cached.exists():
            cached.unlink()

    print(f"Cleared bedmap catalog cache at {cache_root}")
Clear cached bedmap catalogs.
Useful for forcing a fresh download of catalogs.