xopr.stac_cache

STAC catalog caching utilities for xopr.

This module provides functions to cache STAC GeoParquet catalogs locally, reducing network latency for repeated queries.

  1"""
  2STAC catalog caching utilities for xopr.
  3
  4This module provides functions to cache STAC GeoParquet catalogs locally,
  5reducing network latency for repeated queries.
  6"""
  7
  8import os
  9from pathlib import Path
 10from typing import Optional
 11
 12import requests
 13from platformdirs import user_cache_dir
 14
 15# Cloud URLs for bedmap catalogs
 16BEDMAP_CATALOG_BASE_URL = "https://storage.googleapis.com/opr_stac/bedmap"
 17BEDMAP_CATALOG_FILES = ["bedmap1.parquet", "bedmap2.parquet", "bedmap3.parquet"]
 18
 19
 20def get_cache_dir() -> Path:
 21    """
 22    Get the xopr cache directory.
 23
 24    Checks $XOPR_CACHE_DIR environment variable first, otherwise uses
 25    platform-specific user cache directory.
 26
 27    Returns
 28    -------
 29    Path
 30        Path to xopr cache directory
 31    """
 32    env_cache = os.environ.get("XOPR_CACHE_DIR")
 33    if env_cache:
 34        cache_path = Path(env_cache)
 35    else:
 36        cache_path = Path(user_cache_dir("xopr", "englacial"))
 37
 38    return cache_path
 39
 40
 41def get_bedmap_catalog_dir() -> Path:
 42    """
 43    Get the bedmap catalog cache directory.
 44
 45    Returns
 46    -------
 47    Path
 48        Path to bedmap catalog directory within cache
 49    """
 50    return get_cache_dir() / "catalogs" / "bedmap"
 51
 52
 53def _download_file(url: str, dest: Path) -> bool:
 54    """
 55    Download a file from URL to destination path.
 56
 57    Parameters
 58    ----------
 59    url : str
 60        URL to download from
 61    dest : Path
 62        Destination file path
 63
 64    Returns
 65    -------
 66    bool
 67        True if download succeeded, False otherwise
 68    """
 69    try:
 70        response = requests.get(url, stream=True, timeout=30)
 71        response.raise_for_status()
 72
 73        dest.parent.mkdir(parents=True, exist_ok=True)
 74
 75        with open(dest, "wb") as f:
 76            for chunk in response.iter_content(chunk_size=8192):
 77                f.write(chunk)
 78
 79        return True
 80    except Exception as e:
 81        print(f"Warning: Failed to download {url}: {e}")
 82        return False
 83
 84
 85def ensure_bedmap_catalogs(force_download: bool = False) -> Optional[Path]:
 86    """
 87    Ensure bedmap catalogs are cached locally, downloading if needed.
 88
 89    Parameters
 90    ----------
 91    force_download : bool, default False
 92        If True, re-download catalogs even if they exist
 93
 94    Returns
 95    -------
 96    Path or None
 97        Path to catalog directory if successful, None if download failed
 98    """
 99    catalog_dir = get_bedmap_catalog_dir()
100
101    # Check if all catalogs exist
102    all_exist = all((catalog_dir / f).exists() for f in BEDMAP_CATALOG_FILES)
103
104    if all_exist and not force_download:
105        return catalog_dir
106
107    # Download missing catalogs
108    print(f"Downloading bedmap catalogs to {catalog_dir}...")
109    catalog_dir.mkdir(parents=True, exist_ok=True)
110
111    success = True
112    for filename in BEDMAP_CATALOG_FILES:
113        dest = catalog_dir / filename
114        if dest.exists() and not force_download:
115            continue
116
117        url = f"{BEDMAP_CATALOG_BASE_URL}/{filename}"
118        if not _download_file(url, dest):
119            success = False
120
121    if success:
122        print(f"Bedmap catalogs cached successfully")
123        return catalog_dir
124    else:
125        print("Warning: Some catalogs failed to download")
126        # Return catalog_dir anyway - partial cache may still be useful
127        return catalog_dir if any((catalog_dir / f).exists() for f in BEDMAP_CATALOG_FILES) else None
128
129
def get_bedmap_catalog_path() -> str:
    """
    Return the path pattern used to query the bedmap catalogs.

    Intended as the entry point for query functions: it triggers the local
    caching of the catalogs and reports where they can be read from.

    Returns
    -------
    str
        Glob pattern to local bedmap catalog files, or cloud URL as fallback
    """
    catalog_dir = ensure_bedmap_catalogs()

    has_local_copy = catalog_dir is not None and any(
        (catalog_dir / name).exists() for name in BEDMAP_CATALOG_FILES
    )
    if not has_local_copy:
        # Nothing usable on disk: point the caller at the remote catalogs.
        print("Warning: Using cloud catalogs (local cache unavailable)")
        return f"{BEDMAP_CATALOG_BASE_URL}/bedmap*.parquet"

    return str(catalog_dir / "bedmap*.parquet")
150
151
def clear_bedmap_cache() -> None:
    """
    Remove the cached bedmap catalog files.

    Only the files listed in ``BEDMAP_CATALOG_FILES`` are deleted; the cache
    directory itself is left in place. Does nothing when the directory does
    not exist.
    """
    catalog_dir = get_bedmap_catalog_dir()
    if not catalog_dir.exists():
        return

    for name in BEDMAP_CATALOG_FILES:
        cached_file = catalog_dir / name
        if cached_file.exists():
            cached_file.unlink()

    print(f"Cleared bedmap catalog cache at {catalog_dir}")
BEDMAP_CATALOG_BASE_URL = 'https://storage.googleapis.com/opr_stac/bedmap'
BEDMAP_CATALOG_FILES = ['bedmap1.parquet', 'bedmap2.parquet', 'bedmap3.parquet']
def get_cache_dir() -> pathlib.Path:
def get_cache_dir() -> Path:
    """
    Get the xopr cache directory.

    Checks $XOPR_CACHE_DIR environment variable first, otherwise uses
    platform-specific user cache directory. A leading ``~`` in
    $XOPR_CACHE_DIR is expanded to the user's home directory.

    Returns
    -------
    Path
        Path to xopr cache directory (the directory is not created here)
    """
    env_cache = os.environ.get("XOPR_CACHE_DIR")
    if env_cache:
        # Values set through .env files or os.environ are not shell-expanded,
        # so without expanduser() "~/cache" would become a literal "~"
        # directory relative to the current working directory.
        return Path(env_cache).expanduser()

    return Path(user_cache_dir("xopr", "englacial"))

Get the xopr cache directory.

Checks $XOPR_CACHE_DIR environment variable first, otherwise uses platform-specific user cache directory.

Returns
  • Path: Path to xopr cache directory
def get_bedmap_catalog_dir() -> pathlib.Path:
def get_bedmap_catalog_dir() -> Path:
    """
    Locate the directory that holds the cached bedmap catalogs.

    Returns
    -------
    Path
        The ``catalogs/bedmap`` subdirectory of the xopr cache directory
    """
    cache_root = get_cache_dir()
    return cache_root.joinpath("catalogs", "bedmap")

Get the bedmap catalog cache directory.

Returns
  • Path: Path to bedmap catalog directory within cache
def ensure_bedmap_catalogs(force_download: bool = False) -> Optional[pathlib.Path]:
 86def ensure_bedmap_catalogs(force_download: bool = False) -> Optional[Path]:
 87    """
 88    Ensure bedmap catalogs are cached locally, downloading if needed.
 89
 90    Parameters
 91    ----------
 92    force_download : bool, default False
 93        If True, re-download catalogs even if they exist
 94
 95    Returns
 96    -------
 97    Path or None
 98        Path to catalog directory if successful, None if download failed
 99    """
100    catalog_dir = get_bedmap_catalog_dir()
101
102    # Check if all catalogs exist
103    all_exist = all((catalog_dir / f).exists() for f in BEDMAP_CATALOG_FILES)
104
105    if all_exist and not force_download:
106        return catalog_dir
107
108    # Download missing catalogs
109    print(f"Downloading bedmap catalogs to {catalog_dir}...")
110    catalog_dir.mkdir(parents=True, exist_ok=True)
111
112    success = True
113    for filename in BEDMAP_CATALOG_FILES:
114        dest = catalog_dir / filename
115        if dest.exists() and not force_download:
116            continue
117
118        url = f"{BEDMAP_CATALOG_BASE_URL}/{filename}"
119        if not _download_file(url, dest):
120            success = False
121
122    if success:
123        print(f"Bedmap catalogs cached successfully")
124        return catalog_dir
125    else:
126        print("Warning: Some catalogs failed to download")
127        # Return catalog_dir anyway - partial cache may still be useful
128        return catalog_dir if any((catalog_dir / f).exists() for f in BEDMAP_CATALOG_FILES) else None

Ensure bedmap catalogs are cached locally, downloading if needed.

Parameters
  • force_download (bool, default False): If True, re-download catalogs even if they exist
Returns
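  • Path or None: Path to catalog directory if all downloads succeeded or at least one catalog file is available locally, None if a download failed and no catalog file exists locally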
def get_bedmap_catalog_path() -> str:
def get_bedmap_catalog_path() -> str:
    """
    Return the path pattern used to query the bedmap catalogs.

    Intended as the entry point for query functions: it triggers the local
    caching of the catalogs and reports where they can be read from.

    Returns
    -------
    str
        Glob pattern to local bedmap catalog files, or cloud URL as fallback
    """
    catalog_dir = ensure_bedmap_catalogs()

    has_local_copy = catalog_dir is not None and any(
        (catalog_dir / name).exists() for name in BEDMAP_CATALOG_FILES
    )
    if not has_local_copy:
        # Nothing usable on disk: point the caller at the remote catalogs.
        print("Warning: Using cloud catalogs (local cache unavailable)")
        return f"{BEDMAP_CATALOG_BASE_URL}/bedmap*.parquet"

    return str(catalog_dir / "bedmap*.parquet")

Get the path pattern for bedmap catalogs, downloading if needed.

This is the main entry point for query functions. It ensures catalogs are cached locally and returns the glob pattern for querying.

Returns
  • str: Glob pattern to local bedmap catalog files, or cloud URL as fallback
def clear_bedmap_cache() -> None:
def clear_bedmap_cache() -> None:
    """
    Remove the cached bedmap catalog files.

    Only the files listed in ``BEDMAP_CATALOG_FILES`` are deleted; the cache
    directory itself is left in place. Does nothing when the directory does
    not exist.
    """
    catalog_dir = get_bedmap_catalog_dir()
    if not catalog_dir.exists():
        return

    for name in BEDMAP_CATALOG_FILES:
        cached_file = catalog_dir / name
        if cached_file.exists():
            cached_file.unlink()

    print(f"Cleared bedmap catalog cache at {catalog_dir}")

Clear cached bedmap catalogs.

Useful for forcing a fresh download of catalogs.