Source code for pyarts.data

# -*- coding: utf-8 -*-
"""Common functions for the pyarts package."""

import os
import urllib.request
import zipfile
from pyarts.arts.globals import parameters
import pyarts
import numpy as np
from tqdm import tqdm
import xarray


def download(data=("xml", "cat"), download_dir=None, verbose=False, **kwargs):
    """
    Download and extract data files.

    This function sets the ARTS data search path to the downloaded data
    directories, so that ARTS can find the required data files.

    If the environment variable ARTS_DATA_PATH or ARTS_INCLUDE_PATH is set,
    it is assumed that the user wants to use their own catalog locations and
    this function does nothing.

    Parameters:
        data (Tuple[str] | str): Data types to download. A single string is
            treated as a one-element sequence. Possible values are:

            - ``"xml"``: arts-xml-data package
            - ``"cat"``: arts-cat-data package
        download_dir (str, optional): The directory where the data files
            will be stored. If not provided, the default is
            ``~/.cache/arts/``.
        verbose (bool, optional): Whether to print info messages.
            Defaults to False.
        **kwargs: Additional options passed on to the data specific download
            functions, e.g. ``version`` for ARTS catalogs.

    Returns:
        list | None: The values returned by the data specific download
        functions, one per requested data type and in the requested order.
        ``None`` if the download was skipped because one of the environment
        variables is set.

    Raises:
        RuntimeError: If ``data`` contains an unknown data type. This is
            checked before any download is started.
    """
    for env_var in ("ARTS_DATA_PATH", "ARTS_INCLUDE_PATH"):
        if os.getenv(env_var):
            if verbose:
                print(
                    f"Skipping download, environment variable {env_var} already set."
                )
            return None

    # A bare string would otherwise be iterated character by character.
    data = (data,) if isinstance(data, str) else tuple(data)

    # Validate everything first so that a typo in a later entry does not
    # surface only after the earlier packages have already been downloaded.
    known_types = ("xml", "cat")
    for d in data:
        if d not in known_types:
            raise RuntimeError(f'Unknown download data type "{d}"')

    if download_dir is None:
        download_dir = os.path.join(os.path.expanduser("~"), ".cache", "arts")

    downloaders = {
        "xml": download_arts_xml_data,
        "cat": download_arts_cat_data,
    }
    return [downloaders[d](download_dir, verbose=verbose, **kwargs) for d in data]
class _DownloadProgressBar(tqdm):
    """tqdm progress bar usable as ``reporthook`` of ``urlretrieve``."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """
        Advance the bar to ``b * bsize`` transferred units.

        Parameters:
            b (int, optional): Number of blocks transferred so far.
            bsize (int, optional): Size of each block.
            tsize (int, optional): Total size. If given, it is stored as the
                total of the progress bar.

        Returns:
            Whatever ``tqdm.update`` returns.
        """
        if tsize is not None:
            self.total = tsize
        # ``update`` expects an increment, so subtract what is already shown.
        return self.update(b * bsize - self.n)


def _download_and_extract(url, extract_dir=".", verbose=False):
    """
    Downloads a zip file from a given URL and extracts it to a specified directory.

    The archive is downloaded into a temporary directory that is removed
    again once the extraction has finished (or failed).

    Parameters:
        url (str): The URL of the zip file to download.
        extract_dir (str, optional): The directory to extract the zip file to.
            Defaults to ".".
        verbose (bool, optional): Whether to print info messages.
            Defaults to False.

    Raises:
        RuntimeError: If the server answers with an HTTP error.
    """
    import tempfile

    if verbose:
        print(f"Downloading {url}")

    archive_name = url.split("/")[-1]
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Download to a path we own so that it is cleaned up afterwards;
        # urlretrieve without a filename leaves its temporary file behind.
        zip_path = os.path.join(tmp_dir, archive_name or "download.zip")
        try:
            with _DownloadProgressBar(
                unit="B", unit_scale=True, miniters=1, desc=archive_name
            ) as t:
                urllib.request.urlretrieve(
                    url, filename=zip_path, reporthook=t.update_to
                )
        except urllib.request.HTTPError as e:
            raise RuntimeError(f"Failed to download {url}: {e}") from e

        with zipfile.ZipFile(zip_path, "r") as f:
            f.extractall(extract_dir)


def _download_arts_data(catname, download_dir=None, version=None, verbose=False):
    """
    Download and extract one ARTS data package from github.

    This function adds the downloaded data directory to the ARTS data search
    path, so that ARTS can find the required data files. The download is
    skipped if the versioned package directory already exists.

    If the environment variable ARTS_DATA_PATH or ARTS_INCLUDE_PATH is set,
    it is assumed that the user wants to use their own catalog locations and
    this function does nothing.

    Parameters:
        catname (str): Name of the data package without version suffix,
            e.g. ``"arts-xml-data"``.
        download_dir (str, optional): The directory where the data files
            will be stored. If not provided, the default is `~/.cache/arts`.
        version (str, optional): The version of ARTS to download the data
            files for. The default is the version of the currently installed
            pyarts package.
        verbose (bool, optional): Whether to print info messages.
            Defaults to False.

    Returns:
        str | None: The directory of the data package, or ``None`` if the
        download was skipped because one of the environment variables is set.

    Raises:
        RuntimeError: If ``version`` does not end in an even digit, or if the
            download fails with an HTTP error.
    """
    for env_var in ("ARTS_DATA_PATH", "ARTS_INCLUDE_PATH"):
        if os.getenv(env_var):
            if verbose:
                print(
                    f"Skipping download, environment variable {env_var} already set."
                )
            return None

    if download_dir is None:
        download_dir = os.path.join(os.path.expanduser("~"), ".cache", "arts")

    if version is None:
        from pyarts import __version__

        version = __version__

    # Versions whose last character is not an even digit are rejected. A
    # non-digit (or empty) ending is reported with the same error instead of
    # an unrelated ValueError/IndexError from int().
    last_char = version[-1:]
    if not last_char.isdecimal() or int(last_char) % 2:
        raise RuntimeError(
            f"Version {version} is not a release version.\n"
            f"Please check out the current catalogs with svn instead."
        )

    github_url = f"https://github.com/atmtools/arts/releases/download/v{version}/"
    catname = catname + "-" + version
    catdir = os.path.join(download_dir, catname)
    if not os.path.exists(catdir):
        os.makedirs(download_dir, exist_ok=True)
        _download_and_extract(
            github_url + catname + ".zip", download_dir, verbose=verbose
        )

    # Avoid growing the search path with duplicates on repeated calls.
    # NOTE(review): assumes str() of a datapath entry yields the plain path;
    # if not, the directory is simply appended as before.
    if catdir not in [str(p) for p in parameters.datapath]:
        parameters.datapath.append(catdir)

    return catdir
def download_arts_xml_data(download_dir=None, version=None, verbose=False):
    """
    Download and extract the ARTS XML data files from github.

    This function sets the ARTS data search path to the downloaded data
    directories, so that ARTS can find the required data files.

    If the environment variable ARTS_DATA_PATH or ARTS_INCLUDE_PATH is set,
    it is assumed that the user wants to use their own catalog locations and
    this function does nothing.

    Parameters:
        download_dir (str, optional): The directory where the data files
            will be stored. If not provided, the default is `~/.cache/arts`.
        version (str, optional): The version of ARTS to download the data
            files for. The default is the version of the currently installed
            pyarts package.
        verbose (bool, optional): Whether to print info messages.
            Defaults to False.

    Returns:
        The result of ``_download_arts_data`` for the ``arts-xml-data``
        package.
    """
    return _download_arts_data(
        "arts-xml-data",
        download_dir=download_dir,
        version=version,
        verbose=verbose,
    )
def download_arts_cat_data(download_dir=None, version=None, verbose=False):
    """
    Download and extract the ARTS catalog data files from github.

    This function sets the ARTS data search path to the downloaded data
    directories, so that ARTS can find the required data files.

    If the environment variable ARTS_DATA_PATH or ARTS_INCLUDE_PATH is set,
    it is assumed that the user wants to use their own catalog locations and
    this function does nothing.

    Parameters:
        download_dir (str, optional): The directory where the data files
            will be stored. If not provided, the default is `~/.cache/arts`.
        version (str, optional): The version of ARTS to download the data
            files for. The default is the version of the currently installed
            pyarts package.
        verbose (bool, optional): Whether to print info messages.
            Defaults to False.

    Returns:
        The result of ``_download_arts_data`` for the ``arts-cat-data``
        package.
    """
    return _download_arts_data(
        "arts-cat-data",
        download_dir=download_dir,
        version=version,
        verbose=verbose,
    )
def to_atmospheric_field(
    data: xarray.Dataset,
    remap: None | dict[str, str] = None,
    ignore: None | list[str] = None,
    *,
    atm: None | pyarts.arts.AtmField = None,
) -> pyarts.arts.AtmField:
    """
    Populates a :class:`~pyarts.arts.AtmField` from an xarray Dataset-like structure

    Parameters
    ----------
    data : xarray.Dataset
        A dataset.  Coordinates should contain 'alt', 'lat', and 'lon'.
        All other keys() should be assignable to the atm-field by name.
    remap : None | dict[str, str], optional
        All names in this optional dict are renamed when accessing the dataset.
        For example, if the altitude-grid is called 'Alt' instead of 'alt', the
        dict should contain {..., 'alt': 'Alt', ...}.
    ignore : None | list[str], optional
        Ignore keys listed from assignment into the atmospheric field.  If
        'alt', 'lat' or 'lon' is listed, that grid is not read from the
        dataset and a single grid point 0 is used instead.
    atm : None | pyarts.arts.AtmField
        The default atmospheric field to use.  Defaults to None to use default-
        constructed atmospheric field object.

    Returns
    -------
    atm : pyarts.arts.AtmField
        An atmospheric field

    Raises
    ------
    Exception
        If a key cannot be assigned; the message contains the original error
        text and the name of the key.
    """
    ignore = [] if ignore is None else ignore
    remap = {} if remap is None else remap

    def _grid(name):
        # An ignored axis collapses to the single grid point 0.
        if name in ignore:
            return np.array([0])
        return getattr(data, remap.get(name, name)).data.flatten()

    alt = _grid("alt")
    lat = _grid("lat")
    lon = _grid("lon")

    # One gridded field is reused for every key: its data is overwritten and
    # the object is assigned into the atmospheric field on each iteration.
    template = pyarts.arts.GriddedField3(
        name="Generic",
        data=np.zeros((alt.size, lat.size, lon.size)),
        grid_names=["Altitude", "Latitude", "Longitude"],
        grids=[alt, lat, lon],
    )

    if atm is None:
        atm = pyarts.arts.AtmField()
        atm.top_of_atmosphere = max(alt)

    for key in data.keys():
        try:
            if key in ignore:
                continue

            key = remap.get(key, key)
            field_name = str(key)
            template.dataname = field_name
            values = getattr(data, field_name).data
            np.asarray(template.data).flat[:] = values.flat[:]
            atm[key] = template
        except Exception as e:
            raise Exception(f"{e}\n\nFailed to assign key to atm: {key}")

    return atm
def to_absorption_species(
    atm_field: pyarts.arts.AtmField,
) -> pyarts.arts.ArrayOfArrayOfSpeciesTag:
    """Scans the ARTS data path for species relevant to the given atmospheric field.

    The scan is done over files in an arts-cat-data like directory structure.
    For every species key of the field, the plain species tag is always
    included.  ``-XFIT`` and ``-CIA-`` tags are included when a matching file
    is found under ``xsec/`` or ``cia/``, and fixed continuum tags are added
    for water, carbon dioxide, oxygen and nitrogen.

    Args:
        atm_field (pyarts.arts.AtmField): A relevant atmospheric field.

    Returns:
        pyarts.arts.ArrayOfArrayOfSpeciesTag: All found species tags, built
        from the unique tag strings.  The intent is that this is enough
        information to use pyarts.workspace.Workspace.ReadCatalogData
    """
    find_xml = pyarts.arts.file.find_xml
    species_enum = pyarts.arts.SpeciesEnum
    all_species = atm_field.species_keys()

    tags = []
    for first in all_species:
        tags.append(f"{first}")

        if find_xml(f"xsec/{first}-XFIT") is not None:
            tags.append(f"{first}-XFIT")

        # Both orderings of every species pair are probed.
        # NOTE(review): these checks use truthiness while the others compare
        # against None; kept as in the original.
        for second in all_species:
            forward = f"{first}-CIA-{second}"
            if find_xml(f"cia/{forward}"):
                tags.append(forward)

            backward = f"{second}-CIA-{first}"
            if find_xml(f"cia/{backward}"):
                tags.append(backward)

        self_pair = f"{first}-CIA-{first}"
        if find_xml(f"cia/{self_pair}") is not None:
            tags.append(self_pair)

        # Continuum tags that are added without any file lookup.
        if first == species_enum.Water:
            tags.extend(("H2O-ForeignContCKDMT400", "H2O-SelfContCKDMT400"))
        elif first == species_enum.CarbonDioxide:
            tags.append("CO2-CKDMT252")
        elif first == species_enum.Oxygen:
            tags.append("O2-SelfContStandardType")
        elif first == species_enum.Nitrogen:
            tags.append("N2-SelfContStandardType")

    # np.unique removes the duplicates produced by probing both orderings.
    return pyarts.arts.ArrayOfArrayOfSpeciesTag(np.unique(tags))
def xarray_open_dataset(filename_or_obj, *args, **kwargs):
    """Wraps xarray.open_dataset to search for files in the current and in ARTS' data path.

    ``filename_or_obj`` is first tried as given.  If that raises
    FileNotFoundError, it is joined onto each entry of the ARTS data path in
    turn, and the first candidate that opens is returned.

    All args and kwargs are passed on to xarray.open_dataset directly.

    Args:
        filename_or_obj: First argument for xarray.open_dataset; used as the
            path that is joined onto the ARTS data path entries.

    Raises:
        FileNotFoundError: If neither the name as given nor any data path
            candidate could be opened.

    Returns:
        The return value of xarray.open_dataset for the first candidate that
        could be opened.
    """

    def _candidates():
        # Lazy on purpose: the data path is only consulted when the name as
        # given could not be opened.
        yield filename_or_obj
        for root in parameters.datapath:
            yield os.path.join(root, filename_or_obj)

    for candidate in _candidates():
        try:
            return xarray.open_dataset(candidate, *args, **kwargs)
        except FileNotFoundError:
            continue

    raise FileNotFoundError(
        f"File not found in ARTS data path ({parameters.datapath}): {filename_or_obj}"
    )