# -*- coding: utf-8 -*-
"""This module provides functionality for reading and writing ARTS XML files.
"""
import gzip
import glob
import itertools
import os
from os.path import isfile, join, basename, splitext, dirname
from . import read
from . import write
# Public API: the names exported by ``from <this module> import *``.
# Each entry corresponds to a function defined further down in this module.
__all__ = [
'load',
'save',
'load_directory',
'load_indexed',
'make_binary',
'make_directory_binary',
]
def _write_document(axw, var, comment):
    """Write header, optional comment, data and footer through ``axw``."""
    axw.write_header()
    if comment is not None:
        axw.write_comment(comment)
    axw.write_xml(var)
    axw.write_footer()


def save(var, filename, precision='.7e', format='ascii', comment=None,
         parents=False):
    """Save a variable to an ARTS XML file.

    Args:
        var: Variable to be stored.
        filename (str): Name of output XML file.
            If the name ends in .gz, the file is compressed on the fly.
        precision (str): Format for output precision.
        format (str): Output format: 'ascii' (default) or 'binary'.
            For 'binary', a second file ``<filename>.bin`` is written next
            to the XML file.
        comment (str): Comment string included in a tag above data.
        parents (bool): Create missing parent directories.

    Raises:
        RuntimeError: If ``format`` is neither 'ascii' nor 'binary', or if a
            ``.gz`` filename is combined with a non-ascii format.

    Note:
        Python's gzip module is extremely slow in writing. Consider
        compressing files manually after writing them normally.

    Example:
        >>> x = numpy.array([1.,2.,3.])
        >>> pyarts.xml.save(x, 'myvector.xml')

    """
    # Validate all arguments up front so that an invalid call neither creates
    # directories nor leaves an empty/truncated output file behind.
    zipped = filename.endswith('.gz')
    if zipped and format != 'ascii':
        raise RuntimeError(
            'For zipped files, the output format must be "ascii"')
    if format not in ('ascii', 'binary'):
        raise RuntimeError('Unknown output format "{}".'.format(format))

    if parents:
        parent = dirname(filename)
        # dirname() is '' for a bare filename; os.makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)

    xmlopen = gzip.open if zipped else open
    with xmlopen(filename, mode='wt', encoding='UTF-8') as fp:
        if format == 'binary':
            with open(filename + '.bin', mode='wb') as binaryfp:
                axw = write.ARTSXMLWriter(fp, precision=precision,
                                          binaryfp=binaryfp)
                _write_document(axw, var, comment)
        else:
            axw = write.ARTSXMLWriter(fp, precision=precision)
            _write_document(axw, var, comment)
def load(filename):
    """Load a variable from an ARTS XML file.

    The input file can be either a plain or a gzipped XML file. If
    ``filename`` does not exist but ``filename + '.gz'`` does, the gzipped
    file is read instead. If a file named ``<resolved filename>.bin`` exists,
    it is opened and handed to the parser together with the XML file.

    Args:
        filename (str): Name of ARTS XML file.

    Returns:
        Data from the XML file. Type depends on data in file.

    Raises:
        FileNotFoundError: If neither ``filename`` nor its ``.gz`` variant
            exists.

    Example:
        >>> pyarts.xml.load('tests/reference/matrix.xml')
        array([[ 0., 1.],
               [ 2., 3.]])

    """
    # Fall back to the gzipped variant when the plain file is missing.
    if not isfile(filename):
        zipped_name = filename + '.gz'
        if not isfile(zipped_name):
            raise FileNotFoundError("No such file: '{}'".format(filename))
        filename = zipped_name

    opener = gzip.open if filename.endswith('.gz') else open
    bin_path = filename + '.bin'

    with opener(filename, 'rb') as xml_fp:
        if not isfile(bin_path):
            return read.parse(xml_fp).getroot().value()
        with open(bin_path, 'rb') as bin_fp:
            return read.parse(xml_fp, bin_fp).getroot().value()
def load_directory(directory, exclude=None):
    """Load all XML files in a given directory.

    Search given directory for files with ``.xml`` or ``.xml.gz`` extension
    and try to load them using :func:`load`.

    Parameters:
        directory (str): Path to the directory.
        exclude (Container[str]): Filenames to exclude. A gzipped file is
            skipped if either its own name (``foo.xml.gz``) or its
            uncompressed name (``foo.xml``) is listed.

    Returns:
        dict: Filenames without extension are keys for the file content.

    Example:
        Load all files in ``foo`` except for the lookup table in
        ``abs_lookup.xml``.

        >>> load_directory('foo', exclude=['abs_lookup.xml'])

    """
    def is_excluded(name):
        """Return True if the given basename was excluded by the caller."""
        return exclude is not None and name in exclude

    # Map dictionary key -> path handed to `load`. Collecting the paths first
    # ensures every key is loaded exactly once.
    paths = {}

    for xmlfile in glob.iglob(join(directory, '*.xml')):
        name = basename(xmlfile)
        if not is_excluded(name):
            paths[splitext(name)[0]] = xmlfile

    for gzfile in glob.iglob(join(directory, '*.xml.gz')):
        # Drop the '.gz' suffix to keep dictionary keys clean; `load` falls
        # back to the zipped file when the plain one does not exist.
        plain = splitext(gzfile)[0]
        plain_name = basename(plain)
        # `load(plain)` prefers an existing plain file, so the plain name
        # must be checked too or an excluded file could still be loaded.
        if is_excluded(basename(gzfile)) or is_excluded(plain_name):
            continue
        # Do not schedule a second load if the plain file is already queued.
        paths.setdefault(splitext(plain_name)[0], plain)

    return {key: load(path) for key, path in paths.items()}
def load_indexed(filename):
    """Load all indexed XML files matching the given filename.

    The function searches all files matching the pattern
    ``<filename>.<file_index>.xml``. Only if none are found, it searches for
    ``<filename>.<file_index>.xml.gz`` instead.

    A list with the loaded file contents is returned. The list indices are
    equivalent to the file indices; positions without a matching file are
    ``None``.

    Parameters:
        filename (str): Filename.

    Returns:
        list: List of file contents.

    Raises:
        ValueError: If no matching file exists, or if the index part of a
            matching filename is not an integer.

    Example:
        Load all files matching the pattern ``foo.<file_index>.xml``.

        >>> load_indexed('foo')

    """
    index_pos = -2  # Position of the file index in the dot-split filename.

    # Get all files matching the indexed filename format.
    files = glob.glob('{}.*.xml'.format(filename))

    # If no files are found, try the gzipped version.
    if not files:
        files = glob.glob('{}.*.xml.gz'.format(filename))
        index_pos = -3  # The extra '.gz' shifts the index one field left.

    # Fail with a descriptive message instead of the opaque error that
    # max() raises for an empty sequence (exception type kept: ValueError).
    if not files:
        raise ValueError(
            "No indexed files found matching '{0}.<file_index>.xml' "
            "or '{0}.<file_index>.xml.gz'".format(filename))

    # Parse every file index exactly once.
    indexed = [(int(path.split('.')[index_pos]), path) for path in files]

    # Pre-allocate a list according to the maximum index found.
    ret = (max(index for index, _ in indexed) + 1) * [None]

    # Fill list with file contents (file index matching list index).
    for index, path in indexed:
        ret[index] = load(path)

    return ret
def make_binary(filename, out='', absolute_out=False, parents=True):
    """Load the XML file at filename and save it back in binary format.

    Parameters:
        filename (str): Filename path. A trailing ``.gz`` is dropped from
            the output name, because binary output cannot be written to a
            zipped file.
        out (str): Path to save the binary. Empty causes overwrite of file.
        absolute_out (bool): If true, then write file to out-path rather than
            to the relative path out. Does nothing if file is in the working
            folder and out is relative.
        parents (bool): Create missing parent directories.

    Returns:
        str: Path to the written XML file. The binary data is stored next to
        it in a file with ``.bin`` appended to that path.

    Example:
        Load t_field.xml and save it back as binary at ./binary/t_field.xml
        and ./binary/t_field.xml.bin

        >>> make_binary('t_field.xml', out='binary')
        'binary/t_field.xml'

    """
    xml_data = load(filename)

    # `save` rejects binary output for names ending in '.gz', so a zipped
    # input is written under its uncompressed name.
    outname = basename(filename)
    if outname.endswith('.gz'):
        outname = outname[:-len('.gz')]

    if absolute_out:
        outfile = join(out, outname)
    else:
        outfile = join(dirname(filename), out, outname)

    # Only request directory creation when the output path actually has a
    # directory part; creating the empty path '' fails with os.makedirs.
    save(xml_data, outfile, format='binary',
         parents=parents and bool(dirname(outfile)))

    return outfile
def make_directory_binary(directory, out='', absolute_out=False, parents=True):
    """Load the XML files in directory and save them back in binary format.

    Parameters:
        directory (str): Directory path.
        out (str): Path to save the binary.
        absolute_out (bool): If true, then write file to out-path rather than
            to the relative path out. Does nothing if file is in the working
            folder and out is relative.
        parents (bool): Create missing parent directories.

    Returns:
        list[str]: Paths to the written XML files. The binary data of each
        file is stored next to it with ``.bin`` appended to the path.

    Example:
        Load arts-xml-data/spectroscopy/cia/hitran2011/ and save it back as
        binary at arts-xml-data-binary/spectroscopy/cia/hitran2011/

        >>> make_directory_binary('arts-xml-data/spectroscopy/cia/hitran2011',
            out='arts-xml-data-binary/spectroscopy/cia/hitran2011',
            absolute_out=True)
        ['arts-xml-data-binary/spectroscopy/cia/hitran2011/hitran_cia2012_adapted.xml']

    """
    directory_of_xmls = load_directory(directory)

    # Build paths with join() rather than str.format() on a template that
    # embeds the user-supplied paths: '{' or '}' in them would break format().
    outdir = out if absolute_out else join(directory, out)

    outfiles = []  # Output filepaths, in iteration order of the loaded dict.
    for entry, data in directory_of_xmls.items():
        outfile = join(outdir, entry + '.xml')
        # Skip directory creation for paths without a directory part;
        # creating the empty path '' fails with os.makedirs.
        save(data,
             outfile,
             format='binary',
             parents=parents and bool(dirname(outfile)))
        outfiles.append(outfile)

    return outfiles