"""
This module implements a parse for ARTS controlfile. Its implemented
using lark, which greatly simplifies the parsing. Functions are provided
to transform the parsed controlfile to a Python script.
"""
import numpy as np
import re
from textwrap import indent
from lark import Lark, Transformer, Token
import pyarts.workspace.global_data as global_data
# Lookup tables taken from pyarts' global data. They are used further down
# to check whether a name is a workspace method (workspace_methods), to look
# up the group of a workspace variable (workspace_variables) and to translate
# a group index into a group name (group_names).
workspace_methods = global_data.get_raw_method_map()
workspace_variables = global_data.get_variables_map()
group_names = [str(x.name) for x in global_data.cxx.globals.get_wsv_groups()]
# Lark grammar describing the controlfile syntax. The start rule is
# `controlfile`. Whitespace is ignored (`%ignore WS`), while comments are
# matched explicitly by the `comment` rule so that they survive parsing.
# NOTE(review): the `matrix` and `empty_list` rules are not referenced by any
# other rule in this grammar.
grammar = r"""
controlfile : statement*
statement : agenda
| comment
| function
| include
| agenda_definition
| agenda_append
include : "INCLUDE " STRING
agenda : CNAME "{" statement* "}"
agenda_definition : "AgendaSet" "(" CNAME ")" "{" statement* "}"
agenda_append : "ArrayOfAgendaAppend" "(" CNAME ")" "{" statement* "}"
comment : /[ \t]*#.*\n/
function : CNAME ("(" arguments ")")?
arguments : named_arguments
| positional_arguments
named_arguments : [argument_pair ("," argument_pair? )*]
argument_pair : comment* CNAME comment* "=" comment* value comment*
positional_arguments: (comment* value comment* | comment+) (("," comment* value) | comment+)*
list : "[" ((comment* value comment*)? (comment* ("," ) comment* value | comment+)*
| nested_list (";" nested_list ";"? comment? | comment)+) "]"
nested_list : (comment* value comment*) (comment* (",") comment* value | comment)*
matrix : "[" ( | comment)? (comment? ("," | ";") comment? value | comment)* "]"
empty_list : "[" "]"
?value : STRING
| SIGNED_FLOAT -> number
| SIGNED_INT
| list
| CNAME
| comment
DOUBLE_QUOTED_STRING : /"[^"]*"/
SINGLE_QUOTED_STRING : /'[^']*'/
STRING : (SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING)
%import common.SIGNED_FLOAT
%import common.SIGNED_INT
%import common.CNAME
%import common.WS
%import common.WS_INLINE
%import common.NEWLINE
%ignore WS
"""
# Parser built from the grammar above; it uses lark's Earley parser and
# starts at the `controlfile` rule.
arts_parser = Lark(grammar, start="controlfile", debug=False, parser="earley")
################################################################################
# Python representation of syntax elements
################################################################################
# Patterns used by to_python() to rewrite the repr of a numpy array:
# `array(...)` (content captured up to the first closing parenthesis) and
# `dtype=<non-whitespace>` (the value is captured).
replace_array = re.compile(r'array\(([^\)]*)\)')
replace_dtype = re.compile(r'dtype=([^\s]+)')
def to_python(obj, workspace):
    """
    Generic function to write elements of a controlfile AST in
    Python syntax.

    Objects that provide a ``to_python`` member function are asked to
    render themselves. Numpy arrays are printed so that they are parsed
    as numpy arrays. Strings are wrapped in double quotes and escaped.
    Other objects (int, list of int) are just converted to a string.

    Arguments:
        obj: Element of controlfile AST to write in Python syntax
        workspace: Variable name to use for workspace.

    Returns:
        String holding the Python source text for ``obj``.
    """
    if hasattr(obj, "to_python"):
        return obj.to_python(workspace)

    if isinstance(obj, np.ndarray):
        if obj.size == 0:
            return "[]"
        import sys
        # With the default print options numpy abbreviates large arrays
        # with "...", which must not end up in the generated script.
        with np.printoptions(threshold=sys.maxsize):
            s = repr(obj)
        s = replace_array.sub(r"np.array(\1)", s)
        return replace_dtype.sub(r"dtype=np.\1", s)

    if isinstance(obj, str):
        # Escape the characters that would otherwise terminate or alter
        # the double-quoted Python literal. Backslashes go first so that
        # the escapes added afterwards are not escaped again.
        escaped = (
            str(obj)
            .replace("\\", "\\\\")
            .replace("\"", "\\\"")
            .replace("\n", "\\n")
        )
        return "\"" + escaped + "\""

    return str(obj)
class WSMCall:
    """
    Represents a call of a WSM.

    Attributes:
        name: Name of the WSM that is called
        args: Positional arguments of the WSM call, or None
        kwargs: Named arguments of the call, or None
        wsm: Entry of ``workspace_methods`` for the called method
        wsm_outs: Names of the output variables of the method
        wsm_gouts: Names of the generic outputs of the method
        wsm_ins: Names of the input variables that are not also outputs
        wsm_gins: Names of the generic inputs of the method
        arg_names: Argument names in positional order (outputs, generic
            outputs, inputs, generic inputs)
    """

    def __init__(self, name, args, kwargs):
        """
        Look up the method and store the call arguments.

        Arguments:
            name: Name of the WSM
            args: List of positional arguments or None
            kwargs: Dict of named arguments or None

        Raises:
            Exception: If ``name`` is not a known workspace method.
        """
        if name not in workspace_methods:
            raise Exception("{} is not a known workspace method.".format(name))
        self.wsm = workspace_methods[name]
        self.wsm_outs = [global_data.get_variable_name(m) for m in self.wsm.outs]
        self.wsm_gouts = list(self.wsm.g_out)
        self.wsm_ins = [
            global_data.get_variable_name(m)
            for m in self.wsm.ins
            if m not in self.wsm.outs
        ]
        self.wsm_gins = list(self.wsm.g_in)
        self.arg_names = (
            self.wsm_outs + self.wsm_gouts + self.wsm_ins + self.wsm_gins
        )
        self.name = name
        self.args = args
        self.kwargs = kwargs
        # Calls with an argument named "in" are switched to positional
        # arguments, presumably because "in" is a reserved word in Python
        # and cannot be used as a keyword argument name.
        if kwargs is not None and "in" in kwargs:
            self.kwargs_to_args()

    def kwargs_to_args(self):
        """
        Convert function call from named arguments to positional arguments.

        Missing outputs and inputs are filled in with the WSV of the same
        name, missing generic inputs with their converted default value.
        Generic outputs must be present in the named arguments. Afterwards
        ``kwargs`` is None and ``args`` holds the positional arguments.
        """
        if self.kwargs is None:
            return None
        kwargs = self.kwargs
        args = []
        for n in self.wsm_outs:
            args.append(kwargs[n] if n in kwargs else WSV(n))
        for n in self.wsm_gouts:
            args.append(kwargs[n])
        for n in self.wsm_ins:
            args.append(kwargs[n] if n in kwargs else WSV(n))
        for n in self.wsm_gins:
            if n in kwargs:
                args.append(kwargs[n])
            else:
                i = self.wsm.g_in.index(n)
                args.append(global_data.convert(self.wsm.g_in_types[i],
                                                self.wsm.g_in_default[i]))
        self.kwargs = None
        self.args = args

    def __repr__(self):
        """
        Print WSM call in ARTS script.
        """
        parts = []
        if self.args is not None:
            parts.extend(str(a) for a in self.args)
        if self.kwargs is not None:
            parts.extend(str(k) + "=" + str(v) for k, v in self.kwargs.items())
        # Joining also covers empty argument lists, which give "name()".
        return self.name + "(" + ", ".join(parts) + ")\n"

    def convert_argument(self, name, value):
        """
        Tries to infer type of argument based on types of input and
        generic input.

        Arguments:
            name: Name of the argument
            value: Value given for the argument

        Returns:
            The converted value, or ``value`` itself if it is a WSV or no
            conversion was possible.
        """
        if isinstance(value, WSV):
            return value
        if name in self.wsm_ins:
            converted = global_data.convert(workspace_variables[name].group, value)
            if converted is not None:
                value = converted
        # Generic inputs are only converted for methods that have exactly
        # one generic input type.
        if name in self.wsm_gins and len(self.wsm.g_in_types) == 1:
            group = group_names[self.wsm.g_in_types[0]]
            converted = global_data.convert(group, value)
            if converted is not None:
                value = converted
        return value

    def to_python(self, workspace = "ws"):
        """
        Rewrite function call in Python.

        Arguments:
            workspace: Variable name to use for workspace.

        Returns:
            String with the Python call, terminated by a newline.
        """
        # Work on copies so that rendering does not modify the call and can
        # be repeated.
        args = None if self.args is None else list(self.args)
        kwargs = None if self.kwargs is None else dict(self.kwargs)

        # For "...Create" methods the first argument is passed by name
        # (as a string) instead of as a workspace attribute.
        if len(self.name) > 6 and self.name.endswith("Create"):
            if args and isinstance(args[0], WSV):
                args[0] = args[0].name
            if kwargs:
                first = next(iter(kwargs))
                if isinstance(kwargs[first], WSV):
                    kwargs[first] = kwargs[first].name

        parts = []
        if args is not None:
            for i, a in enumerate(args):
                a = self.convert_argument(self.arg_names[i], a)
                parts.append(to_python(a, workspace))
        if kwargs is not None:
            for k, a in kwargs.items():
                a = self.convert_argument(k, a)
                parts.append(str(k) + "=" + to_python(a, workspace))
        return workspace + "." + self.name + "(" + ", ".join(parts) + ")\n"
class AgendaDefinition:
    """
    An agenda that is defined inside a controlfile (``AgendaSet``).

    Attributes:
        name: Name of the agenda
        content: List of statements in the agenda
    """

    def __init__(self, name, content):
        self.content = content
        self.name = name

    def __repr__(self):
        """
        Return the agenda definition in controlfile syntax.
        """
        body = "".join(str(statement) for statement in self.content)
        return "AgendaSet(" + self.name + ") {\n" + body + "}\n"

    def to_python(self, workspace):
        """
        Return the agenda definition in Python syntax.

        The statements become the body of a function decorated with
        ``arts_agenda``; the function is then assigned to the workspace
        attribute of the same name.

        Arguments:
            workspace: Variable name to use for workspace.
        """
        header = (f"@arts_agenda(ws={workspace})\ndef "
                  + self.name + "({}):\n".format(workspace))
        body = "".join(to_python(statement, workspace)
                       for statement in self.content)
        assignment = workspace + "." + self.name + " = " + self.name + "\n\n"
        return header + indent(body, " " * 4) + assignment
class AgendaAppend:
    """
    Weird ARTS syntax feature to append agenda to array.

    Attributes:
        name: Name of the agenda to append to
        content: List of statements in the agenda
    """

    def __init__(self, name, content):
        self.name = name
        self.content = content

    def __repr__(self):
        """
        Print agenda append statement in controlfile syntax.
        """
        body = "".join(str(c) for c in self.content)
        # The opening parenthesis after the keyword is required to match
        # the `agenda_append` rule of the grammar.
        return "ArrayOfAgendaAppend(" + self.name + ") {\n" + body + "}\n"

    def to_python(self, workspace):
        """
        Print agenda append statement in Python syntax.

        The statements become the body of a function decorated with
        ``arts_agenda``; the function is then appended to the workspace
        attribute of the same name via ``Append``.

        Arguments:
            workspace: Variable name to use for workspace.
        """
        s = f"@arts_agenda(ws={workspace})\ndef " + self.name + "({}):\n".format(workspace)
        cs = "".join(to_python(c, workspace) for c in self.content)
        s = s + indent(cs, " " * 4) + "\n"
        s += (workspace + ".Append(" + workspace + "." + self.name
              + ", " + self.name + ")\n\n")
        return s
class Comment:
    """
    A comment taken from a controlfile.

    Attributes:
        text: Text of the comment
    """

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        """
        Return the comment text as it is written in the controlfile.
        """
        return str(self.text)

    def to_python(self, workspace):
        """
        Return the comment in Python syntax.

        The text is emitted unchanged; ``workspace`` is not used.
        """
        return repr(self)
class Include:
    """
    An INCLUDE statement.

    Attributes:
        name: Name of the included controlfile
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        """
        Print INCLUDE statement in controlfile syntax.
        """
        return "INCLUDE " + "\"" + str(self.name) + "\"\n"

    def to_python(self, workspace):
        """
        Print INCLUDE statement in Python syntax.

        Arguments:
            workspace: Variable name to use for workspace.
        """
        # Use the requested workspace variable rather than a fixed "ws" so
        # that the statement matches the rest of the generated script.
        return workspace + ".execute_controlfile(\"" + self.name + "\")\n"
class WSV:
    """
    A workspace variable.

    Attributes:
        name: Name of the WSV
    """

    def __init__(self, name):
        # A name that is also a key of workspace_methods is converted with
        # camel_to_snake (presumably to avoid a clash between the variable
        # and the method of the same name).
        self.name = camel_to_snake(name) if name in workspace_methods else name

    def __repr__(self):
        """
        Return the WSV in controlfile syntax, i.e. its plain name.
        """
        return self.name

    def to_python(self, workspace):
        """
        Return the WSV in Python syntax, i.e. as attribute of the workspace.

        Arguments:
            workspace: Variable name to use for workspace.
        """
        return ".".join((workspace, self.name))
class Agenda:
    """
    Representation of the ARTS2 agenda, which is the main part of a
    controlfile.

    Attributes:
        name: Name of the agenda
        content: The statements within the agenda.
    """

    def __init__(self, name, content):
        self.content = content
        self.name = name

    def __repr__(self):
        """
        Return the agenda in controlfile syntax.
        """
        statements = "".join(str(statement) for statement in self.content)
        return self.name + " {\n" + statements + "\n}"

    def to_python(self, workspace):
        """
        Return the agenda in Python syntax.

        The output starts with the imports and the creation of the
        workspace, followed by the statements of the agenda.

        Arguments:
            workspace: Variable name to use for workspace.
        """
        preamble = """
import numpy as np
import pyarts
from pyarts.workspace import Workspace, arts_agenda
{} = Workspace(verbosity=0)
""".format(workspace)
        statements = "".join(to_python(statement, workspace)
                             for statement in self.content)
        return preamble + statements
class Controlfile:
    """
    Representation of a whole parsed controlfile.

    Attributes:
        content: The top-level statements of the controlfile.
    """

    def __init__(self, content):
        self.content = content

    def __repr__(self):
        """
        Return the controlfile in controlfile syntax.
        """
        return "".join(map(str, self.content))

    def to_python(self, workspace):
        """
        Return the controlfile in Python syntax.

        Arguments:
            workspace: Variable name to use for workspace.
        """
        return "".join(to_python(statement, workspace)
                       for statement in self.content)
class ArtsTransformer(Transformer):
    """
    Transformer to transform lark AST into symbolic representation
    of ARTS controlfile.

    Each method handles the grammar rule or terminal of the same name and
    receives the already transformed children (or the token).
    """

    def comment(self, s):
        return Comment(s[0])

    def SIGNED_INT(self, i):
        return int(i)

    def include(self, i):
        return Include(i[0])

    def number(self, i):
        return float(i[0])

    def STRING(self, s):
        # Strip the surrounding quotes.
        return s[1:-1]

    def arguments(self, c):
        return [e for e in c if not isinstance(e, Comment)]

    def list(self, c):
        return [e for e in c if not isinstance(e, Comment)]

    def nested_list(self, c):
        return [e for e in c if not isinstance(e, Comment)]

    def function(self, f):
        """
        Create a WSMCall from a method name and its optional arguments.

        Positional arguments arrive as a list, named arguments as a dict.
        """
        if len(f) == 1:
            return WSMCall(f[0], None, None)
        arguments = f[1][0]
        if isinstance(arguments, list):
            return WSMCall(f[0], arguments, None)
        if isinstance(arguments, dict):
            return WSMCall(f[0], None, arguments)
        # Any other kind of argument container is not handled.
        return None

    def positional_arguments(self, c):
        """
        Return the positional arguments without comments; variable names
        (CNAME tokens) are wrapped into WSV objects.
        """
        if not isinstance(c, list):
            c = [c]
        cs = []
        for a in c:
            if isinstance(a, Token) and a.type == "CNAME":
                a = WSV(a)
            if not isinstance(a, Comment):
                cs.append(a)
        return cs

    def named_arguments(self, c):
        """
        Return the named arguments as dict mapping name to value.
        """
        # lark may insert a None placeholder for the unmatched optional
        # part of the rule (empty argument list); skip those.
        return dict(pair for pair in c if pair is not None)

    def argument_pair(self, p):
        """
        Arguments pairs have a string on the left and a python
        literal or a variable name on the right.
        """
        p = [e for e in p if not isinstance(e, Comment)]
        pl, pr = p
        pl = str(pl)
        if isinstance(pr, Token) and pr.type == "CNAME":
            pr = WSV(pr)
        return (pl, pr)

    def statement(self, s):
        return s[0]

    def controlfile(self, c):
        return Controlfile(c)

    def agenda(self, c):
        return Agenda(c[0], c[1:])

    def agenda_definition(self, a):
        return AgendaDefinition(a[0], a[1:])

    def agenda_append(self, a):
        return AgendaAppend(a[0], a[1:])
################################################################################
# Functions to convert ARTS controlfile to Python
################################################################################
def convert_to_python(controlfile, output, workspace = "ws"):
    """
    Convert an ARTS controlfile to a Python script.

    The controlfile is parsed, transformed into its symbolic
    representation and written to the output file in Python syntax.

    Arguments:
        controlfile: Path of the controlfile to convert
        output: Path of the Python file to write
        workspace: Variable name to use for workspace in the script.
    """
    with open(controlfile) as f:
        source = f.read()
    tree = arts_parser.parse(source)
    t = ArtsTransformer().transform(tree)
    s = t.to_python(workspace)
    with open(output, "w") as f:
        f.write(s)
# Matches the empty position in front of every upper-case ASCII letter,
# except at the very start of the string.
pattern = re.compile(r'(?<!^)(?=[A-Z])')


def camel_to_snake(s):
    """
    Convert a CamelCase name to snake_case.

    An underscore is inserted in front of every upper-case letter that is
    not the first character, then the result is lower-cased.

    Arguments:
        s: Name to convert

    Returns:
        The converted name.
    """
    with_underscores = pattern.sub('_', s)
    return with_underscores.lower()