source: framspy/framsfiles/reader/_all.py @ 1142

Last change on this file since 1142 was 1104, checked in by Maciej Komosinski, 4 years ago

Added the "framsfiles" module for reading and writing Framsticks-format files (genotypes, settings, scripts, neurons, etc.)

File size: 14.1 KB
RevLine 
[1104]1import os.path
2import re as _re
3import warnings
4
5from framsfiles._context import _create_specs_from_xml
6
# Show every UserWarning each time it is issued, not only its first occurrence.
# NOTE: this installs a process-wide warning filter as an import side effect.
warnings.simplefilter('always', UserWarning)

# Regular expressions used while tokenizing serialized values.
# The optional sign is the character class [+-]; the former [+|-] also
# accepted a literal '|' as a "sign".
_INT_FLOAT_REGEX = r'([+-]?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?'
_NATURAL_REGEX = r'(?:0|[1-9]\d*)'
_HEX__NUMBER_REGEX = r'[+-]?0[xX][\da-fA-F]*'
# A number is either a hexadecimal literal or a decimal integer/float.
_NUMBER_REGEX = '({}|{})'.format(_HEX__NUMBER_REGEX, _INT_FLOAT_REGEX)
# '~' and '"' that are NOT preceded by a backslash (i.e. not escaped).
_TYLDA_REGEX = '(?<![\\\\])(~)'
_QUOTE_REGEX = '(?<![\\\\])(")'
# Backslash-escaped sequences that are replaced with the literal character.
_ESCAPED_QUOTE_REGEX = '\\\\"'
_ESCAPED_TAB_REGEX = '\\\\t'
_ESCAPED_NEWLINE_REGEX = '\\\\n'
_ESCAPED_TYLDA_REGEX = '\\\\~'
# Path of "framscript.xml" located next to this module.
_FRAMSCRIPT_XML_PATH = os.path.join((os.path.dirname(__file__)), "framscript.xml")
20
# Messages (warning and error texts; '{}' placeholders are filled with str.format):
_NO_FILE_EXTENSION_WARNING = "No file extension found. Setting default context."
_UNSUPPORTED_EXTENSION_WARNING = "Unsupported file extension: '{}'. Setting default context."
_UNSUPPORTED_CONTEXT_WARNING = "Unsupported context: '{}'. Setting default context."
_UNEXPECTED_KEY_WARNING = "Unexpected key encountered: key: '{}', class: '{}', context: '{}'"
_NOT_A_NUMBER_ERROR = "Expression cannot be parsed to a number: {}"
_MULTILINE_NOT_CLOSED_WARNING = "Multiline property for key: '{}' was not closed with '~'."
# Strings are delimited by double quotes (not '~'); the offending text is appended.
_STRING_NOT_CLOSED_ERROR = "String expression not closed with '\"'. Got: {}"
_EMPTY_SERIALIZED_ERROR = "Empty value for '@Serialized' not allowed."
_NO_OBJECT_ERROR = "No object defined for the current line."
_XYZ_ERROR = "XYZ format should look like this: XYZ[ a,b,c], Got: '{}'"
_REFERENCE_FORMAT_ERROR = "reference sign '^' should be followed by an integer. Got: {}"
_COLON_EXPECTED_ERROR = "Colon ':' was expected. Got: {}"
_MIN_VAL_EXCEEDED_ERROR = "Minimum value allowed: {}, got: {}"
_MAX_VAL_EXCEEDED_ERROR = "Maximum value allowed: {}, got: {}"
_NONEMPTY_CLASSNAME = "There should be no string after object's classname."
37
38
# Built once at import time by the project helper _create_specs_from_xml.
# _specs is looked up by (context, classname) and maps property keys to the
# keyword arguments of _create_generic_parser; _contexts is used for
# membership tests of context names.
# NOTE(review): presumably both are derived from "framscript.xml" - confirm in framsfiles._context.
_specs, _contexts = _create_specs_from_xml()
40
41
def _create_generic_parser(dtype, min=None, max=None):
    """
    Builds a converter that casts its argument with ``dtype`` and validates bounds.
    :param dtype: Callable performing the conversion (e.g. int, float, str).
    :param min: Smallest accepted value; None disables the lower-bound check.
    :param max: Largest accepted value; None disables the upper-bound check.
    :return: A one-argument function returning the converted value, or raising
    ValueError when the converted value lies outside the allowed range.
    """
    def parse(x):
        converted = dtype(x)
        # The lower bound is verified first, then the upper bound.
        if min is not None and converted < min:
            raise ValueError(_MIN_VAL_EXCEEDED_ERROR.format(min, converted))
        if max is not None and converted > max:
            raise ValueError(_MAX_VAL_EXCEEDED_ERROR.format(max, converted))
        return converted

    return parse
54
55
def _str_to_number(s):
    """
    Converts a string to a number.
    The text is first tried as an integer with automatic base detection
    (``int(text, 0)``) and, if that fails, as a float.
    :param s: String to convert; surrounding whitespace is ignored.
    :return: An int or a float.
    Raises ValueError when neither conversion succeeds.
    """
    assert isinstance(s, str)
    text = s.strip()

    # Converters are tried in order; the first one that succeeds wins.
    for convert in (lambda raw: int(raw, 0), float):
        try:
            return convert(text)
        except ValueError:
            continue
    raise ValueError(_NOT_A_NUMBER_ERROR.format(text))
72
73
def parse_value(value, classname=None, key=None, context=None, autoparse=True):
    """
    Converts the textual value of a single property into a Python object.
    :param value: Raw property text; surrounding whitespace is ignored.
    :param classname: Class name of the object the property belongs to.
    :param key: Name of the property.
    :param context: Parsing context; together with ``classname`` it selects a specification.
    :param autoparse: If true, values without a specification are converted to numbers when possible.
    :return: The value produced by the specification's parser, the deserialized object
    for '@Serialized:' values, a number, or the stripped string itself.
    """
    assert isinstance(value, str)
    text = value.strip()
    # TODO maybe check 'Global context' as well?
    spec_id = (context, classname)
    if spec_id in _specs:
        class_spec = _specs[spec_id]
        if key not in class_spec:
            # Unknown key for a known class: warn and fall back to generic parsing below.
            warnings.warn(_UNEXPECTED_KEY_WARNING.format(key, classname, context))
        else:
            convert = _create_generic_parser(**class_spec[key])
            return convert(text)

    if text.startswith("@Serialized:"):
        _, serialized = text.split(":", 1)
        return deserialize(serialized)

    if not autoparse:
        return text
    try:
        return _str_to_number(text)
    except ValueError:
        # Not a number - keep the text unchanged.
        return text
97
98
def _extract_string(exp):
    """
    Reads a double-quoted string from the beginning of ``exp``.
    :param exp: Expression whose first character is the opening quote.
    :return: A tuple (string with escape sequences replaced, remaining text after the closing quote).
    Raises ValueError when no unescaped closing quote is found.
    """
    body = exp[1:]  # drop the opening quote
    closing = _re.search(_QUOTE_REGEX, body)
    if closing is None:
        raise ValueError(_STRING_NOT_CLOSED_ERROR.format(body))
    end = closing.start()
    content, reminder = body[:end], body[end + 1:]
    # Escaped sequences are replaced in this fixed order: quote, tab, newline.
    replacements = (
        (_ESCAPED_QUOTE_REGEX, '"'),
        (_ESCAPED_TAB_REGEX, '\t'),
        (_ESCAPED_NEWLINE_REGEX, '\n'),
    )
    for pattern, literal in replacements:
        content = _re.sub(pattern, literal, content)
    return content, reminder
111
112
def _extract_number(exp):
    """
    Reads a number (hexadecimal, integer or float) from the beginning of ``exp``.
    :param exp: Expression expected to start with a number (no leading whitespace is skipped).
    :return: A tuple (parsed number, remaining text after the number).
    Raises ValueError when ``exp`` does not start with a number.
    """
    match = _re.match(_NUMBER_REGEX, exp)
    if match is None:
        # Without this check a non-numeric input failed with AttributeError on None,
        # which callers that handle ValueError (e.g. loads) would not catch.
        raise ValueError(_NOT_A_NUMBER_ERROR.format(exp))
    number = _str_to_number(match.group())
    return number, exp[match.end():]
119
120
def _extract_xyz(exp):
    """
    Reads an 'XYZ[a,b,c]' triple from the beginning of ``exp``.
    Whitespace around the numbers and separators is ignored.
    :param exp: Expression starting (after optional whitespace) with 'XYZ['.
    :return: A tuple ((x, y, z) as floats, remaining text after the closing ']').
    Raises ValueError when the expression does not follow the XYZ format
    (including truncated input and non-numeric components).
    """
    original = exp.strip()
    if not original.startswith('XYZ['):
        raise ValueError(_XYZ_ERROR.format(original))
    rest = original[4:]

    coordinates = []
    # Each of the three numbers must be followed by its delimiter: ',', ',' and ']'.
    for delimiter in (',', ',', ']'):
        rest = rest.lstrip()
        if _re.match(_NUMBER_REGEX, rest) is None:
            raise ValueError(_XYZ_ERROR.format(original))
        number, rest = _extract_number(rest)
        coordinates.append(float(number))
        rest = rest.strip()
        # startswith() is safe for an empty remainder, unlike indexing rest[0].
        if not rest.startswith(delimiter):
            raise ValueError(_XYZ_ERROR.format(original))
        rest = rest[1:]
    return tuple(coordinates), rest
145
146
def _extract_reference(exp):
    """
    Reads a reference index from an expression of the form '^N'.
    :param exp: Expression whose first character is the reference sign.
    :return: A tuple (index as int, remaining text after the index).
    Raises ValueError when the reference sign is not followed by a natural number.
    """
    body = exp[1:].strip()  # skip the reference sign and any whitespace after it
    digits = _re.match(_NATURAL_REGEX, body)
    if digits is None:
        raise ValueError(_REFERENCE_FORMAT_ERROR.format(body))
    stop = digits.end()
    return int(body[:stop]), body[stop:]
157
158
def _extract_custom_object(exp):
    """
    Cuts the text of a custom object, e.g. 'Name<...>', 'Name[...]' or 'Name{...}',
    from the beginning of ``exp`` without interpreting its content.
    The object ends where all '<', '[' and '{' brackets opened since the first
    one are balanced again; each bracket kind is counted independently.
    :param exp: Expression starting with the custom object.
    :return: A tuple (text of the object including its prefix, remaining text).
    Raises ValueError when no opening bracket is present or the brackets are unbalanced.
    """
    # Any of the three opening brackets ends the prefix. The former pattern
    # '<|\[|\{]' recognised '{' only when it was directly followed by ']'.
    opening = _re.search(r'[<\[{]', exp)
    if opening is None:
        raise ValueError("Cannot parse expression, no opening bracket found: {}".format(exp))

    depth = {'<': 0, '[': 0, '{': 0}
    opener_of = {'>': '<', ']': '[', '}': '{'}
    start = opening.start()
    end = start
    for end, char in enumerate(exp[start:], start=start):
        if char in depth:
            depth[char] += 1
        elif char in opener_of:
            depth[opener_of[char]] -= 1

        # All counters back at zero: the object is complete.
        if not any(depth.values()):
            break
    if any(depth.values()):
        raise ValueError("Unbalanced brackets in expression: {}".format(exp))
    return exp[:end + 1], exp[end + 1:]
191
192
def deserialize(expression):
    """
    Parses the text that follows '@Serialized:' into Python objects.
    Recognized elements: 'null' (None), 'XYZ[a,b,c]' (tuple of three floats),
    '[...]' (list), '{"key":value,...}' (dict with string keys), double-quoted
    strings, numbers and '^N' references to the N-th (0-based) previously
    started list, dict or XYZ tuple. Anything else is cut out as a custom
    object and returned as its raw text.
    :param expression: Serialized expression.
    :return: The deserialized top-level object.
    Raises ValueError for empty or malformed expressions.
    """
    stripped_exp = expression.strip()
    if stripped_exp == '':
        raise ValueError(_EMPTY_SERIALIZED_ERROR)

    if stripped_exp == 'null':
        return None

    # Stack of the lists/dicts that are currently open (innermost last).
    objects = []
    # Targets of '^N' references, in order of appearance.
    references = []
    main_object_determined = False
    main_object = None
    expect_dict_value = False
    last_dict_key = None
    exp = stripped_exp
    opened_lists = 0
    opened_dicts = 0

    while len(exp) > 0:
        current_object_is_reference = False
        # Text left after the top-level object has been completed.
        if main_object_determined and len(objects) == 0:
            raise ValueError(_NO_OBJECT_ERROR)
        if expect_dict_value:
            if exp[0] == ':':
                exp = exp[1:].strip()
            else:
                raise ValueError(_COLON_EXPECTED_ERROR.format(exp[0]))
        # Element separator. exp[:1] is used because removing the colon above
        # may have left nothing to read.
        # TODO support for XYZ tuples
        if exp[:1] == ",":
            separator_allowed = len(objects) > 0 and (
                isinstance(objects[-1], list)
                or (isinstance(objects[-1], dict) and not expect_dict_value))
            if not separator_allowed:
                raise ValueError("Unexpected ',' in serialized expression.")
            exp = exp[1:].strip()

        if exp == "":
            # Input ended right after ':' or ',' (formerly an IndexError).
            raise ValueError("Unexpected end of serialized expression.")

        if exp[0] == "]":
            if len(objects) == 0 or not isinstance(objects[-1], list):
                raise ValueError("Unexpected ']' in serialized expression.")
            opened_lists -= 1
            objects.pop()
            exp = exp[1:].strip()
            continue
        elif exp[0] == "}":
            if len(objects) == 0 or not isinstance(objects[-1], dict):
                raise ValueError("Unexpected '}' in serialized expression.")
            opened_dicts -= 1
            objects.pop()
            exp = exp[1:].strip()
            continue
        elif exp.startswith("null"):
            current_object = None
            exp = exp[4:]
        elif exp.startswith("XYZ"):
            current_object, exp = _extract_xyz(exp)
        elif exp[0] == "[":
            current_object = list()
            opened_lists += 1
            exp = exp[1:]
        elif exp[0] == "{":
            current_object = dict()
            opened_dicts += 1
            exp = exp[1:]
        elif exp[0] == '"':
            current_object, exp = _extract_string(exp)
        elif _re.match(_NUMBER_REGEX, exp) is not None:
            current_object, exp = _extract_number(exp)
        elif exp[0] == '^':
            i, exp = _extract_reference(exp)
            if i >= len(references):
                raise ValueError("Reference '^{}' does not point to an existing object.".format(i))
            current_object = references[i]
            current_object_is_reference = True
        else:
            current_object, exp = _extract_custom_object(exp)

        # Attach the new element to the innermost open container.
        if len(objects) > 0:
            if isinstance(objects[-1], list):
                objects[-1].append(current_object)
            elif isinstance(objects[-1], dict):
                if expect_dict_value:
                    objects[-1][last_dict_key] = current_object
                    last_dict_key = None
                    expect_dict_value = False
                else:
                    if not isinstance(current_object, str):
                        raise ValueError("Dictionary keys must be strings.")
                    last_dict_key = current_object
                    expect_dict_value = True

        if isinstance(current_object, (list, dict, tuple)) and not current_object_is_reference:
            references.append(current_object)
            # Only lists and dicts stay "open" and receive further elements.
            # An XYZ tuple is already complete; pushing it on the stack (as
            # before) made every following ',' / ']' / '}' fail.
            if not isinstance(current_object, tuple):
                objects.append(current_object)
        if not main_object_determined:
            main_object_determined = True
            main_object = current_object
        exp = exp.strip()

    if opened_lists != 0:
        raise ValueError("List was not closed with ']'.")
    if opened_dicts != 0:
        raise ValueError("Dictionary was not closed with '}'.")
    return main_object
306
307
def loads(input_string, context=None, autocast=True):
    """
    Parses string in Framsticks' format to a list of dictionaries.
    Every object starts with a 'classname:' line, is followed by 'key:value' lines
    and ends with an empty line. A value equal to '~' starts a multiline property
    that lasts until a line containing an unescaped '~'. Lines starting with '#'
    (outside multiline properties) are ignored.
    :param input_string: String to parse.
    :param context: Context of parsing compliant with contexts found in 'framscript.xml' e.g. 'expdef file'.
    :param autocast: If true numbers will be parsed automatically if possible.
    If false every field will be treated as a string.
    :return: A list of dictionaries representing Framsticks objects. The class name
    of each object is stored under the '_classname' key.
    Raises ValueError (with the 1-based line number and the offending line) on incorrect syntax.
    """
    assert isinstance(input_string, str)
    if context is not None and context not in _contexts:
        warnings.warn(_UNSUPPORTED_CONTEXT_WARNING.format(context))

    lines = input_string.split("\n")
    multiline_value = None
    multiline_key = None
    current_object = None
    objects = []
    parsing_error = None
    class_name = None
    try:
        # Lines are numbered from 1 so that error messages match text editors.
        for line_num, line in enumerate(lines, start=1):

            if multiline_key is not None:
                endmatch = _re.search(_TYLDA_REGEX, line)
                if endmatch is not None:
                    endi = endmatch.span()[0]
                    value = line[0:endi]
                    reminder = line[endi + 1:].strip()
                    if reminder != "":
                        raise ValueError("Unexpected text after the closing '~'.")
                else:
                    value = line + "\n"

                if _re.search(_TYLDA_REGEX, value) is not None:
                    raise ValueError("Unescaped '~' inside a multiline property.")
                value = _re.sub(_ESCAPED_TYLDA_REGEX, '~', value)
                multiline_value += value
                if endmatch is not None:
                    current_object[multiline_key] = multiline_value
                    multiline_value = None
                    multiline_key = None

            # Ignores comment lines (if outside multiline prop)
            elif line.startswith("#"):
                continue
            else:
                line = line.strip()
                if current_object is not None:
                    # An empty line ends the current object.
                    if line == "":
                        current_object = None
                        continue
                else:
                    # Outside of an object only a 'classname:' line is meaningful;
                    # other lines are skipped.
                    if ":" in line:
                        class_name, suffix = line.split(":", 1)
                        if suffix != "":
                            raise ValueError(_NONEMPTY_CLASSNAME)
                        current_object = {"_classname": class_name}
                        objects.append(current_object)
                        continue

                if current_object is not None:
                    key, separator, value = line.partition(":")
                    if separator == "":
                        # Formerly surfaced as an opaque tuple-unpacking error.
                        raise ValueError(_COLON_EXPECTED_ERROR.format(line))
                    # TODO check if the key is supported for given class
                    if key.strip() == "":
                        raise ValueError("Property name must not be empty.")
                    if value.strip() == "~":
                        multiline_value = ""
                        multiline_key = key
                    else:
                        value = parse_value(value, classname=class_name, key=key, context=context, autoparse=autocast)
                        current_object[key] = value
    except ValueError as ex:
        parsing_error = ex

    # Keep what was collected for a multiline property that was never closed.
    if multiline_key is not None:
        current_object[multiline_key] = multiline_value
        warnings.warn(_MULTILINE_NOT_CLOSED_WARNING.format(multiline_key))

    if parsing_error is not None:
        error_msg = "Parsing error. Incorrect syntax in line {}:\n{}\n{}".format(line_num, parsing_error, line)
        raise ValueError(error_msg) from parsing_error

    return objects
396
397
def load(filename, context=None, autocast=True):
    """
    Parses the file with a given filename to a list of dictionaries.
    :param filename: Name of the file to parse (read as UTF-8).
    :param context: Context of parsing compliant with contexts found in 'framscript.xml' e.g. 'expdef file'.
    If context is left empty it will be inferred from the file's extension
    ('<extension> file'); a warning is issued and the default context is used
    when the extension is missing or unsupported.
    :param autocast: If true numbers will be parsed automatically if possible.
    If false every field will be treated as a string.
    :return: A list of dictionaries representing Framsticks objects.
    """
    # The context manager closes the file even if reading fails.
    with open(filename, encoding='UTF-8') as file:
        s = file.read()

    if context is None:
        # splitext() copes with names that contain no dot or several dots
        # (e.g. './dir/file.gen'), where unpacking filename.split('.') raised ValueError.
        extension = os.path.splitext(filename)[1][1:]
        if extension == "":
            warnings.warn(_NO_FILE_EXTENSION_WARNING)
        else:
            inferred_context = extension + " file"
            if inferred_context in _contexts:
                context = inferred_context
            else:
                warnings.warn(_UNSUPPORTED_EXTENSION_WARNING.format(extension))
    return loads(s, context=context, autocast=autocast)
Note: See TracBrowser for help on using the repository browser.