"""Classes and methods for loading configurations from YAML files.
Taken (with minor changes) from `Pylearn2`_.
.. _Pylearn2: \
http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
"""
import os
import re
import warnings
import yaml
import six
from collections import namedtuple
# Matches floats written in scientific notation (e.g. "1e-3", "+2.5E10")
# so they can be routed to the "!float" constructor via an implicit
# resolver (see _initialize).
# NOTE(review): the mantissa group is optional, so a bare exponent such
# as "e10" would also match -- confirm this is intended.
SCIENTIFIC_NOTATION_REGEXP = r"^[\-\+]?(\d+\.?\d*|\d*\.?\d+)?[eE][\-\+]?\d+$"
# Whether the custom YAML constructors have been registered; set by
# _initialize() and checked by load().
IS_INITIALIZED = False
# Base tuple type for _Proxy: the callable to instantiate, its positional
# and keyword arguments, and the YAML source the node came from.
_BaseProxy = namedtuple("_BaseProxy", ["callable", "positionals", "keywords",
                                       "yaml_src"])
class _Proxy(_BaseProxy):
    """Intermediate representation of a YAML node awaiting instantiation.

    Parameters
    ----------
    callable : callable
        Function or class that will be called to build this node.
    positionals : iterable
        Reserved for future positional-argument (`*args`) support.
    keywords : dict-like
        Mapping of keyword names to argument values (`**kwargs`). Values
        may themselves be `_Proxy` objects, possibly nested inside `dict`
        or `list` instances. Keys must be strings that are valid Python
        variable names.
    yaml_src : str
        The YAML source this node was parsed from, if available.

    Notes
    -----
    Intended as a robust, forward-compatible intermediate representation
    for internal consumption or for external tools (e.g. hyperopt).
    The class mainly exists to override `_BaseProxy`'s `__hash__`, since
    the namedtuple fields may themselves be unhashable.
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    __slots__ = []

    def __hash__(self):
        """Hash on object identity, since fields may be unhashable."""
        return hash(id(self))

    def bind(self, **kwargs):
        """Record values for keywords that are not yet present.

        Keys already present in `keywords` are left untouched; only
        missing ones are added.
        """
        for name, value in kwargs.items():
            if name not in self.keywords:
                self.keywords[name] = value

    def pop(self, key):
        """Remove `key` from the keyword mapping and return its value."""
        return self.keywords.pop(key)
def _do_not_recurse(value):
"""Function symbol used for wrapping an unpickled object
(which should not be recursively expanded).
This is recognized and respected by the instantiation parser.
Implementationally, no-op (returns the value passed in as an
argument).
Parameters
----------
value : object
The value to be returned.
Returns
-------
value : object
The same object passed in as an argument.
Notes
-----
Taken (with minor changes) from `Pylearn2`_.
.. _Pylearn2: \
http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
"""
return value
def _instantiate_proxy_tuple(proxy, bindings=None):
    """Helper for `instantiate` that handles `_Proxy` objects.

    Parameters
    ----------
    proxy : _Proxy
        The proxy node to instantiate.
    bindings : dict, optional
        Maps previously instantiated `_Proxy` objects to their values so
        shared nodes are instantiated exactly once. Defaults to a fresh
        empty dict.

    Returns
    -------
    obj : object
        The result object from recursively instantiating the object DAG.

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    # BUG FIX: the default of None previously caused a TypeError on
    # `proxy in bindings` whenever no bindings dict was supplied.
    if bindings is None:
        bindings = {}
    if proxy in bindings:
        return bindings[proxy]
    # Respect _do_not_recurse by just un-packing it (same as calling).
    if proxy.callable == _do_not_recurse:
        obj = proxy.keywords['value']
    else:
        if len(proxy.positionals) > 0:
            raise NotImplementedError('positional arguments not yet '
                                      'supported in proxy instantiation')
        kwargs = dict((k, instantiate(v, bindings))
                      for k, v in six.iteritems(proxy.keywords))
        obj = proxy.callable(**kwargs)
    try:
        obj.yaml_src = proxy.yaml_src
    except AttributeError:  # Some classes won't allow this.
        pass
    bindings[proxy] = obj
    return bindings[proxy]
def _preprocess(string, environ=None):
"""Preprocesses a string.
Preprocesses a string, by replacing `${VARNAME}` with
`os.environ['VARNAME']` and ~ with the path to the user's
home directory.
Parameters
----------
string : str
String object to _preprocess
environ : dict, optional
If supplied, preferentially accept values from
this dictionary as well as `os.environ`. That is,
if a key appears in both, this dictionary takes
precedence.
Returns
-------
rval : str
The preprocessed string
Notes
-----
Taken (with minor changes) from `Pylearn2`_.
.. _Pylearn2: \
http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
"""
if environ is None:
environ = {}
split = string.split('${')
rval = [split[0]]
for candidate in split[1:]:
subsplit = candidate.split('}')
if len(subsplit) < 2:
raise ValueError('Open ${ not followed by } before '
'end of string or next ${ in "' + string + '"')
varname = subsplit[0]
val = (environ[varname] if varname in environ
else os.environ[varname])
rval.append(val)
rval.append('}'.join(subsplit[1:]))
rval = ''.join(rval)
string = os.path.expanduser(string)
return rval
def instantiate(proxy, bindings=None):
    """Instantiate a hierarchy of proxy objects.

    Parameters
    ----------
    proxy : object
        A `_Proxy` object or list/dict/literal. Strings are run through
        `_preprocess`.
    bindings : dict, optional
        A dictionary mapping previously instantiated `_Proxy` objects
        to their instantiated values.

    Returns
    -------
    obj : object
        The result object from recursively instantiating the object DAG.

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    # BUG FIX: stripped the "[docs]" Sphinx-scrape artifact that was fused
    # onto the def line and made this a syntax error.
    if bindings is None:
        bindings = {}
    if isinstance(proxy, _Proxy):
        return _instantiate_proxy_tuple(proxy, bindings)
    elif isinstance(proxy, dict):
        # Recurse on the keys too, for backward compatibility.
        # Is the key instantiation feature ever actually used, by anyone?
        return dict((instantiate(k, bindings), instantiate(v, bindings))
                    for k, v in six.iteritems(proxy))
    elif isinstance(proxy, list):
        return [instantiate(v, bindings) for v in proxy]
    # In the future it might be good to consider a dict argument that
    # provides a type->callable mapping for arbitrary transformations.
    elif isinstance(proxy, six.string_types):
        return _preprocess(proxy)
    else:
        return proxy
def load(stream, environ=None, instantiate=True, **kwargs):
    """Load a YAML configuration from a string or file-like object.

    Parameters
    ----------
    stream : str or object
        Either a string containing valid YAML or a file-like object
        supporting the `.read()` interface.
    environ : dict, optional
        A dictionary used for ${FOO} substitutions in addition to
        environment variables. If a key appears both in `os.environ`
        and this dictionary, the value in this dictionary is used.
    instantiate : bool, optional
        If `False`, do not actually instantiate the objects but instead
        produce a nested hierarchy of `_Proxy` objects.
    **kwargs : dict
        Other keyword arguments, all of which are passed to `yaml.load`.

    Returns
    -------
    graph : dict or object
        The dictionary or object (if the top-level element specified
        a Python object to instantiate), or a nested hierarchy of
        `_Proxy` objects.

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    global IS_INITIALIZED
    if not IS_INITIALIZED:
        _initialize()
    if isinstance(stream, six.string_types):
        string = stream
    else:
        string = stream.read()
    # NOTE(review): `environ` is accepted here but never forwarded;
    # during instantiation `_preprocess` is called without it. TODO:
    # thread `environ` through to the substitution step.
    proxy_graph = yaml.load(string, Loader=yaml.SafeLoader, **kwargs)
    if instantiate:
        # BUG FIX: the `instantiate` flag was previously ignored and the
        # raw parse result returned unconditionally. The parameter
        # shadows the module-level `instantiate` function, so fetch the
        # function from the module namespace.
        return globals()['instantiate'](proxy_graph)
    return proxy_graph
def load_path(path, environ=None, instantiate=False, **kwargs):
    """Convenience function for loading a YAML configuration from a file.

    Parameters
    ----------
    path : str
        The path to the file to load on disk.
    environ : dict, optional
        A dictionary used for ${FOO} substitutions in addition to
        environment variables. If a key appears both in `os.environ`
        and this dictionary, the value in this dictionary is used.
    instantiate : bool, optional
        If `False`, do not actually instantiate the objects but instead
        produce a nested hierarchy of `_Proxy` objects.
    **kwargs : dict
        Other keyword arguments, all of which are passed to `yaml.load`.

    Returns
    -------
    graph : dict or object
        The dictionary or object (if the top-level element specified
        a Python object to instantiate), or a nested hierarchy of
        `_Proxy` objects.

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    # BUG FIX: stripped the "[docs]" Sphinx-scrape artifact from the def
    # line; also replaced ''.join(f.readlines()) with the equivalent
    # f.read().
    with open(path, 'r') as f:
        content = f.read()
    # This is apparently here to avoid the odd instance where a file gets
    # loaded as Unicode instead (see 03f238c6d). It's a rare instance
    # where basestring is not the right call.
    if not isinstance(content, str):
        raise AssertionError("Expected content to be of type str, got " +
                             str(type(content)))
    return load(content, instantiate=instantiate, environ=environ, **kwargs)
def _try_to_import(tag_suffix):
components = tag_suffix.split('.')
module_name = '.'.join(components[:-1])
try:
exec("import {0}".format(module_name))
except ImportError as e:
# We know it's an ImportError, but is it an ImportError related to
# this path,
# or did the module we're importing have an unrelated ImportError?
# and yes, this test can still have false positives, feel free to
# improve it
pieces = module_name.split('.')
str_e = str(e)
found = True in [piece.find(str(e)) != -1 for piece in pieces]
if found:
# The yaml file is probably to blame.
# Report the problem with the full module path from the YAML
# file
raise ImportError(
"Could not import {0}; ImportError was {1}".format(
module_name, str_e))
else:
pcomponents = components[:-1]
assert len(pcomponents) >= 1
j = 1
while j <= len(pcomponents):
module_name = '.'.join(pcomponents[:j])
try:
exec("import {0}".format(module_name))
except Exception:
base_msg = "Could not import {0}".format(module_name)
if j > 1:
module_name = '.'.join(pcomponents[:j - 1])
base_msg += " but could import {0}".format(module_name)
raise ImportError(
"{0}. Original exception: {1}".format(base_msg,
str(e)))
j += 1
try:
obj = eval(tag_suffix)
except AttributeError as e:
try:
# Try to figure out what the wrong field name was
# If we fail to do it, just fall back to giving the usual
# attribute error
pieces = tag_suffix.split('.')
module = '.'.join(pieces[:-1])
field = pieces[-1]
candidates = dir(eval(module))
msg = ("Could not evaluate {0}. "
"Did you mean {1}? "
"Original error was {2}".format(
tag_suffix, candidates, str(e)
))
except Exception:
warnings.warn("Attempt to decipher AttributeError failed")
raise AttributeError("Could not evaluate {0}. " +
"Original error was {1}".format(
tag_suffix, str(e)))
raise AttributeError(msg)
return obj
def _initialize():
    """Register the custom YAML tags on `yaml.SafeLoader`.

    Registers the "!obj:", "!import:", "!import" and "!float"
    constructors, plus an implicit resolver so scientific-notation
    literals are treated as "!float". Sets `IS_INITIALIZED` so `load`
    only triggers this once.

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    global IS_INITIALIZED
    yaml.add_multi_constructor(
        "!obj:", _multi_constructor_obj, Loader=yaml.SafeLoader)
    yaml.add_multi_constructor(
        "!import:", _multi_constructor_import, Loader=yaml.SafeLoader)
    yaml.add_constructor(
        "!import", _constructor_import, Loader=yaml.SafeLoader)
    yaml.add_constructor(
        "!float", _constructor_float, Loader=yaml.SafeLoader)
    pattern = re.compile(SCIENTIFIC_NOTATION_REGEXP)
    # BUG FIX: register the resolver on SafeLoader (the loader `load`
    # actually uses); previously no Loader was passed, so on older
    # PyYAML the resolver was attached only to the default Loader and
    # scientific-notation floats were never resolved to !float.
    yaml.add_implicit_resolver("!float", pattern, Loader=yaml.SafeLoader)
    IS_INITIALIZED = True
def _multi_constructor_obj(loader, tag_suffix, node):
    """Callback for the "!obj:" multi-tag: build a `_Proxy` node.

    Resolves `tag_suffix` to a callable and wraps the mapping node's
    keyword arguments in a `_Proxy` for later instantiation.

    Notes
    -----
    SECURITY: eval/_try_to_import execute the tag suffix; only load
    YAML from trusted sources.
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    yaml_src = yaml.serialize(node)
    # Called only for validation -- it raises on duplicate or unhashable
    # keys; its return value is intentionally discarded.
    _construct_mapping(node)
    mapping = loader.construct_mapping(node)
    assert hasattr(mapping, 'keys')
    assert hasattr(mapping, 'values')
    for key in mapping.keys():
        if not isinstance(key, six.string_types):
            raise TypeError(
                "Received non string object ({0}) as key in mapping.".format(
                    str(key)
                ))
    # FIX: renamed the local from `callable` to `target` so the builtin
    # callable() is no longer shadowed.
    if '.' not in tag_suffix:
        target = eval(tag_suffix)
    else:
        target = _try_to_import(tag_suffix)
    rval = _Proxy(callable=target, yaml_src=yaml_src, positionals=(),
                  keywords=mapping)
    return rval
def _multi_constructor_import(loader, tag_suffix, node):
    """Callback for the "!import:" multi-tag.

    Imports and returns the object named by the dotted path in
    `tag_suffix`.

    Raises
    ------
    yaml.YAMLError
        If `tag_suffix` contains no dot (so it cannot name a module
        attribute).

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    if '.' not in tag_suffix:
        # BUG FIX: message previously read "contains no'.'" (missing
        # space).
        raise yaml.YAMLError("!import: tag suffix contains no '.'")
    return _try_to_import(tag_suffix)
def _constructor_import(loader, node):
    """Callback for the "!import" tag: import the named object.

    Reads the scalar value of `node` as a dotted path and returns the
    imported object.

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    dotted_path = loader.construct_scalar(node)
    if '.' not in dotted_path:
        raise yaml.YAMLError("Import tag suffix contains no '.'")
    return _try_to_import(dotted_path)
def _constructor_float(loader, node):
"""Callback for "!float"
Notes
-----
Taken (with minor changes) from `Pylearn2`_.
.. _Pylearn2: \
http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
"""
val = loader.construct_scalar(node)
return float(val)
def _construct_mapping(node, deep=False):
    """Construct a dict from a YAML mapping node, permitting only unique
    keys.

    Modified version of `yaml.BaseConstructor._construct_mapping` that
    raises on duplicate (or unhashable) keys instead of silently keeping
    the last value.

    Parameters
    ----------
    node : yaml.nodes.MappingNode
        The mapping node to construct.
    deep : bool, optional
        Passed through to `construct_object` for deep construction.

    Returns
    -------
    mapping : dict
        The constructed mapping.

    Notes
    -----
    Taken (with minor changes) from `Pylearn2`_.
    .. _Pylearn2: \
    http://github.com/lisa-lab/pylearn2/blob/master/pylearn2/config/yaml_parse.py
    """
    if not isinstance(node, yaml.nodes.MappingNode):
        raise Exception(
            "Expected a mapping node, but found {0} {1}.".format(
                node.id, node.start_mark
            ))
    mapping = {}
    constructor = yaml.constructor.BaseConstructor()
    for key_node, value_node in node.value:
        # BUG FIX: honor the `deep` argument (previously hard-coded to
        # False); also removed three dead `const = yaml.constructor`
        # assignments.
        key = constructor.construct_object(key_node, deep=deep)
        try:
            hash(key)
        except TypeError as exc:
            # BUG FIX: format the whole message -- the concatenated
            # literal previously left "{0}" unformatted in the output.
            raise Exception("While constructing a mapping {0}, found "
                            "unacceptable key ({1}).".format(
                                node.start_mark,
                                (exc, key_node.start_mark)))
        if key in mapping:
            raise Exception("While constructing a mapping {0}, found "
                            "duplicate key ({1}).".format(node.start_mark,
                                                          key))
        value = constructor.construct_object(value_node, deep=deep)
        mapping[key] = value
    return mapping