Source code for Stoner.folders.metadata

# -*- coding: utf-8 -*-
"""Provides classes and functions to support the :py:attr:`Stoner.DataFolder.metadata` magic attribute."""

__all__ = ["MetadataProxy"]
import fnmatch
from collections.abc import MutableMapping

from lmfit import Model
import numpy as np

from ..core import typeHintedDict, metadataObject
from ..compat import string_types
from ..tools import isLikeList, isiterable, make_Data
from ..Core import DataFile


def _fmt_as_list(results):
    """Convert the results list of slicing to a simple list."""
    keys = set()
    for r in results:
        keys |= set(r.keys())
    keys = list(keys)
    if len(keys) == 1:
        ret = [r.get(keys[0], None) for r in results]
    else:
        ret = []
        for r in results:
            ret.append(tuple(r.get(k, None) for k in keys))

    return ret


def _fmt_as_dict(results):
    """Non-opt, results is already adictionary."""
    return results


def _fmt_as_dataframe(results):
    """Format the return results as a DataFrame."""
    from pandas import DataFrame

    frame = DataFrame(results)
    return frame


def _fmt_as_Data(results):
    """Wrap the sliced results in the object produced by ``make_Data`` and mask NaN entries.

    Args:
        results (list of dict):
            One dictionary of sliced metadata per object.

    Returns:
        The object returned by ``make_Data`` for a DataFrame of *results*, with its ``mask``
        attribute set to a boolean array that is True wherever ``np.isnan`` reports a NaN.
    """
    frame = _fmt_as_dataframe(results)
    data = make_Data(frame)
    nan_mask = np.zeros(data.shape, dtype=bool)
    for index, column in enumerate(data.data.T):
        try:
            nan_mask[:, index] = np.isnan(column)
        except TypeError:
            # np.isnan rejected this column's contents, so the column is left unmasked.
            continue
    data.mask = nan_mask
    return data


def _fmt_as_array(results):
    """Format the results as an array.

    Args:
        results (list of dict):
            One dictionary of sliced metadata per object.

    Returns:
        The ``data`` attribute of the ``_fmt_as_Data`` result. When that array has exactly one
        column, only the first column is returned; otherwise the whole 2D array is returned.
    """
    formatted = _fmt_as_Data(results)
    if formatted.data.shape[1] == 1:
        return formatted.data[:, 0]
    return formatted.data


def _fmt_as_smart(results):
    """Decide what formatting of results to do.

    Args:
        results (list of dict):
            One dictionary of sliced metadata per object.

    Returns:
        The ``_fmt_as_list`` formatting when every dictionary holds exactly one entry and they all
        share the same key (this includes an empty *results*); otherwise the ``_fmt_as_dict``
        formatting.

    Notes:
        The key of the first dictionary is extracted once, up front, and only if that dictionary
        has exactly one entry. Indexing into the first dictionary's keys for every element raised an
        IndexError when the first dictionary was empty but a later one had a single entry.
    """
    if not results:
        return _fmt_as_list(results)

    reference = list(results[0].keys())
    if len(reference) != 1:
        # The first result is empty or multi-keyed, so the collection cannot be a single column.
        return _fmt_as_dict(results)

    wanted = reference[0]
    if all(len(entry) == 1 and list(entry.keys())[0] == wanted for entry in results):
        return _fmt_as_list(results)
    return _fmt_as_dict(results)


def _slice_keys(args, possible=None):
    """Work through the arguments to slice() and construct a list of keys.

    Args:
        args (iterable):
            Items describing the keys wanted. Each item may be:

            - a string that is either an exact key in *possible* or an ``fnmatch`` style pattern
              that is expanded against *possible*;
            - an lmfit ``Model`` subclass or instance, which expands to ``"<ModelName>:<param>"``
              and ``"<ModelName>:<param> err"`` for each of the model's ``param_names``;
            - any other iterable, which is processed recursively.
        possible (iterable of str or None):
            The keys that may be selected. None is treated as no keys being available.

    Returns:
        (list of str):
            The selected keys, in the order in which they were requested.

    Raises:
        KeyError:
            If a string matches nothing in *possible*, if a key derived from a model is not in
            *possible*, or if an item is of a type that cannot describe keys.
    """
    if possible is None:
        possible = []
    keys = []
    for k in args:
        if isinstance(k, string_types):
            if k in possible:
                keys.append(k)
                continue
            matched = fnmatch.filter(possible, k)
            if not matched:
                # Report the pattern that was asked for, not the (empty) list of matches.
                raise KeyError(f"No matching keys for {k} in metadata")
            # Everything fnmatch returns is already a member of possible.
            keys.extend(matched)
        elif isinstance(k, Model) or (isinstance(k, type) and issubclass(k, Model)):
            # A Model class is instantiated so that its parameter names can be read.
            instance = k() if isinstance(k, type) else k
            model = type(instance).__name__
            for name in instance.param_names:
                for sub_k in (f"{model}:{name}", f"{model}:{name} err"):
                    if sub_k not in possible:
                        raise KeyError(f"No matching keys for {sub_k} in metadata")
                    keys.append(sub_k)
        elif isiterable(k):
            keys.extend(_slice_keys(k, possible))
        else:
            raise KeyError(f"{type(k)} cannot be used as a key name or set of key names")
    return keys


class MetadataProxy(MutableMapping):

    """Provide methods to interact with a whole collection of metadataObjects' metadata.

    The proxy wraps a folder-like object and presents the metadata of its members as a single
    mapping: the mapping's keys are the metadata keys common to every member, and indexing with a
    key returns an array holding that key's value from each member.
    """

    def __init__(self, folder):
        """Note our parent folder object."""
        self._folder = folder

    @property
    def all(self):
        """Iterate over the metadata of every object in the Folder.

        If the folder has a ``_metadata`` attribute (extra logic for Folders like Stack) then the
        entries of ``_metadata.items()`` are yielded, otherwise the ``metadata`` attribute of each
        member of the folder is yielded.
        """
        if hasattr(self._folder, "_metadata"):  # Extra logic for Folders like Stack
            yield from self._folder._metadata.items()
        else:
            for item in self._folder:
                yield item.metadata

    @all.setter
    def all(self, value):
        """Update the metadata of each object in the Folder from an iterable of mappings.

        The entries of *value* are paired, in order, with the members of the folder (or with the
        entries of the folder's ``_metadata`` attribute when it has one) and each existing metadata
        dictionary is updated in place from its partner.
        """
        if hasattr(self._folder, "_metadata"):  # Direct support for metadata dictionary
            for new, old in zip(value, self._folder._metadata):
                old.update(new)
        else:
            for new, item in zip(value, self._folder):
                item.metadata.update(new)

    @property
    def all_by_keys(self):
        """Return a typeHintedDict mapping each common key to an array of its values.

        The keys are those returned by :py:attr:`common_keys` and each value is the result of
        indexing this proxy with that key, viewed as a plain ``numpy.ndarray``.
        """
        ret = typeHintedDict()
        for k in self.common_keys:
            ret[k] = self[k].view(np.ndarray)
        return ret

    @property
    def common_keys(self):
        """Return the sorted list of metadata keys common to all objects in the Folder."""
        if len(self._folder) == 0:
            return []
        keys = set(self._folder[0].metadata.keys())
        for d in self._folder:
            keys &= set(d.metadata.keys())
        return sorted(keys)

    @property
    def common_metadata(self):
        """Return a dictionary of the common_keys that have common values.

        A key is included only when every object in the folder holds an equal value for it. The
        values are compared one at a time against the first: comparing the whole list of values to
        its first element is always False for native Python values such as strings and numbers.
        """
        output = typeHintedDict()
        for key in self.common_keys:
            vals = self.slice(key, output="list")
            if not vals:
                continue
            first = vals[0]
            # np.all collapses an element-wise comparison result to a single truth value.
            if all(np.all(val == first) for val in vals):
                output[key] = first
        return output

    def __contains__(self, item):
        """Check for membership of all possible keys."""
        return item in self.all_keys()

    def __iter__(self):
        """Iterate over the common keys."""
        yield from self.common_keys

    def __len__(self):
        """Our length is the number of common_keys."""
        return len(self.common_keys)

    def __repr__(self):
        """Give an informative display of the metadata representation."""
        return (
            f"The {type(self._folder).__name__} {self._folder.key} has"
            + f" {len(self)} common keys of metadata in {len(self._folder)} {self._folder.type.__name__} objects"
        )

    def __delitem__(self, item):
        """Attempt to delete item from all members of the folder.

        Raises:
            KeyError:
                If *item* was not a metadata key in any member of the folder.
        """
        ok = False
        for entry in self._folder:
            try:
                del entry.metadata[item]
                ok = True
            except KeyError:
                pass
        if not ok:  # item was not a key in any data file
            raise KeyError(f"{item} was not recognised as a metadata key in any object in the folder.")

    def __getitem__(self, value):
        """Return an array formed by getting a single key from each object in the Folder.

        Raises:
            KeyError:
                If the resulting array is empty.
        """
        ret = self.slice(value, mask_missing=True, output="array")
        if ret.size == 0:
            raise KeyError(f"{value} did not match any keys in any file")
        return ret

    def __setitem__(self, key, value):
        """Proxy to set an item on all the entries in the folder."""
        for d in self._folder:
            d[key] = value

    def __xor__(self, other):
        """Implement an XOR operator that gives differences between metadata dictionaries.

        *other* may be a folder of the same type as ours, another MetadataProxy or a
        metadataObject; anything else yields NotImplemented.
        """
        if isinstance(other, type(self._folder)):
            other = other.metadata
        if isinstance(other, MetadataProxy):
            other = other.all_by_keys
        elif isinstance(other, metadataObject):
            other = other.metadata
        else:
            return NotImplemented
        return self.all_by_keys ^ other

    def __eq__(self, other):
        """Equality test operator.

        Two sets of metadata are equal when their XOR difference is empty. NotImplemented is
        returned for operands that :py:meth:`__xor__` does not support.
        """
        # Call __xor__ directly: the ^ operator raises TypeError for an unsupported operand and
        # that would leak out of a plain == comparison.
        ret = self.__xor__(other)
        if ret is NotImplemented or not isinstance(ret, dict):
            return NotImplemented
        return len(ret) == 0

    def all_keys(self):
        """Return the union of all the metadata keys for all objects in the Folder.

        Yields:
            each key, in sorted order.
        """
        keys = set()
        for d in self._folder:
            keys |= set(d.metadata.keys())
        yield from sorted(keys)

    def all_items(self):
        """Return the result of indexing the metadata with all_keys().

        Yields:
            key,self[key]
        """
        for k in self.all_keys():
            yield k, self[k]

    def all_values(self):
        """Return the result of indexing the metadata with all_keys().

        Yields:
            self[key]
        """
        for k in self.all_keys():
            yield self[k]

    def apply(self, key, func):
        """Evaluate a function for each item in the folder and store the return value in a metadata key.

        Args:
            key (str):
                The name of the key to store the result in.
            func(callable):
                The function to be evaluated.

        Returns:
            (self):
                This metadata proxy, to allow routines to be strung together.

        Notes:
            The function should have a prototype of the form:

                def func(i,metadataObject):

            where i is a counter that runs from 0 to the length of the current Folder
            and metadataObject will be each object in the Folder in turn.
        """
        for i, d in enumerate(self._folder):
            d[key] = func(i, d)
        return self

    def slice(self, *args, **kwargs):  # pylint: disable=arguments-differ
        """Return a list of the metadata dictionaries for each item/file in the top level group.

        Args:
            *args (string, lmfit.Model class or instance or iterable of string, lmfit Models):
                if given then only return the item(s) requested from the metadata

        Keyword Arguments:
            values_only(bool):
                if given and *output* not set only return tuples of the dictionary values. Mostly useful
                when given a single key string
            output (str or type):
                Controls the output format from slice_metadata. Possible values are

                - "dict" or dict - return a list of dictionary subsets of the metadata from each image
                - "list" or list - return a list of values of each item of the metadata
                - "array" or np.ndarray - return a single array - like list above, but returns as a numpy
                  array. This can create a 2D array from multiple keys
                - "data" or Stoner.Data - returns the metadata in a Stoner.Data object where the column
                  headers are the metadata keys.
                - "frame" - returns the metadata as a Pandas DataFrame object
                - "smart" - switch between *dict* and *list* depending whether there is one or more keys.
            mask_missing (bool):
                If true, then keys are selected from the union of all keys, so metadata entries missing in
                members of the folder are returned as masked values (or None). If False, only keys common
                to every member may be selected and an exception is raised for any other key.

        Returns:
            ret(list of dict, tuple of values or :py:class:`Stoner.Data`):
                depending on *values_only* or *output* returns the sliced dictionaries or tuples/
                values of the items

        Raises:
            SyntaxError:
                If unrecognised keyword arguments are supplied.
            TypeError:
                If *output* is not one of the supported values.
            KeyError:
                If a requested key cannot be matched.

        Notes:
            A metadata value that is a non-empty list-like is expanded into one entry per element,
            stored under keys of the form ``key[index]``.
        """
        values_only = kwargs.pop("values_only", False)
        output = kwargs.pop("output", None)
        mask_missing = kwargs.pop("mask_missing", False)
        if kwargs:
            raise SyntaxError(f"Unused keyword arguments : {kwargs}")
        if output is None:  # Sort out a definitive value of output
            output = "dict" if not values_only else "smart"
        if isinstance(output, string_types):
            output = output.lower()
        outputs = {
            "list": _fmt_as_list,
            list: _fmt_as_list,
            "dict": _fmt_as_dict,
            dict: _fmt_as_dict,
            "frame": _fmt_as_dataframe,
            "data": _fmt_as_Data,
            DataFile: _fmt_as_Data,
            "array": _fmt_as_array,
            np.ndarray: _fmt_as_array,
            "smart": _fmt_as_smart,
        }
        if output not in outputs:  # Check for good output value
            raise TypeError(f"output of slice metadata must be either dict, list, or array not {output}")
        formatter = outputs[output]
        possible = list(self.all_keys()) if mask_missing else self.common_keys
        keys = _slice_keys(args, possible)
        results = [{k: d[k] for k in keys if k in d} for d in self._folder]
        for r in results:  # Expand the results where a result contains a list
            for k in keys:
                if k in r and isLikeList(r[k]) and len(r[k]) > 0:
                    v = r.pop(k)
                    r.update({f"{k}[{i}]": vi for i, vi in enumerate(v)})
        return formatter(results)