#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""setas module provides the setas class for DataFile and friends."""
__all__ = ["setas"]
import re
import copy
from collections.abc import MutableMapping, Mapping
import numpy as np
from ..compat import string_types, int_types, index_types, _pattern_type
from ..tools import AttributeStore, isiterable, typedList, isLikeList
from .utils import decode_string
class setas(MutableMapping):
    """Manage the column assignments in a DataFile like object.

    Implements a MutableMapping based on the column_headers as the keys (with a few tweaks!). Each column
    carries a one letter assignment from ``xyzuvwdef`` or ``.`` when it is not assigned. The letters *d*, *e*
    and *f* are reported as the x, y and z error columns respectively.

    Note:
        Iterating over setas will return the column assignments rather than the standard mapping behaviour of
        iterating over the keys. Otherwise the interface is essentially as a Mapping class.

    An existing setas instance may be called in any of these forms (only the first positional argument is
    used)::

        setas("xyzuvw")
        setas(["x", "y", "z", "u", "v", "w"])
        setas(x="column_1",y=3,column4="z")

    Keyword Arguments:
        _self (bool):
            If True, make the call return the setas object, if False, return _object attribute, if None,
            return None
        reset (bool):
            If False then preserve the existing set columns and simply add the new ones. Otherwise, clear all
            column assignments before setting new ones (default).
    """

    def __init__(self, row=False, bless=None):
        """Construct an empty setas instance.

        Keyword Arguments:
            row (bool):
                If True a 1-D shape is treated as a single row, so its length is the number of columns.
                Otherwise a 1-D shape counts as a single column.
            bless (object):
                Stored as the ``_object`` attribute and returned from calls made with ``_self=False``.
        """
        self._row = row
        self._cols = AttributeStore()
        self._shape = tuple()
        self._setas = []
        self._column_headers = typedList(string_types)
        self._object = bless
        # Fallback column layouts keyed by number of columns, used by _get_cols when nothing is assigned.
        self._col_defaults = {
            2: {
                "axes": 2,
                "xcol": 0,
                "ycol": [1],
                "zcol": [],
                "ucol": [],
                "vcol": [],
                "wcol": [],
                "xerr": None,
                "yerr": [],
                "zerr": [],
            },  # xy
            3: {
                "axes": 2,
                "xcol": 0,
                "ycol": [1],
                "zcol": [],
                "ucol": [],
                "vcol": [],
                "wcol": [],
                "xerr": None,
                "yerr": [2],
                "zerr": [],
            },  # xye
            4: {
                "axes": 2,
                "xcol": 0,
                "ycol": [2],
                "zcol": [],
                "ucol": [],
                "vcol": [],
                "wcol": [],
                "xerr": 1,
                "yerr": [3],
                "zerr": [],
            },  # xdye
            5: {
                "axes": 5,
                "xcol": 0,
                "ycol": [1],
                "zcol": None,
                "ucol": [2],
                "vcol": [3],
                "wcol": [4],
                "xerr": None,
                "yerr": [],
                "zerr": [],
            },  # xyuvw
            6: {
                "axes": 6,
                "xcol": 0,
                "ycol": [1],
                "zcol": [2],
                "ucol": [3],
                "vcol": [4],
                "wcol": [5],
                "xerr": None,
                "yerr": [],
                "zerr": [],
            },
        }  # xyzuvw

    def _prepare_call(self, args, kargs):
        """Extract a value to be used to evaluate the setas attribute during a call.

        Args:
            args (tuple): Positional arguments of the call; only the first one is used.
            kargs (dict): Keyword arguments of the call; ``reset`` is popped from it.

        Returns:
            The decoded string (for a string argument), the first positional argument unchanged, or *kargs*
            when there are no positional arguments.
        """
        reset = kargs.pop("reset", True)
        if not isinstance(reset, bool):  # Anything that is not a real boolean means "do reset"
            reset = True
        if args:
            value = args[0]
            if isinstance(value, string_types):  # expand the number-code combos in value
                if reset:
                    self.setas = []
                value = decode_string(value)
        else:
            value = kargs
            if reset:
                self.setas = []
        return value

    @property
    def _size(self):
        """Calculate the number of columns the setas attribute should cover."""
        ndim = len(self._shape)
        if ndim == 1:
            return self._shape[0] if self._row else 1
        if ndim > 1:
            return self._shape[1]
        # No shape recorded, so fall back to the number of headers stored.
        return len(self._column_headers)

    @property
    def _unique_headers(self):
        """Return either a column header or an index if the column_header is duplicated."""
        ret = []
        for i, ch in enumerate(self.column_headers):
            ret.append(i if ch in ret else ch)
        return ret

    @property
    def clone(self):
        """Create a copy of the current object by deep-copying every non-callable instance attribute."""
        new = type(self)()
        for attr, val in self.__dict__.items():
            if not callable(val):
                new.__dict__[attr] = copy.deepcopy(val)
        return new

    @property
    def cols(self):
        """Get the current column assignments (refreshed from the setas list on every access)."""
        self._cols.update(self._get_cols())
        return self._cols

    @property
    def x(self):
        """Quick access to the x column number.

        Just a convenience read only property. If we want to change the setas.x
        value we should use the setas(x=1,y=2) style call (so that reset can
        be handled properly)
        """
        return self.cols["xcol"]

    @property
    def y(self):
        """Quick access to the y column numbers list."""
        return self.cols["ycol"]

    @property
    def z(self):
        """Quick access to the z column numbers list."""
        return self.cols["zcol"]

    @property
    def column_headers(self):
        """Get the current column headers, padding with ``Column <n>`` names up to the current size."""
        cols = self._size
        length = len(self._column_headers)
        if length < cols:  # Extend the column headers if necessary
            self._column_headers.extend([f"Column {i + length}" for i in range(cols - length)])
        return self._column_headers

    @column_headers.setter
    def column_headers(self, value):
        """Set the column headers from an ndarray, a single string or an iterable of strings."""
        if isinstance(value, np.ndarray):  # Convert ndarray to list of strings
            value = value.astype(str).tolist()
        elif isinstance(value, string_types):  # Bare strings get turned into lists
            value = [value]
        self._column_headers = typedList(string_types, value)

    @property
    def empty(self):
        """Determine if no columns are set (True when every assignment is ``.``)."""
        return len(self._setas) == 0 or np.all(np.array(self._setas) == ".")

    @property
    def ndim(self):
        """Return the number of dimensions of the array."""
        return len(self.shape)

    @property
    def not_set(self):
        """Return a boolean array that is True where a column is not set."""
        return np.array([x == "." for x in self._setas])

    @property
    def set(self):
        """Return a boolean array that is True where a column is set."""
        return np.array([x != "." for x in self._setas])

    @property
    def setas(self):
        """Guard the setas attribute by padding with ``.`` or truncating to the current size."""
        cols = self._size
        length = len(self._setas)
        if cols > length:
            self._setas.extend(["."] * (cols - length))
        self._setas = self._setas[:cols]
        return self._setas

    @setas.setter
    def setas(self, value):
        """Minimal attribute setter - stores *value* without any validation."""
        self._setas = value

    @property
    def shape(self):
        """Return the shape of the array that we think we are."""
        return self._shape

    @shape.setter
    def shape(self, value):
        """Update the note of our shape.

        Raises:
            AttributeError: If *value* has more than two elements.
        """
        value = tuple(value)
        if len(value) > 2:
            # Report the length of the offending tuple rather than its contents.
            raise AttributeError(f"shape attribute should be a 2-tuple not a {len(value)}-tuple")
        self._shape = value

    def __call__(self, *args, **kargs):
        """Treat the current instance as a callable object and assign columns accordingly.

        Various forms of this method are accepted (only the first positional argument is used)::

            setas("xyzuvw")
            setas(["x", "y", "z", "u", "v", "w"])
            setas(x="column_1",y=3,column4="z")

        A call with no arguments at all returns the current list of assignments.

        Keyword Arguments:
            _self (bool):
                If True, make the call return the setas object, if False, return _object attribute, if None,
                return None. Default - **False**
            reset (bool):
                If False then preserve the existing set columns and simply add the new ones. Otherwise, clear
                all column assignments before setting new ones (default).

        Raises:
            SyntaxError: If the single positional argument is not a string, iterable or setas.
            IndexError: If a key/value pair cannot be interpreted as a column assignment.
            ValueError: If an element of an iterable is not a valid assignment letter.
        """
        return_self = kargs.pop("_self", False)
        if not (args or kargs):  # New - bare call to setas will return the current value.
            return self.setas
        if len(args) == 1 and isinstance(args[0], setas):
            args = list(args)
            args[0] = args[0].to_list()
        if len(args) == 1 and not (isinstance(args[0], string_types + (setas,)) or isiterable(args[0])):
            raise SyntaxError(
                f"setas should be called with eother a string, iterable object or setas object, not a {type(args[0])}"
            )

        # If reset is neither in kargs nor a False boolean, then clear the existing setas assignments
        value = self._prepare_call(args, kargs)
        _ = self.setas  # Force setas to be the right length
        if isinstance(value, dict):
            for k, v in value.items():
                if isinstance(k, string_types) and len(k) == 1 and k in "xyzuvwdef":  # of the form x:column_name
                    for v_item in self.find_col(v, force_list=True):
                        try:
                            self._setas[v_item] = k
                        except (IndexError, KeyError):  # Deliberately best effort for out of range columns
                            pass
                elif (
                    isinstance(k, index_types) and isinstance(v, string_types) and len(v) == 1 and v in "xyzuvwdef"
                ):  # of the form column_name:x
                    k = self.find_col(k)
                    self._setas[k] = v
                else:
                    raise IndexError(f"Unable to workout what do with {k}:{v} when setting the setas attribute.")
        elif isiterable(value):
            if len(value) > self._size:
                value = value[: self._size]
            elif len(value) < self._size:
                value = list(value)  # Ensure value is now a list
                value.extend(list("." * (self._size - len(value))))
            value = value[: self._size]
            for i, v in enumerate(value):
                if v.lower() not in "xyzedfuvw.-":
                    raise ValueError(f"Set as column element is invalid: {v}")
                if v != "-":  # "-" leaves the existing assignment for this column untouched
                    self.setas[i] = v.lower()
        else:
            raise ValueError("Set as column string ended with a number")
        self.cols.update(self._get_cols())
        if return_self is None:
            return None
        if return_self:
            return self
        return self._object

    def __contains__(self, item):
        """Use getitem to test for membership. Either column assignments or column index types are tested."""
        try:
            _ = self[item]
        except (IndexError, KeyError, ValueError):
            return False
        return True

    def __delitem__(self, name):
        """Unset either by column index or column assignment.

        Equivalent to unsetting the same object.
        """
        self.unset(name)

    def __eq__(self, other):
        """Check to see if this is the same object, or has the same headers and the same setas values.

        Strings are decoded and compared element-wise, as are other iterables no longer than this object;
        any columns beyond the end of a short iterable must be unassigned for the comparison to succeed.
        """
        if isinstance(other, string_types):  # Expand strings and convert to list
            other = list(decode_string(other))
        if not isinstance(other, setas):  # Ok, need to check whether items match
            if not (isiterable(other) and len(other) <= self._size):
                return False
            for m in self.setas[len(other) :]:  # Check that if other is short we don't have assignments there
                if m != ".":
                    return False
            for o, m in zip(other, self.setas):
                if o != m:  # Look for mis-matched assignments
                    return False
            return True
        if id(self) == id(other):
            return True
        return self.column_headers == other.column_headers and self.setas == other.setas

    def __getattr__(self, name):
        """Try to see if attribute name is a key in self.cols and return that instead."""
        # The "_cols" guard stops infinite recursion when _cols itself has not been created yet.
        if name != "_cols" and name in self._cols:
            return self._cols[name]
        return getattr(super(), name)

    def __getitem__(self, name):
        """Permit the setas attribute to be treated like either a list or a dictionary.

        Args:
            name (int, slice or string): if *name* is an integer or a slice, return the column type
                of the corresponding column(s). If a string, should be a single letter
                from the set x,y,z,u,v,w,d,e,f - if so returns the corresponding
                column(s). A two character string of ``#`` followed by one of those letters returns the
                integer position(s) of the matching columns instead.

        Returns:
            Either a single letter x,y,z,u,v,w,d,e or f, or a list of letters if used in
            list mode, or a single column name or list of names if used in dictionary mode.

        Raises:
            KeyError: If a single letter is given that is not currently assigned.
            IndexError: If *name* is of a type that cannot be interpreted.
        """
        if isinstance(name, string_types) and len(name) == 1 and name in "xyzuvwdef.-":
            ret = self.to_dict()[name]
            # Only unwrap genuine lists - the value may already be a bare header or an integer index.
            if isinstance(ret, list) and len(ret) == 1:
                ret = ret[0]
        elif isinstance(name, string_types) and len(name) == 2 and name[0] == "#" and name[1] in "xyzuvwdef.-":
            letter = name[1]
            # Scan once with enumerate so that repeated assignments are all found.
            ret = [ix for ix, assigned in enumerate(self._setas) if assigned == letter]
            if len(ret) == 1:
                ret = ret[0]
        elif isinstance(name, index_types):
            ret = self.setas[self.find_col(name)]
        elif isinstance(name, slice):
            indices = name.indices(len(self.setas))
            ret = [self[x] for x in range(*indices)]
        elif isiterable(name):
            ret = [self[x] for x in name]
        else:
            raise IndexError(f"{name} was not found in the setas attribute.")
        return ret

    def __iter__(self):
        """Iterate over the column assignments.

        .. warn::

            This class does not follow standard Mapping semantics - iterating iterates over the values and not
            the items.
        """
        _ = self.setas  # Force setas to fix size
        for c in self._setas:
            yield c

    def __ne__(self, other):
        """!= is the same as not ==."""
        return not self.__eq__(other)

    def __setitem__(self, name, value):
        """Allow setting of the setas variable like a dictionary or a list.

        Args:
            name (string or int): If name is a string, it should be in the set x,y,z,u,v,w,d,e or f
                and value should be a column index type. If name is an integer, then value should be
                a single letter string in the set above. A list-like *name* sets several entries at once.
            value (integer or column index): See above.

        Raises:
            IndexError: If the combination of *name* and *value* cannot be interpreted.
        """
        if isLikeList(name):  # Support indexing with a list like object
            if isLikeList(value) and len(value) == len(name):
                # Route each pair through the scalar logic so names are resolved and values validated.
                for n, v in zip(name, value):
                    self[n] = v
            else:
                for n in name:
                    self[n] = value
        elif isinstance(name, string_types) and len(name) == 1 and name in "xyzuvwdef.-":  # indexing by single letter
            for c in self.find_col(value, force_list=True):
                self._setas[c] = name
        elif (
            isinstance(name, index_types)
            and isinstance(value, string_types)
            and len(value) == 1
            and value in "xyzuvwdef.-"
        ):
            for c in self.find_col(name, force_list=True):
                self.setas[c] = value
        else:
            raise IndexError(f"Failed to set setas as couldn't workout what todo with setas[{name}] = {value}")

    def __len__(self):
        """Return our own length."""
        return self._size

    def __repr__(self):
        """Our representation is as a list of the values."""
        return self.setas.__repr__()

    def __str__(self):
        """Our string representation is just formed by joining the assignments together."""
        return "".join(self.setas)

    #################################################################################################################
    ############################# Operator Methods ################################################################

    def _add_core_(self, new, other):
        """Allow the user to add a dictionary to setas to add extra columns.

        Args:
            new (setas): The object that receives the extra assignments (a clone for ``+``, self for ``+=``).
            other (dict or other): Assignments to add; non-dict values are converted by calling a clone of
                this object with them.

        Returns:
            *new* with the assignments applied, or NotImplemented if *other* could not be interpreted.
        """
        if not isinstance(other, dict):
            try:
                tmp = self.clone
                tmp(other)
                other = tmp.to_dict()
            except (TypeError, SyntaxError):
                return NotImplemented
        for k, v in other.items():
            if isinstance(k, string_types) and len(k) == 1 and k in "xyzuvwdef":  # of the form x:column_name
                for ix in new.find_col(v, force_list=True):
                    new._setas[ix] = k
            elif (
                isinstance(k, index_types) and isinstance(v, string_types) and len(v) == 1 and v in "xyzuvwdef"
            ):  # of the form column_name:x
                k = new.find_col(k)
                new._setas[k] = v
            else:
                raise IndexError(f"Unable to workout what do with {k}:{v} when setting the setas attribute.")
        return new

    def __add__(self, other):
        """Return a clone of this object with the assignments in *other* added."""
        return self._add_core_(self.clone, other)

    def __iadd__(self, other):
        """Add the assignments in *other* to this object in place."""
        return self._add_core_(self, other)

    def _sub_core_(self, new, other):
        """Implement subtracting either column indices or x,y,z,d,e,f,u,v,w for the current setas.

        Args:
            new (setas): The object to remove assignments from (a clone for ``-``, self for ``-=``).
            other (str, column index, Mapping or iterable): What to remove.

        Returns:
            *new* with the assignments removed, or NotImplemented if *other* could not be interpreted.

        Raises:
            ValueError: If a Mapping asks to remove an assignment that is not currently present.
        """
        if isinstance(other, string_types) and len(other) == 1 and other in "xyzuvwdef":
            for ix, letter in enumerate(new._setas):
                if letter == other:
                    new._setas[ix] = "."
            return new
        if isinstance(other, index_types):
            try:
                # force_list copes with index types for which find_col returns several columns.
                matches = new.find_col(other, force_list=True)
            except KeyError:
                other = new.clone(other, _self=True)
            else:
                for ix in matches:
                    new._setas[ix] = "."
                return new
        if isinstance(other, Mapping):
            # Normalise to lists so that membership is an exact match whether the entry was a bare header,
            # an integer index or a list of them.
            me = {k: (v if isinstance(v, list) else [v]) for k, v in new.to_dict().items()}
            other = new.clone(other, _self=True).to_dict()
            for k, v in other.items():
                v = v if isinstance(v, list) else [v]
                if k not in me:
                    raise ValueError(f"No column is set as {k}")
                for header in v:
                    if header not in me[k]:
                        raise ValueError(f"{header} is not set as {k}")
                    me[k].remove(header)
                if not me[k]:
                    del me[k]
            new.clear()
            new(me)
            return new
        if isiterable(other):
            for o in other:
                new = self._sub_core_(new, o)
                if new is NotImplemented:
                    return NotImplemented
            return new
        return NotImplemented

    def __sub__(self, other):
        """Return a clone of this object with the assignments in *other* removed."""
        return self._sub_core_(self.clone, other)

    def __isub__(self, other):
        """Remove the assignments in *other* from this object in place."""
        return self._sub_core_(self, other)

    def find_col(self, col, force_list=False):
        """Index the column headers in order to locate a column of data.

        Indexing can be by supplying an integer, a string, a regular expression, a slice or a list of any of the above.

        -   Integer indices are simply checked to ensure that they are in range
        -   String indices are first checked for an exact match against a column header
            if that fails they are then compiled to a regular expression and the first
            match to a column header is taken. If nothing matches, the string is tried as an integer.
        -   A regular expression index is matched against the column headers and the indices of
            every matching header are returned as a list.
        -   A slice index is converted to a list of integers and processed as below
        -   A list index returns the results of feeding each item in the list at :py:meth:`find_col`
            in turn.

        Args:
            col (int, a string, a re, a slice or a list): Which column(s) to return indices for.

        Keyword Arguments:
            force_list (bool): Force the output always to be a list. Mainly for internal use only

        Returns:
            The matching column index as an integer, or a list of integers.

        Raises:
            IndexError: If an integer index is beyond the last column.
            KeyError: If a string or regular expression matches no column.
            TypeError: If *col* is not one of the supported types.
        """
        headers = self.column_headers
        if isinstance(col, int_types):  # col is an int so pass on
            n_cols = len(headers)
            if col >= n_cols or not n_cols:  # With no columns at all nothing can be indexed
                raise IndexError(f"Attempting to index a non - existent column {col}")
            if col < 0:
                col = col % n_cols
        elif isinstance(col, string_types):  # Ok we have a string
            col = str(col)
            if col in headers:  # and it is an exact string match
                col = headers.index(col)
            else:  # ok we'll try for a regular expression
                test = re.compile(col)
                possible = [x for x in headers if test.search(x)]
                if possible:
                    col = headers.index(possible[0])
                else:
                    try:
                        col = int(col)
                    except ValueError as err:
                        raise KeyError(
                            f'Unable to find any possible column matches for "{col} in {self.column_headers}"'
                        ) from err
                    # Range check against our own headers - this class has no data attribute.
                    if col < 0 or col >= len(headers):
                        raise KeyError("Column index out of range")
        elif isinstance(col, _pattern_type):
            possible = [x for x in headers if col.search(x)]
            if not possible:
                raise KeyError(f"Unable to find any possible column matches for {col.pattern}")
            col = self.find_col(possible)
        elif isinstance(col, slice):
            # _size equals shape[1] for a 2-D shape and also works when no 2-D shape is recorded.
            col = self.find_col(range(*col.indices(self._size)))
        elif isiterable(col):
            col = [self.find_col(x) for x in col]
        else:
            raise TypeError(f"Column index must be an integer, string, list or slice, not a {type(col)}")
        if force_list and not isinstance(col, list):
            col = [col]
        return col

    def clear(self):
        """Clear the current setas attribute.

        Notes:
            Equivalent to doing :py:meth:`setas.unset` with no argument.
        """
        self.unset()

    def get(self, key, default=None):  # pylint: disable=arguments-differ
        """Implement a get method.

        Unlike a standard mapping, a missing *key* raises KeyError when *default* is None.
        """
        try:
            return self[key]
        except (IndexError, KeyError) as err:
            if default is not None:
                return default
            raise KeyError(f"{key} is not in setas and no default was given.") from err

    def keys(self):
        """Access mapping keys.

        Mapping keys are the same as iterating over the unique headers
        """
        for c in self._unique_headers:
            yield c

    def values(self):
        """Access mapping values.

        Mapping values are the same as iterating over setas.
        """
        for v in self.setas:
            yield v

    def items(self):
        """Access mapping items.

        Mapping items iterates over keys and values.
        """
        for k, v in zip(self._unique_headers, self.setas):
            yield k, v

    def pop(self, key, default=None):  # pylint: disable=arguments-differ
        """Implement a pop method - return the entry for *key* and then unset it.

        A missing *key* raises KeyError when *default* is None, otherwise *default* is returned.
        """
        try:
            ret = self[key]
            self.unset(key)
            return ret
        except (IndexError, KeyError) as err:
            if default is not None:
                return default
            raise KeyError(f"{key} is not in setas and no default was given.") from err

    def popitem(self):
        """Return and clear a column assignment.

        Letters are tried in the order ``xdyezfuvw`` and the first one that is set is removed.

        Raises:
            KeyError: If no columns are set.
        """
        for c in "xdyezfuvw":
            if c in self:
                v = self[c]
                self.unset(c)
                return (c, v)
        raise KeyError("No columns set in setas!")

    def setdefault(self, key, default=None):  # pylint: disable=arguments-differ
        """Implement a setdefault method - return self[key], assigning *default* first if it is missing."""
        try:
            return self[key]
        except (IndexError, KeyError):
            self[key] = default
            return default

    def unset(self, what=None):
        """Remove column settings from the setas attribute in method call.

        Parameters:
            what (str,iterable,dict or None): What to unset.

        Notes:
            The *what* parameter determines what to unset, possible values are:

            -   A single letter from *xyzuvwdef* - all column assignments of the corresponding type are unset
            -   A column index type - all matching columns are unset
            -   A list or other iterable of the above - all matching entries are unset
            -   None - all setas assignments are cleared.
        """
        if what is None:
            self.setas = []
            _ = self.setas  # Re-pad the list out to the current size
        else:
            self -= what

    def update(self, other=(), **kwds):  # pylint: disable=arguments-differ
        """Replace any assignments in self with assignments from other.

        Args:
            other (setas, dict or iterable of pairs): New assignments in either ``letter: column`` or
                ``column: letter`` form. Keyword arguments are used only when *other* is not given.

        Returns:
            This object.

        Raises:
            TypeError: If *other* cannot be converted to a dictionary.
        """
        if isinstance(other, setas):
            other = other.to_dict()
        elif isinstance(other, tuple) and len(other) == 0:
            other = kwds
        else:
            try:
                other = dict(other)
            except (ValueError, TypeError) as err:
                raise TypeError(f"setas.update requires a dictionary not a {type(other)}") from err
        vals = list(other.values())
        keys = list(other.keys())
        for k in "xyzuvwdef":
            if k in other:
                target = other[k]
            elif k in vals:
                target = keys[vals.index(k)]
            else:
                continue
            try:  # Clear whichever column(s) currently hold this letter
                c = self[k]
                self[c] = "."
            except (KeyError, IndexError):  # The letter was not assigned yet - nothing to clear
                pass
            self[k] = target
        return self

    def to_dict(self):
        """Return the setas attribute as a dictionary.

        If multiple columns are assigned to the same type, then the column names are
        returned as a list. If column headers are duplicated, the integer position of the repeated column is
        used in place of its name.
        """
        ret = {}
        for k, ch in zip(self._setas, self._unique_headers):
            if k != ".":
                ret.setdefault(k, []).append(ch)
        for k, val in ret.items():
            if len(val) == 1:
                ret[k] = val[0]
        return ret

    def to_list(self):
        """Return the setas attribute as a list of letter types."""
        return list(self)

    def to_string(self, encode=False):
        """Return the setas attribute encoded as a string.

        Keyword Arguments:
            encode (bool): If True, replace runs of 3 to 10 identical characters with a count followed by
                the character. Longer runs are emitted as several such tokens.
        """
        expanded = "".join(self)
        if encode:
            pat = re.compile(r"((.)\2{2,9})")
            # A single left-to-right pass never rescans text that has already been encoded.
            expanded = pat.sub(lambda res: f"{len(res.group(1))}{res.group(2)}", expanded)
        return expanded

    def _get_cols(self, what=None, startx=0, no_guess=False):
        """Use the setas attribute to work out which columns to use for x,y,z etc.

        Keyword Arguments:
            what (string): Returns either xcol, ycol, zcol, ycols,xcols rather than the full dictionary
            startx (int): Start looking for x columns at this column.
            no_guess (bool): If True, do not fall back to the default layouts when nothing is assigned.

        Returns:
            A single integer, a list of integers or a dictionary of all columns.
        """
        assignments = self.setas
        n_x = assignments.count("x")
        # Do the xcolumn and xerror first. If only one x column then special case to reset startx to get any
        # y columns
        if n_x == 1:
            xcol = assignments.index("x")
            maxcol = len(assignments) + 1
            startx = 0
            xerr = assignments.index("d") if "d" in assignments else None
        elif n_x > 1:
            xcol = assignments[startx:].index("x") + startx
            startx = xcol
            try:
                maxcol = assignments[xcol + 1 :].index("x") + xcol + 1
            except ValueError:
                maxcol = len(assignments)
            window = assignments[startx:maxcol]
            # Offset by startx so that xerr is an absolute column number like every other entry.
            xerr = window.index("d") + startx if "d" in window else None
        else:
            xcol = None
            maxcol = len(assignments) + 1
            startx = 0
            xerr = None

        # No longer enforce ordering of yezf - allow them to appear in any order.
        columns = {"y": [], "e": [], "z": [], "f": [], "u": [], "v": [], "w": []}
        for ix, lett in enumerate(assignments[startx:maxcol]):
            if lett in columns:
                columns[lett].append(ix + startx)

        if xcol is None:
            axes = 0
        elif not columns["y"]:
            axes = 1
        elif not columns["z"]:
            axes = 2
        else:
            axes = 3
        if axes == 2 and len(columns["u"]) * len(columns["v"]) > 0:
            axes = 4
        elif axes == 3:
            if len(columns["u"]) * len(columns["v"]) * len(columns["w"]) > 0:
                axes = 6
            elif len(columns["u"]) * len(columns["v"]) > 0:
                axes = 5

        ret = AttributeStore()
        ret.update({"axes": axes, "xcol": xcol, "xerr": xerr})
        for ck, rk in {
            "y": "ycol",
            "z": "zcol",
            "e": "yerr",
            "f": "zerr",
            "u": "ucol",
            "v": "vcol",
            "w": "wcol",
        }.items():
            ret[rk] = columns[ck]
        if axes == 0 and self.ndim >= 2 and self.shape[1] in self._col_defaults and not no_guess:
            # Work on a copy so that the stored defaults are never modified by the has_* keys added below
            # or by whoever receives the result.
            ret = AttributeStore()
            ret.update(copy.deepcopy(self._col_defaults[self.shape[1]]))
        for n in ["xcol", "xerr", "ycol", "yerr", "zcol", "zerr", "ucol", "vcol", "wcol", "axes"]:
            ret[f"has_{n}"] = not (ret[n] is None or (isinstance(ret[n], list) and not ret[n]))
        ret["has_uvw"] = ret["has_ucol"] & ret["has_vcol"] & ret["has_wcol"]
        if what in ["xcol", "xerr"]:
            ret = ret[what]
        elif what in ("ycol", "zcol", "ucol", "vcol", "wcol", "yerr", "zerr"):
            ret = ret[what][0]
        elif what in ("ycols", "zcols", "ucols", "vcols", "wcols", "yerrs", "zerrs"):
            ret = ret[what[0:-1]]
        return ret