Source code for Stoner.core.data

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""The main Data Class definition."""

import copy
import csv
import inspect as _inspect_
import io
import pathlib
import urllib
import warnings
from collections.abc import Iterable, Mapping, MutableSequence
from textwrap import TextWrapper

import h5py
import numpy as np
from numpy import nan  # NOQA pylint: disable=unused-import
from numpy import ma

from ..compat import index_types, string_types
from ..tools import all_type, get_option, isiterable, make_Data
from ..tools.file import (
    URL_SCHEMES,
    FileManager,
    auto_load_classes,
    file_dialog,
    get_file_name_type,
    get_filename,
    get_loader,
)
from ..tools.tests import ClassTester
from .array import DataArray
from .base import TypeHintedDict, metadataObject
from .exceptions import StonerLoadError, StonerSetasError
from .interfaces import DataFileInterfacesMixin
from .operators import DataFileOperatorsMixin
from .property import DataFilePropertyMixin
from .utils import Tab_Delimited, copy_into

try:
    from tabulate import tabulate

    tabulate.PRESERVE_WHITESPACE = True
except ImportError:
    tabulate = None

try:
    import pandas as pd
except ImportError:
    pd = None


# Bring all the subclasses into memory (ignore unused imports warnings)
from ..analysis import columns, features, filtering, functions
from ..analysis.fitting import functions as fitting
from ..plot import PlotMixin
from ..tools.decorators import class_modifier
from ..tools.file import best_saver
from . import methods



[docs]
@class_modifier(
    [methods, fitting, columns, functions, features, filtering], adaptor=None, no_long_names=True, overload=True
)
class Data(
    DataFileInterfacesMixin,
    DataFileOperatorsMixin,
    DataFilePropertyMixin,
    metadataObject,
    MutableSequence,
    PlotMixin,
):
    """Base class object that represents a matrix of data, associated metadata and column headers.

    Attributes:
        column_headers (list):
            list of strings of the column names of the data.
        data (2D numpy masked array):
            The attribute that stores the numerical data for each Data. This is a :py:class:`DataArray` instance -
            which is itself a subclass of :py:class:`numpy.ma.MaskedArray`.
        title (string):
            The title of the measurement.
        filename (string):
            The current filename of the data if loaded from or already saved to disc. This is the default filename
            used by the :py:meth:`Stoner.Core.Data.load` and :py:meth:`Stoner.Core.Data.save`.
        header (string):
            A readonly property that returns a pretty formatted string giving the header of tabular representation.
        mask (array of booleans):
            Returns the current mask applied to the numerical data equivalent to self.data.mask.
        mime_type (list of str):
            The possible mime-types of data files represented by each matching filename pattern in
            :py:attr:`Datafile.pattern`.
        patterns (list):
            A list of filename extension glob patterns that match the expected filename patterns for a Data
            (``*.txt`` and ``*.tdi``)
        priority (int):
            Used to indicate the order in which subclasses of :py:class:`Data` are tried when loading data. A higher
            number means a lower priority (!)
        setas (:py:class:`_stas`):
            Defines certain columns to contain X, Y, Z or errors in X,Y,Z data.
        shape (tuple of integers):
            Returns the shape of the data (rows,columns) - equivalent to self.data.shape.
        records (numpy record array):
            Returns the data in the form of a list of tuples where each tuple maps to the column names.
        clone (Data):
            Creates a deep copy of the :py:class:`Data` object.
        dict_records (array of dictionaries):
            View the data as an array or dictionaries where each dictionary represents one row with keys derived
            from column headers.
        dims (int):
            When data columns are set as x,y,z etc. returns the number of dimensions implied in the data set
        dtype (numpy dtype):
            Returns the datatype stored in the :py:attr:`Data.data` attribute.
        T (:py:class:`DataArray`):
            Transposed version of the data.
        subclasses (list):
            Returns a list of all the subclasses of Data currently in memory, sorted by
            their py:attr:`Stoner.Core.Data.priority`. Each entry in the list consists of the
            string name of the subclass and the class object.
        xcol (int):
            If a column has been designated as containing *x* values, this will return the index of that column
        xerr (int):
            Similarly to :py:attr:`Data.xcol` but for the x-error value column.
        ycol (list of int):
            Similarly to :py:attr:`Data.xcol` but for the y value columns.
        yerr (list of int):
            Similarly to :py:attr:`Data.xcol` but for the y error value columns.
        zcol (list of int):
            Similarly to :py:attr:`Data.xcol` but for the z value columns.
        zerr (list of int):
            Similarly to :py:attr:`Data.xcol` but for the z error value columns.
        ucol (list of int):
            Similarly to :py:attr:`Data.xcol` but for the u (x-axis direction cosine) columns.
        vcol (list of int):
            Similarly to :py:attr:`Data.xcol` but for the v (y-axis direction cosine) columns.
        wcol (list of int):
            Similarly to :py:attr:`Data.xcol` but for the w (z-axis direction cosine) columns.
    """

    #: priority (int): is the load order for the class, smaller numbers are tried before larger numbers.
    #   .. note::
    #
    #      Subclasses with priority<=32 should make some positive identification that they have the right
    #      file type before attempting to read data.
    priority = 32

    #: pattern (list of str): A list of file extensions that might contain this type of file. Used to construct
    # the file load/save dialog boxes.
    _patterns = ["*.txt", "*.tdi"]  # Recognised filename patterns

    # mimetypes we match
    mime_type = ["text/plain"]

    _conv_string = np.vectorize(str)
    _conv_float = np.vectorize(float)

    # ====================================================================================
    ############################     Object Construction   ###############################
    # ====================================================================================

    def __new__(cls, *args, **kwargs):
        """Allocate the instance and seed the private state needed before ``__init__`` runs.

        The *debug* keyword is removed from *kwargs* and stored on the instance; the remaining keywords
        are stashed as ``_kwargs`` on the new instance.
        """
        instance = metadataObject.__new__(cls, *args)
        debug_flag = kwargs.pop("debug", False)
        # object.__setattr__ stores these two attributes without going through Data.__setattr__.
        object.__setattr__(instance, "debug", debug_flag)
        instance._masks = [False]
        instance._filename = None
        object.__setattr__(instance, "_data", DataArray([]))
        instance._baseclass = Data
        instance._kwargs = kwargs
        return instance


[docs]
    def __init__(self, *args, **kwargs):
        """Initialise the Data from arrays, dictionaries and filenames.

        Various forms are recognised:

        .. py:function:: Data('filename',<optional filetype>,<args>)
            :noindex:

            Creates the new Data object and then executes the :py:class:`Data`.load
            method to load data from the given *filename*.

        .. py:function:: Data(array)
            :noindex:

            Creates a new Data object and assigns the *array* to the
            :py:attr:`Data.data`  attribute.

        .. py:function:: Data(dictionary)
            :noindex:

            Creates the new Data object. If the dictionary keys are all strings and the values are all
            1D numpy arrays of equal length, then assumes the dictionary represents columns of data and the keys
            are the column titles, otherwise initialises the metadata with :parameter: dictionary.

        .. py:function:: Data(array,dictionary)
            :noindex:

            Creates the new Data object and does the combination of the
            previous two forms.


        .. py:function:: Data(Data)
            :noindex:

            Creates the new Data object and initialises all data from the
            existing :py:class:`Data` instance. This on the face of it does the same as
            the assignment operator, but is more useful when one or other of the
            Data objects is an instance of a sub - class of Data

        Args:
            args (positional arguments):
                Variable number of arguments that match one of the definitions above
            kwargs (keyword Arguments):
                All keyword arguments that match public attributes are used to set those public attributes.

        Raises:
            AttributeError:
                If a keyword argument is not one of the names in ``self._public_attrs``.
        """
        # init instance attributes
        super().__init__(**kwargs)  # initialise self.metadata
        # Names that may be set through keyword arguments, mapped to the type(s) accepted for each.
        self._public_attrs = {
            "data": np.ndarray,
            "filetype": str,
            "setas": (string_types, list, dict),
            "column_headers": list,
            "metadata": TypeHintedDict,
            "debug": bool,
            "filename": string_types,
            "mask": (np.ndarray, bool),
        }
        self._repr_limits = (256, 6)
        # Choose the constructor helper from the number of positional arguments: 0 -> no-op, 1 -> _init_single,
        # 2 -> _init_double and 3 or more -> _init_many.
        handler = [lambda *args, **kwargs: None, self._init_single, self._init_double, self._init_many][
            min(len(args), 3)
        ]
        self.mask = False
        self.data._setas._get_cols()
        handler(*args, **kwargs)
        # __new__ stashed the keywords (with "debug" already removed) in self._kwargs; prefer those for the
        # attribute setting below and remove the stash from the instance.
        try:
            kwargs = self._kwargs
            delattr(self, "_kwargs")
        except AttributeError:
            pass
        self.metadata["Stoner.class"] = type(self).__name__
        if kwargs:  # set public attributes from keywords
            to_go = []
            for k, val in kwargs.items():
                if k in self._public_attrs:
                    if isinstance(val, self._public_attrs[k]):
                        self.__setattr__(k, val)
                    else:
                        self._raise_type_error(k)
                        # NOTE(review): only reached if _raise_type_error returns instead of raising - confirm.
                        to_go.append(k)
                else:
                    raise AttributeError(f"{k} is not an allowed attribute of {self._public_attrs}")
                    # self._public_attrs[k]=type(kwargs[k])
                    # self.__setattr__(k, kwargs[k])
            for k in to_go:
                del kwargs[k]
        if self.debug:
            print("Done Data init")


    # ============================================================================================
    ############################   Constructor Methods ###########################################
    # ============================================================================================

    def _init_single(self, *args, **kwargs):
        """Handle constructor with 1 argument - called from __init__."""
        test = ClassTester(ImageFile="Stoner.Image.core.ImageFile")
        match args[0]:
            case str() | bool() | pathlib.Path() | bytes() | io.IOBase() | h5py.Group() | h5py.File():
                self._init_load(args[0], **kwargs)
            case Data():
                self._init_datafile(args[0], **kwargs)
            case pd.DataFrame():
                self._init_pandas(args[0], **kwargs)
            case test.ImageFile():
                self._init_imagefile(args[0], **kwargs)
            case np.ndarray():
                self._init_array(args[0], **kwargs)
            case Mapping():
                self._init_dict(args[0], **kwargs)
            case Iterable():
                self._init_list(args[0], **kwargs)
            case _:
                raise TypeError(f"No constructor for {type(args[0])}")
        self.data._setas.cols.update(self.setas._get_cols())

    def _init_double(self, *args, **kwargs):
        """Two argument constructors handled here. Called form __init__."""
        (arg0, arg1) = args
        match args:
            case (arg0, {} as arg1):
                self._init_single(arg0, **kwargs)
                self._init_single(arg1, **kwargs)
            case (arg0, Iterable() as arg1) if all_type(arg1, str):
                self._init_single(arg0, **kwargs)
                self._init_single(arg1, **kwargs)
            case (np.ndarray() as arg0, np.ndarray() as arg1) if arg0.ndim == 1 and arg1.ndim == 1:
                self._init_many(*args, **kwargs)
            case _:
                raise TypeError(f"Unable to decide how to initialise {type(args)}")

    def _init_many(self, *args, **kwargs):
        """Handle more than two arguments to the constructor - called from init."""
        for a in args:
            if not (isinstance(a, np.ndarray) and a.ndim == 1):
                copy_into(self.__class__.load(a, **kwargs), self)
                break
        else:
            self.data = np.column_stack(args)

    def _init_array(self, arg, **kwargs):  # pylint: disable=unused-argument
        """Initialise from a single numpy array.

        Args:
            arg (numpy.ndarray):
                A numeric array becomes the data (promoted to at least 2D) with default column headers of the
                form ``Column_<n>``. A non-numeric array whose first element is a dict has each of its elements
                added to the object with ``+=``. Any other array is ignored.
            **kwargs (dict):
                Unused.
        """
        if np.issubdtype(arg.dtype, np.number):
            grid = np.atleast_2d(arg)
            self.data = DataArray(grid, setas=self.data._setas)
            # Count the columns on the 2D view: np.shape(arg)[1] raised IndexError for 1D or 0D input, while
            # for input that already has two or more dimensions the two values are identical.
            self.column_headers = [f"Column_{x}" for x in range(grid.shape[1])]
        elif arg.ndim > 0 and arg.size > 0 and isinstance(arg[0], dict):
            # The ndim/size guards keep arg[0] from raising IndexError on 0D or empty arrays.
            for row in arg:
                self += row

    def _init_datafile(self, arg, **kwargs):  # pylint: disable=unused-argument
        """Initialise this instance from the state of another Data-like object.

        Shallow copies of the entries in ``arg.__dict__`` (other than ``_baseclass``) are set on this
        instance via the parent class ``__setattr__``; the metadata, data and setas are then copied over.
        """
        for attr_name in arg.__dict__:
            # NOTE(review): callable() is applied to the attribute *name*, which is a string key, so this
            # part of the test never excludes anything - possibly the attribute value was intended.
            if attr_name == "_baseclass" or callable(attr_name):
                continue
            super().__setattr__(attr_name, copy.copy(getattr(arg, attr_name)))
        self.metadata = arg.metadata.copy()
        self.data = DataArray(arg.data, setas=arg.setas.clone)
        self.data.setas = arg.setas.clone

    def _init_dict(self, arg, **kwargs):  # pylint: disable=unused-argument
        """Initialise from a dictionary of columns, or failing that, a dictionary of metadata.

        If every key is a string and every value is a 1D numpy array with the same length as the first value,
        the values become the data columns and the keys the column headers. Otherwise a copy of the
        dictionary replaces the metadata.
        """

        def _columns_match():
            columns = list(arg.values())
            checks = [arg[key].ndim == 1 and len(arg[key]) == len(columns[0]) for key in arg]
            return np.all(checks)

        is_columnar = (
            all_type(arg.keys(), string_types) and all_type(arg.values(), np.ndarray) and _columns_match()
        )
        if not is_columnar:
            self.metadata = arg.copy()
            return
        self.data = np.column_stack(tuple(arg.values()))
        self.column_headers = list(arg.keys())

    def _init_imagefile(self, arg, **kwargs):  # pylint: disable=unused-argument
        """Initialise from an ImageFile."""
        x = arg.get("x_vector", np.arange(arg.shape[1]))
        y = arg.get("y_vector", np.arange(arg.shape[0]))
        x, y = np.meshgrid(x, y)
        z = arg.image

        self.data = np.column_stack((x.ravel(), y.ravel(), z.ravel()))
        self.metadata = copy.deepcopy(arg.metadata)
        self.column_headers = ["X", "Y", "Image Intensity"]
        self.setas = "xyz"

    def _init_pandas(self, arg, **kwargs):  # pylint: disable=unused-argument
        """Initialise from a pandas dataframe.

        The frame values become the data. Each column label provides a header: a string label is used as
        is, an iterable label contributes its first string element (or ``Column <n>`` if it has none) and
        anything else becomes ``Column <n>:<label>``. ``arg.metadata`` is merged into the metadata. If the
        columns are a multi-level index whose second level only contains setas letters, that level is
        assigned to setas.
        """

        def _header_for(position, label):
            if isinstance(label, string_types):
                return label
            if not isiterable(label):
                return f"Column {position}:{label}"
            for part in label:
                if isinstance(part, string_types):
                    return part
            return f"Column {position}"

        self.data = arg.values
        self.column_headers = [_header_for(ix, col) for ix, col in enumerate(arg)]
        self.metadata.update(arg.metadata)
        if isinstance(arg.columns, pd.MultiIndex) and len(arg.columns.levels) > 1:
            letters = list("xyzdefuvw.")
            if all(label in letters for label in arg.columns.get_level_values(1)):
                self.setas = list(arg.columns.get_level_values(1))

    def _init_load(self, arg, **kwargs):
        """Load data from a file-like source.

        arg(str, PurePath, IOBase, bool):
            If arg is a str, PaurePath, ioBase then open the file like object and read. If arg is bool and False,
            provide a dialog box instead.
        """
        if isinstance(arg, bool):
            if arg:
                raise ValueError("Cannot construct a Data with a single argument of True")
        elif isinstance(arg, pathlib.PurePath):
            arg = str(arg)
        copy_into(self.__class__.load(filename=arg, **kwargs), self)

    def _init_list(self, arg, **kwargs):
        """Initialise from a list or other iterable.

        An iterable of strings supplies the column headers; an iterable of numpy arrays is unpacked and
        passed to ``_init_many``.

        Raises:
            TypeError:
                If the elements are neither all strings nor all numpy arrays.
        """
        if all_type(arg, string_types):
            self.column_headers = list(arg)
            return
        if not all_type(arg, np.ndarray):
            raise TypeError(f"Unable to construct Data from a {type(arg)}")
        self._init_many(*arg, **kwargs)

    # ============================================================================================
    ############################   Special Methods ###############################################
    # ============================================================================================

    def __call__(self, *args, **kwargs):
        """Clone the Data, but allowing additional arguments to modify the new clone.

        Args:
            *args (tuple):
                Positional arguments to pass through to the new clone.
            **kwargs (dict):
                Keyword arguments to pass through to the new clone.

        Raises:
            TypeError: If a keyword argument doesn't match an attribute.

        Returns:
            new_d (Data):
                Modified clone of the current object.
        """
        new_d = self.clone
        handler = [lambda *args, **kwargs: None, new_d._init_single, new_d._init_double, new_d._init_many][
            min(len(args), 2)
        ]
        handler(*args, **kwargs)
        if kwargs:  # set public attributes from keywords
            myattrs = new_d._public_attrs
            for k, val in kwargs.items():
                if k in myattrs:
                    if isinstance(val, myattrs[k]):
                        new_d.__setattr__(k, val)
                    else:
                        if isinstance(myattrs[k], tuple):
                            typ = "one of " + ",".join([str(type(t)) for t in myattrs[k]])
                        else:
                            typ = f"a {type(myattrs[k])}"
                        raise TypeError(f"{k} should be {typ} not a {type(val)}")

        return new_d

    def __deepcopy__(self, memo):
        """Provide support for copy.deepcopy to work."""
        cls = type(self)
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            try:
                setattr(result, k, copy.deepcopy(v, memo))
            except (TypeError, ValueError, RecursionError):
                setattr(result, k, copy.copy(v))
        return result

    def __dir__(self):
        """Returns the attributes of the current object.

        Augmenting the keys of self.__dict__ with the attributes that __getattr__ will handle."""
        attr = dir(type(self))
        col_check = {"xcol": "x", "xerr": "d", "ycol": "y", "yerr": "e", "zcol": "z", "zerr": "f"}
        if not self.setas.empty:
            for k, val in col_check.items():
                if k.startswith("x"):
                    if k in self._data._setas.cols and self._data._setas.cols[k] is not None:
                        attr.append(val)
                else:
                    if k in self._data._setas.cols and self._data._setas.cols[k]:
                        attr.append(val)
        return sorted(set(attr))

    def __getattr__(self, name):
        """Handle some special pseudo attributes that map to the setas columns.

        Args:
            name (string):
                The name of the attribute to be returned.

        Returns:
            Various:
                the Data object in various forms

        Raises:
            StonerSetasError:
                If *name* is one of the single letter setas column names and nothing could be found for it.
            AttributeError:
                If *name* could not be resolved in any other way.

        The lookup proceeds in this order, returning the first result found:
            1. the parent class ``__getattr__`` (presumably where documented pseudo-attributes such as
               ``records`` and ``clone`` are served - confirm against the parent classes);
            2. the instance dictionary, then the class dictionary of ``type(self)`` (a value of None is
               treated as not found);
            3. for single letter setas names, the matching attribute of the underlying data array;
            4. the entry for *name* in ``self.setas.cols``, unless it is None or an empty list;
            5. *name* looked up as a column via ``find_col`` and returned with :py:meth:`Data.column`.
               KeyError and IndexError from this step are suppressed and end in the errors above.
        """
        setas_cols = ("x", "y", "z", "d", "e", "f", "u", "v", "w", "r", "q", "p")
        # Debug trace of every attribute that reaches this fallback ("debug" itself is excluded).
        if name != "debug" and self.debug:
            print(name)
        try:
            return super().__getattr__(name)
        except AttributeError:
            # Only the class dictionary of type(self) itself is consulted here, not those of its bases.
            ret = self.__dict__.get(name, type(self).__dict__.get(name, None))
            if ret is not None:
                return ret
        if name in setas_cols:
            ret = self._getattr_col(name)
            if ret is not None:
                return ret
        if name in self.setas.cols:
            ret = self.setas.cols[name]
            if ret is not None and ret != []:
                return ret
        try:
            col = self._data._setas.find_col(name)
            return self.column(col)
        except (KeyError, IndexError):
            pass
        if name in setas_cols:  # Probably tried to use a setas col when it wasn't defined
            raise StonerSetasError(
                f"Tried accessing a {name} column, but setas is not defined and {name} is not a column name either"
            )
        raise AttributeError(f"{name} is not an attribute of Data nor a column name")

    #    def __reduce_ex__(self, p):
    #        """Machinery used for deepcopy."""
    #        cls=type(self)
    #        return (cls, (), self.__getstate__())

    def __repr__(self):
        """Output the :py:class:`Data` object in a textual format.

        When either the ``short_repr`` or the ``short_data_repr`` option is set a one line summary is
        returned. Otherwise a table representation is tried first and, if that raises ImportError,
        ValueError or TypeError, the TDI text representation shortened to 256 data rows is used instead.

        Returns:
            self in a textual format.
        """
        wants_short = get_option("short_repr") or get_option("short_data_repr")
        if wants_short:
            return self._repr_short_()
        try:
            text = self._repr_table_()
        except (ImportError, ValueError, TypeError):
            text = self.__repr_core__(256)
        return text

    def __setattr__(self, name, value):
        """Handle attempts to set attributes not covered with class attribute variables.

        Args:
            name (str):
                Name of attribute to set. Details of possible attributes below:
                -   mask Passes through to the mask attribute of self.data (which is a numpy masked array).
                    Also handles the case where you pass a callable object to nask where we pass each row to the
                    function and use the return reult as the mask
                -   data Ensures that the :py:attr:`data` attribute is always a :py:class:`numpy.ma.maskedarray`
        """
        if hasattr(type(self), name) and isinstance(getattr(type(self), name), property):
            super().__setattr__(name, value)
        elif len(name) == 1 and name in "xyzuvwdef" and self.setas[name]:
            self._setattr_col(name, value)
        else:
            super().__setattr__(name, value)

    def __str__(self):
        """Provide an implementation for str(Data) that does not shorten the output.

        Returns:
            (str):
                The result of ``__repr_core__`` called with shortening turned off (``False``).
        """
        return self.__repr_core__(False)

    # ============================================================================================
    ############################ Private Methods #################################################
    # ============================================================================================

    def _col_args(self, *args, **kwargs):
        """Create an object which has keys based either on arguments or setas attribute.

        All positional and keyword arguments are forwarded unchanged to the ``_col_args`` method of
        ``self.data`` and its result is returned.
        """
        return self.data._col_args(*args, **kwargs)  # Now just pass through to DataArray

    def _getattr_col(self, name):
        """Get a column using the setas attribute."""
        try:
            return getattr(self._data, name)
        except StonerSetasError:
            return None

    def _interesting_cols(self, cols):
        """Work out which columns the user might be interested in on the basis of the setas.

        Args:
            cols (int):
                Maximum number of columns to display.

        Returns:
            (tuple of list, list, int):
                - The indices of the interesting columns, with breaks in runs indicated by -1.
                - A label for each entry: the column index, followed by the setas letter in brackets when
                  one is assigned, or an empty string for a break.
                - The (possibly reduced) number of columns. This is only changed when the data has more
                  columns than *cols*.
        """
        c = self.shape[1]
        if c > cols:
            # Too many columns to show: start with the columns that have a setas assignment, inserting a -1
            # marker wherever two consecutive selected columns are not adjacent.
            interesting = []
            last = -1
            for ix, typ in enumerate(self.setas):
                if last not in [-1, ix - 1]:
                    interesting.append(-1)
                    last = -1
                if typ != ".":
                    interesting.append(ix)
                    last = ix
            if interesting and interesting[-1] == -1:
                interesting = interesting[:-1]
            # Append every column after the last selected one (or all columns if none were selected).
            if interesting:
                c_start = max(interesting) + 1
            else:
                c_start = 0
            interesting.extend(range(c_start, c))
            cols = min(cols, len(interesting))
            # Overwrite the last two visible slots so that the final slot is the last column, preceded by
            # either a break marker or the second to last column.
            # NOTE(review): for cols < 3 the indices cols - 3 and cols - 2 are negative and wrap round to the
            # end of the list - confirm whether such small limits are ever used.
            if interesting[cols - 3] != -1:
                interesting[cols - 2] = -1
            else:
                interesting[cols - 2] = c - 2
            interesting[cols - 1] = c - 1
            interesting = interesting[:cols]
            c = cols
        else:
            interesting = list(range(c))

        col_assignments = []
        for i in interesting:
            if i != -1:
                if self.setas[i] != ".":
                    col_assignments.append(f"{i} ({self.setas[i]})")
                else:
                    col_assignments.append(f"{i}")
            else:
                col_assignments.append("")
        return interesting, col_assignments, cols

    def _load(self, filename, *args, **kwargs):
        """Actually load the data from disc assuming a .tdi file format.

        Args:
            filename (str):
                Path to filename to be loaded. If None or otherwise falsy, ``self.get_filename("r")`` is called
                instead (presumably to ask the user for a filename - confirm).

        Returns:
            Data:
                The current object (``self``) with the loaded data, metadata and column headers.

        Exceptions:
            StonerLoadError:
                Raised if the first row does not start with 'TDI Format 1.5' or 'TDI Format=Text 1.0'.

        Note:
            The *_load* methods should be overridden in each child class to handle the process of loading data
            from disc. If they encounter unexpected data, then they should raise StonerLoadError to signal this,
            so that the loading class can try a different sub-class instead.
        """
        if filename is None or not filename:
            self.get_filename("r")
        else:
            self.filename = filename
        with FileManager(self.filename, "r", encoding="utf-8", errors="ignore") as datafile:
            # Identify the format from the first line before doing any parsing.
            line = datafile.readline()
            if line.startswith("TDI Format 1.5"):
                fmt = 1.5
            elif line.startswith("TDI Format=Text 1.0"):
                fmt = 1.0
            else:
                raise StonerLoadError("Not a TDI File")

            # First pass over the file as tab delimited text: keep the header row, count the columns, find
            # the last row holding more than one field and import the metadata from the first column.
            datafile.seek(0)
            reader = csv.reader(datafile, dialect=Tab_Delimited())
            cols = 0
            max_rows = 0
            for ix, metadata in enumerate(reader):
                if ix == 0:
                    row = metadata
                    continue
                if len(metadata) < 1:
                    continue
                if cols == 0:
                    cols = len(metadata)
                if len(metadata) > 1:
                    max_rows = ix + 1
                if "=" in metadata[0]:
                    self.metadata.import_key(metadata[0])
            # The first field of the header row is the format marker, the rest are the column names.
            col_headers_tmp = [x.strip() for x in row[1:]]
            with warnings.catch_warnings():
                # Second pass: read the numerical columns, skipping the header line and the first
                # (metadata) column. Missing values are filled with nan.
                datafile.seek(0)
                warnings.filterwarnings("ignore", "Some errors were detected !")
                data = np.genfromtxt(
                    datafile,
                    skip_header=1,
                    usemask=True,
                    delimiter="\t",
                    usecols=range(1, cols),
                    invalid_raise=False,
                    comments="\0",
                    missing_values=[""],
                    filling_values=[nan],
                    max_rows=max_rows,
                )
        if data.ndim < 2:
            data = np.ma.atleast_2d(data)
        # Despite its name, *retain* flags the rows that are entirely nan; those rows are dropped.
        retain = np.all(np.isnan(data), axis=1)
        self.data = DataArray(data[~retain])
        self["TDI Format"] = fmt
        if self.data.ndim == 2 and self.data.shape[1] > 0:
            self.column_headers = col_headers_tmp
        return self

    def __repr_core__(self, shorten=1000):
        """Actuall do the repr work, but allow for a shorten parameter to save printing big files out to disc."""
        outp = "TDI Format 1.5\t" + "\t".join(self.column_headers) + "\n"
        m = len(self.metadata)
        self.data = np.atleast_2d(self.data)
        r = np.shape(self.data)[0]
        md = self.metadata.export_all()
        for x in range(min(r, m)):
            if self.data.ndim != 2 or self.shape[1] == 1:
                outp += f"{md[x]}\t{self.data[x]}\n"
            else:
                outp = outp + md[x] + "\t" + "\t".join([str(y) for y in self.data[x].filled()]) + "\n"
        if m > r:  # More metadata
            for x in range(r, m):
                outp = outp + md[x] + "\n"
        elif r > m:  # More data than metadata
            if shorten is not None and shorten and r - m > shorten:
                for x in range(m, m + shorten - 100):
                    if self.data.ndim != 2 or self.shape[1] == 1:
                        outp += "\t" + f"\t{self.data[x]}\n"
                    else:
                        outp += "\t" + "\t".join([str(y) for y in self.data[x].filled()]) + "\n"
                outp += f"... {r - m - shorten + 100} lines skipped...\n"
                for x in range(-100, -1):
                    if self.data.ndim != 2 or self.shape[1] == 1:
                        outp += f"\t\t{self.data[x]}\n"
                    else:
                        outp += "\t" + "\t".join([str(y) for y in self.data[x].filled()]) + "\n"
            else:
                for x in range(m, r):
                    if self.data.ndim != 2 or self.shape[1] == 1:
                        outp += f"\t\t{self.data[x]}\n"
                    else:
                        outp = outp + "\t" + "\t".join([str(y) for y in self.data[x].filled()]) + "\n"
        return outp

    def _repr_html_private(self):
        """Produce an html version of the table representation.

        Returns:
            The result of ``_repr_table_`` called with the "html" table format.
        """
        return self._repr_table_("html")

    def _repr_short_(self):
        ret = (
            f"{self.filename}({type(self)}) of shape {self.shape} ({''.join(self.setas)})"
            + f" and {len(self.metadata)} items of metadata"
        )
        return ret

    def _repr_table_(self, fmt="rst"):
        """Convert the Data to a 2D array and then feed to tabulate.

        Args:
            fmt (str):
                The table format name passed to tabulate as *tablefmt*. With "html" the line breaks inside
                the header cells are written as ``<br/>``.

        Returns:
            The output of tabulate for a table whose first column holds the metadata and whose remaining
            columns hold the selected data columns.

        Raises:
            ImportError:
                If tabulate could not be imported.
        """
        if tabulate is None:
            raise ImportError("No tabulate.")
        lb = "<br/>" if fmt == "html" else "\n"
        # Row and column limits for the output; the columns to show come from _interesting_cols.
        rows, cols = self._repr_limits
        r, c = self.shape
        interesting, col_assignments, cols = self._interesting_cols(cols)
        c = min(c, cols)
        # c_w is the width of the longest header among the displayed columns.
        if len(interesting) > 0:
            c_w = max((len(self.column_headers[x]) for x in interesting if x > -1))
        else:
            c_w = 0
        wrapper = TextWrapper(subsequent_indent="\t", width=max(20, (80 - c_w * c)))
        if r > rows:
            shorten = [True, False]
            r = rows + rows % 2
        else:
            shorten = [False, False]

        # c was capped to cols above, so this comparison is always False at this point.
        shorten[1] = c > cols
        r = max(len(self.metadata), r)

        # Every cell starts as "..."; cells that are not overwritten below keep that placeholder.
        outp = np.zeros((r + 1, c + 1), dtype=object)
        outp[:, :] = "..."
        ch = [self.column_headers[ix] if ix >= 0 else "...." for ix in interesting]

        # Each header cell is the column name above its index/setas label, padded with spaces.
        for ix, (h, i) in enumerate(zip(ch, col_assignments)):
            spaces1 = " " * ((c_w - len(h)) // 2)
            spaces2 = " " * ((c_w - len(i)) // 2)
            ch[ix] = f"{spaces1}{h}{lb}{spaces2}{i}"
            if self.debug:
                print(len(spaces1), len(spaces2))
        outp[0, 1:] = ch
        outp[1, 1:] = col_assignments
        outp[0, 0] = f"TDI Format 1.5{lb}index"
        # Fill the first column with the wrapped metadata, adding extra rows of empty cells when needed.
        i = 1
        for md in self.metadata.export_all():
            md = md.replace("=", "= ")
            for line in wrapper.wrap(md):
                if i >= outp.shape[0]:  # pylint: disable=E1136
                    outp = np.append(outp, [[""] * outp.shape[1]], axis=0)  # pylint: disable=E1136
                outp[i, 0] = line
                i += 1
        # Fill the data columns; masked values are shown as "#####". From here on *c* is the loop variable
        # (a column index) rather than the column count.
        for ic, c in enumerate(interesting):
            if c >= 0:
                if shorten[0]:
                    # Shortened output: rows from the start and from the end of the data only.
                    col_out = np.where(self.mask[: r // 2 - 1, c], "#####", self.data[: r // 2 - 1, c].astype(str))
                    outp[1 : r // 2, ic + 1] = col_out
                    col_out = np.where(self.mask[-r // 2 :, c], "#####", self.data[-r // 2 :, c].astype(str))
                    outp[r // 2 + 1 : r + 1, ic + 1] = col_out
                else:
                    col_out = np.where(self.mask[:, c], "#####", self.data[:, c].astype(str))
                    outp[1 : len(self.data) + 1, ic + 1] = col_out
        return tabulate(outp[1:], outp[0], tablefmt=fmt, numalign="decimal", stralign="left")

    def _setattr_col(self, name, value):
        """Attempt to either assign data columns if set up, or setas setting.

        Args:
            name (length 1 string):
                Column type to work with (one of x,y,z,u,v,w,d,e or f)
            value (nd array or column index):
                If an ndarray and the column type corresponding to *name* is set up, then overwrite the column(s)
                of data with this new data. If an index type, then set the corresponding setas assignment to
                these columns.
        """
        if isinstance(value, np.ndarray):
            value = np.atleast_2d(value)
            if value.shape[0] == self.data.shape[0]:
                pass
            elif value.shape[1] == self.data.shape[0]:
                value = value.T
            else:
                raise RuntimeError("Value to be assigned to data columns is the wrong shape!")
            for i, ix in enumerate(self.find_col(self.setas[name], force_list=True)):
                self.data[:, ix] = value[:, i]
        elif isinstance(value, index_types):
            self._set_setas({name: value})

    def _set_mask(self, func, invert=False, cumulative=False, col=0):
        """Apply func to each row in self.data and uses the result to set the mask for the row.

        Args:
            func (callable):
                A Callable object of the form lambda x:True where x is a row of data (numpy
            invert (bool):
                Optionally invert te reult of the func test so that it unmasks data instead
            cumulative (bool):
                if tru, then an unmask value doesn't unmask the data, it just leaves it as it is.
        """
        i = -1
        args = len(_inspect_.getargs(func.__code__)[0])
        for r in self.rows():
            i += 1
            r.mask = False
            if args == 2:
                t = func(r[col], r)
            else:
                t = func(r)
            if isinstance(t, (bool, np.bool_)):
                if t ^ invert:
                    self.data[i] = ma.masked
                elif not cumulative:
                    self.data[i] = self._data.data[i]
            else:
                for j in range(min(len(t), np.shape(self.data)[1])):
                    if t[j] ^ invert:
                        self.data[i, j] = ma.masked
                    elif not cumulative:
                        self.data[i, j] = self.data.data[i, j]

    def _push_mask(self, mask=None):
        """Copy the current data mask to a temporary store and replace it with a new mask if supplied.

        Args:
            mask (:py:class:numpy.array of bool or bool or None):
                The new data mask to apply (defaults to None = unmask the data

        Returns:
            Nothing
        """
        self._masks.append(self.mask)
        if mask is None:
            self.data.mask = False
        else:
            self.mask = mask

    def _pop_mask(self):
        """Replace the mask on the data with the last one stored by _push_mask().

        Returns:
            Nothing
        """
        self.mask = False
        self.mask = self._masks.pop()  # pylint: disable=E0203
        if not self._masks:  # pylint: disable=E0203
            self._masks = [False]

    def _raise_type_error(self, k):
        """Raise a type error when setting an attribute k."""
        if isinstance(self._public_attrs[k], tuple):
            typ = "one of " + ",".join([str(type(t)) for t in self._public_attrs[k]])
        else:
            typ = f"a {type(self._public_attrs[k])}"
        raise TypeError(f"{k} should be {typ}")


[docs]
    @classmethod
    def load(cls, *args, **kwargs):
        """Read a file from disc and return its contents as a :py:class:`Data` object.

        Args:
            filename (string or None):
                Path of the file to load. If no usable filename is given, a file dialog is opened.

        Keyword Arguments:
            auto_load (bool):
                If True then the file is handed to the automatic loader, which tries the available loaders in
                turn. Defaults to True when no *filetype* is given and False otherwise.
            filetype (:py:class:`Data`, str):
                If given, selects the loader to use instead of auto-loading.
            loaded_class (bool):
                If True, the object produced by the loader is returned as it is; otherwise its contents are
                copied into a fresh :py:class:`Stoner.Data` object. (Default False)
            debug (bool):
                Passed on to the automatic loader. (Default False)

        Returns:
            (Data):
                A new :py:class:`Stoner.Data`, or the loader's own object if *loaded_class* is True. If
                auto-loading yields something that is not a :py:class:`Data`, that object is returned unchanged.

        Raises:
            StonerLoadError:
                If a filename was supplied but neither auto-loading nor a filetype is available to read it.
            ValueError:
                If the explicitly selected loader fails with a StonerLoadError.

        Note:
            Any remaining keyword arguments are passed to the loader. Afterwards, each one that names an
            existing, non-callable attribute of the loaded object is also set on that object.

            Some loaders can be found in the :py:mod:`Stoner.formats` package.
        """
        filename, args, kwargs = get_filename(args, kwargs)
        debug = kwargs.pop("debug", False)
        filetype = kwargs.pop("filetype", None)
        auto_load = kwargs.pop("auto_load", filetype is None)
        loaded_class = kwargs.pop("loaded_class", False)

        # Only local paths (not URLs) are resolved against the known file types.
        is_pathlike = isinstance(filename, (str, pathlib.Path))
        if is_pathlike and urllib.parse.urlparse(str(filename)).scheme not in URL_SCHEMES:
            filename, filetype = get_file_name_type(filename, filetype, Data)
        if not filename:
            filename = file_dialog("r", filename, "Data")
        elif not (auto_load or filetype):
            raise StonerLoadError("Cannot read data from non-path like filenames !")

        if auto_load:
            loaded = auto_load_classes(filename, "Data", debug=debug, args=args, kwargs=kwargs)
            if not isinstance(loaded, Data):
                # The automatic loader produced something other than a data file; pass it straight back.
                return loaded
        else:
            loader = get_loader(filetype)
            try:
                loaded = loader(make_Data(), filename, *args, **kwargs)
            except StonerLoadError as err:
                raise ValueError(f"Unable to load {filename}") from err

        # The callable default means unknown names are skipped along with methods.
        for key, val in kwargs.items():
            if not callable(getattr(loaded, key, lambda x: False)):
                setattr(loaded, key, val)
        loaded._kwargs = kwargs
        if loaded_class:
            return loaded

        loader_name = loaded.__class__.__name__
        result = make_Data()
        result._public_attrs.update(loaded._public_attrs)
        copy_into(loaded, result)
        result.filetype = loader_name
        return result



[docs]
    def save(self, filename=None, as_loaded=None, filetype=False, **kwargs):
        """Write the current object to the file *filename* using the most suitable saving routine.

        Args:
            self (Data):
                Data object to work with if not being used as a bound method.

        Keyword Arguments:
            filename (string, bool or None):
                Name of the file to write. None means use the object's current filename; if that is also None,
                or if *filename* is False, a file dialog is used to ask for one.
            as_loaded (bool,str):
                False lets the saving routine be chosen from the filename alone. True uses the *Loaded as*
                entry of the object to name the routine. A str names the routine directly.
            filetype (bool):
                Used in place of *as_loaded* when *as_loaded* is not provided.
            **kwargs:
                Other keyword arguments are ignored.

        Returns:
            self:
                The current :py:class:`self` object

        Raises:
            RuntimeError:
                If the file dialog does not supply a filename.
            TypeError:
                If the effective *as_loaded* value is neither a bool nor a str.
        """
        if as_loaded is None:
            as_loaded = filetype
        if filename is None:
            filename = self.filename
        if filename is None or filename is False:
            # now go and ask for one
            filename = file_dialog("w", self.filename, "Data")
            if not filename:
                raise RuntimeError("Cannot get filename to save")

        # Identity tests are deliberate: only the real True/False singletons select the boolean branches.
        if as_loaded is False:
            saver_name = None
        elif as_loaded is True:
            saver_name = self.get("Loaded as", "self")
        elif isinstance(as_loaded, str):
            saver_name = as_loaded
        else:
            raise TypeError("Unable to use loadtype to work out best saving routine.")

        saver = best_saver(filename, name=saver_name, what="Data")
        ret = saver(self, filename)
        self.filename = ret.filename
        return self