Source code for Stoner.Image.folders

# -*- coding: utf-8 -*-
"""Implements a baseFolder type structure for working with collections of images."""
__all__ = ["ImageFolderMixin", "ImageFolder"]
from warnings import warn
from importlib import import_module
from os import path
from json import loads, dumps
from copy import deepcopy, copy

import numpy as np
from matplotlib.pyplot import figure, Figure, subplot
from PIL.TiffImagePlugin import ImageFileDirectory_v2
from PIL import Image

from .core import ImageArray
from ..Folders import DiskBasedFolderMixin, baseFolder
from ..compat import string_types, int_types
from . import ImageFile


class ImageFolderMixin:
    """Mixin that turns a folder class into a container of images.

    The mixin is intended to behave much like a DataFolder, but with defaults
    and helper methods that suit image files.

    Attributes:
        type (:py:class:`Stoner.Image.core.ImageArray`):
            The type of object stored in the folder. The class default set in
            ``_defaults`` is :py:class:`Stoner.Image.core.ImageArray`.
        extra_args (dict):
            Extra arguments used when instantiating the contents of the folder
            from a file on disk.
        pattern (str or regexp):
            A filename globbing pattern that matches the contents of the
            folder. If a regular expression is given, any named groups are used
            to construct additional metadata entries from the filename. The
            class default set in ``_defaults`` is the list ``*.png``,
            ``*.tiff``, ``*.jpeg``, ``*.jpg``, ``*.tif``.
        read_means (bool):
            If True, additional metadata keys are added that return the mean
            value of each column of the data. This can help in grouping files
            where one column holds a constant value for the experimental state.
            Default is False.
        recursive (bool):
            Whether to search recursively through a whole directory tree.
            Default is True.
        flatten (bool):
            Whether to present subdirectories as separate groups in the folder
            (False) or as a single group (True). Default is False. The
            :py:meth:`DiskBasedFolderMixin.flatten` method has the equivalent
            effect and :py:meth:`DiskBasedFolderMixin.unflatten` reverses it.
        directory (str):
            The root directory on disc for the folder - by default the current
            working directory.
        multifile (bool):
            Whether to select individual files manually that are not
            (necessarily) in a common directory structure.
        readlist (bool):
            Whether to read the directory immediately on creation.
            Default is True.
    """

    _defaults = {"type": ImageArray, "pattern": ["*.png", "*.tiff", "*.jpeg", "*.jpg", "*.tif"]}
    _no_defaults = ["flat"]

    @property
    def size(self):
        """Return the shape shared by every image, or False if the shapes differ.

        An empty folder yields an empty tuple.
        """
        reference = self[0].shape if len(self) > 0 else tuple()
        # any() stops at the first mismatch, just like an early return from a loop.
        if any(member.shape != reference for member in self):
            return False
        return reference

    @property
    def images(self):
        """Iterate over the image arrays held in the folder.

        Members that are already numpy arrays are yielded unchanged; any other
        member must expose an ``image`` attribute, which is yielded instead.

        Raises:
            TypeError: If a member is neither an array nor has an ``image`` attribute.
        """
        for im in self:
            if isinstance(im, np.ndarray):
                yield im
            elif hasattr(im, "image"):
                yield im.image
            else:
                raise TypeError(f"Cannot represent {type(im)} as an ImageArray.")

    #########################################################################################################
    ##### Folder interface methods
    #########################################################################################################

    def __getter__(self, name, instantiate=True):
        """Fetch a member via the parent class and give it a title if it has none.

        Parameters:
            name (key type):
                The canonical mapping key of the object to fetch.

        Keyword Arguments:
            instantiate (bool):
                Passed straight through to the parent ``__getter__``. It is
                documented there as: True (default) always returns a
                metadataObject; False may return a key usable later to get the
                object; None returns whatever is held in the object cache.

        Returns:
            Whatever the parent ``__getter__`` returned. If that object has a
            ``_title`` attribute that is None, it is set to *name* first.
        """
        fetched = super().__getter__(name, instantiate)
        # The non-None default makes "attribute missing" and "title already set" both skip.
        if getattr(fetched, "_title", NotImplemented) is None:
            fetched._title = name
        return fetched
[docs] def align(self, *args, **kargs): """Align each image in the folder to the reference image. Args: ref (str, int, ImageFile, ImageArray or 2D array): The reference image to align to. If a string or an int, then this is used to lookup the corresponding member of the ImageFolder which is then used. ImageFiles, ImageArrays and 2D arrays are used directly as reference images. Keyword Arguments: method (str): The method is passed to the :py:class:`Stone.Image.ImageArray.align` method to control how the image alignment is done. By default the 'Scharr' method is used. box (int, float, tuple of ints or floats): Specifies a subset of the images to be used to calculate the alignment with. scale (int): Magnification factor to scale the image by before doing the alignment for better sub=pixel alignments. Returns: The aligned ImageFolder. """ if len(args) == 1: ref = args[0] elif len(args) == 0: ref = 0 else: raise ValueError( f"{type(self).__name__}.align only takes zero or one positional arguments not {len(args)}!" 
) # Get me reference data if isinstance(ref, (string_types, int_types)): ref_data = self.__getter__(ref, instantiate=True) if isinstance(ref_data, ImageFile): ref_data = ref_data.image elif isinstance(ref, ImageFile): ref_data = ref.image.view(ImageArray) elif isinstance(ref, np.ndarray) and ref.ndim == 2: ref_data = ref.view(ImageArray) else: try: ref_data = np.array(ref).view(ImageArray) if ref_data.ndim != 2: raise TypeError() except (TypeError, ValueError) as err: raise TypeError(f"Cannot interpret {type(ref)} as reference image data.") from err # Call align on each object self.each.align(ref_data, **kargs) limits = self.metadata.slice("translation_limits", output="array") stack_limits = np.zeros(4) stack_limits[::2] = limits.max(axis=0)[::2] stack_limits[1::2] = limits.min(axis=0)[1::2] self.metadata["translation_limits"] = tuple(stack_limits) stack_limits[::2] = np.ceil(stack_limits)[::2] stack_limits[1::2] = np.floor(stack_limits)[1::2] self.metadata["align_box"] = tuple(stack_limits.astype(int)) return self
[docs] def apply_all(self, func, *args, **kargs): """Apply function to all images in the stack. Args: func(string or callable): if string it must be a function reachable by ImageArray quiet(bool): if False print '.' for every iteration Note: Further args, kargs are passed through to the function """ warn("apply_all is deprecated and will be removed in a future version. Use ImageFolder.each() instead") return self.each(func, *args, **kargs)
[docs] def average(self, weights=None, _box=False, _metadata="first"): """Get an array of average pixel values for the stack. Pass through to numpy average Keyword Arguments: _box (crop box): Specifies the region of the array to be averaged. Default - entire image _metadata (str): Specifies how to generate metadata for the averaged image. - "first": Just ise the first image's metadata - "common": Find the common metadata across all images - "none': no metadata from images. Returns: average(ImageArray): average values """ if not self.size: raise RuntimeError("Cannot average Imagefolder if images have different sizes") if hasattr(self, "_stack"): stack = self._stack.view(np.ndarray) axis = -1 else: stack = np.stack(list(self.images), axis=0) axis = 0 average = np.average(stack, axis=axis, weights=weights) ret = average.view(ImageArray) if _metadata == "common": ret.metadata = self.metadata.common_metadata elif _metadata == "first": ret.metadata = deepcopy(self[0].metadata) return self._type(ret[ret._box(_box)])
[docs] def loadgroup(self): """Load all files from this group into memory.""" for _ in self: pass
[docs] def as_stack(self): """Return a ImageStack of the images in the current group.""" stack = import_module(".stack", "Stoner.Image") k = stack.ImageStack(self) return k
[docs] @classmethod def from_tiff(cls, filename, **kargs): """Create a new ImageArray from a tiff file.""" self = cls(**kargs) with Image.open(filename, "r") as img: tags = img.tag_v2 if 270 in tags: try: userdata = loads(tags[270]) typ = userdata.get("type", cls.__name__) mod = userdata.get("module", cls.__module__) layout = userdata.get("layout", (0, {})) mod = import_module(mod) typ = getattr(mod, typ) if not issubclass(typ, ImageFolderMixin): raise TypeError( f"Bad type in Tiff file {typ.__name__} is not a subclass of Stoner.ImageFolder" ) metadata = userdata.get("metadata", []) except (TypeError, ValueError, IOError): metadata = [] else: raise TypeError("Cannot load as an ImageFolder due to lack of description tag") imglist = [] for ix, md in enumerate(metadata): img.seek(ix) image = np.asarray(img) if image.ndim == 3: if image.shape[2] < 4: # Need to add a dummy alpha channel image = np.append(np.zeros_like(image[:, :, 0]), axis=2) image = image.view(dtype=np.uint32).reshape(image.shape[:-1]) if isinstance(self.type, np.ndarray): image = image.view(self.type) else: image = self.type(image) image.metadata.import_all(md) imglist.append(image) self._marshall(layout=layout, data=imglist) return self
[docs] def mask_select(self): """Run the ImageFile.mask.select() on each image.""" sel = [] for img in self: img.mask.select(_selection=sel)
[docs] def mean(self, _box=False, _metadata="first"): """Calculate the mean value of all the images in the stack. Keyword Arguments: _box (crop box): Specifies the region of the array to be averaged. Default - entire image _metadata (str): Specifies how to generate metadata for the averaged image. - "first": Just ise the first image's metadata - "common": Find the common metadata across all images - "none': no metadata from images. Actually a synonym for self.average with not weights """ return self.average(_box=_box, _metadata=_metadata)
[docs] def montage(self, *args, **kargs): """Call the plot method for each metadataObject, but switching to a subplot each time. Args: args: Positional arguments to pass through to the :py:meth:`Stoner.plot.PlotMixin.plot` call. kargs: Keyword arguments to pass through to the :py:meth:`Stoner.plot.PlotMixin.plot` call. Keyword Arguments: extra (callable(i,j,d)): A callable that can carry out additional processing per plot after the plot is done figsize(tuple(x,y)): Size of the figure to create dpi(float): dots per inch on the figure edgecolor,facecolor(matplotlib colour): figure edge and frame colours. frameon (bool): Turns figure frames on or off FigureClass(class): Passed to matplotlib figure call. plots_per_page(int): maximum number of plots per figure. Returns: A list of :py:class:`matplotlib.pyplot.Axes` instances. Notes: If the underlying type of the :py:class:`Stoner.Core.metadataObject` instances in the :py:class:`PlotFolder` lacks a **plot** method, then the instances are converted to :py:class:`Stoner.Core.Data`. Each plot is generated as sub-plot on a page. The number of rows and columns of subplots is computed from the aspect ratio of the figure and the number of files in the :py:class:`PlotFolder`. 
""" plts = kargs.pop("plots_per_page", getattr(self, "plots_per_page", len(self))) plts = min(plts, len(self)) extra = kargs.pop("extra", lambda i, j, d: None) fig_num = kargs.pop("figure", getattr(self, "_figure", None)) if isinstance(fig_num, Figure): kargs.setdefault("figsize", fig_num.get_size_inches()) kargs.setdefault("facecolor", fig_num.get_facecolor()) kargs.setdefault("edgecolor", fig_num.get_edgecolor()) kargs.setdefault("frameon", fig_num.get_frameon()) kargs.setdefault("FigureClass", fig_num.__class__) fig_num = fig_num.number fig_args = getattr(self, "_fig_args", []) fig_kargs = getattr(self, "_fig_kargs", {"layout": "constrained"}) for arg in ("figsize", "dpi", "facecolor", "edgecolor", "frameon", "FigureClass"): if arg in kargs: fig_kargs[arg] = kargs.pop(arg) if fig_num is None: fig = figure(*fig_args, **fig_kargs) else: fig = figure(fig_num, **fig_kargs) w, h = fig.get_size_inches() plt_x = int(np.floor(np.sqrt(plts) * w / h)) plt_y = int(np.ceil(plts / plt_x)) kargs["figure"] = fig ret = [] j = 0 fignum = fig.number for i, d in enumerate(self): plt_kargs = copy(kargs) if i % plts == 0 and i != 0: fig = figure(*fig_args, **fig_kargs) fignum = fig.number j = 1 else: j += 1 fig = figure(fignum) ax = subplot(plt_y, plt_x, j) plt_kargs["figure"] = fig plt_kargs["ax"] = ax if "title" in kargs: if isinstance(kargs["title"], str): plt_kargs["title"] = kargs["title"].format(**d) elif callable(kargs["title"]): plt_kargs["title"] = kargs["title"](d) ret.append(d.imshow(*args, **plt_kargs)) extra(i, j, d) return ret
[docs] def stddev(self, weights=None, _box=False, _metadata="first"): """Calculate weighted standard deviation for stack. Keyword Arguments: _box (crop box): Specifies the region of the array to be averaged. Default - entire image _metadata (str): Specifies how to generate metadata for the averaged image. - "first": Just ise the first image's metadata - "common": Find the common metadata across all images - "none': no metadata from images. This is a biased standard deviation, may not be appropriate for small sample sizes """ if weights is None: # shortcircuit if hasattr(self, "_stack"): sumsqdev = np.std(self._stack.view(np.ndarray), axis=-1) else: sumsqdev = np.stack(list(self.images), axis=0).std(axis=0) else: avs = self.average(weights=weights) if not isinstance(avs, np.ndarray) and hasattr(avs, "image"): avs = avs.image sumsqdev = np.zeros_like(avs) for ix, img in enumerate(self.images): sumsqdev += weights[ix] * (img - avs) ** 2 sumsqdev = np.sqrt(sumsqdev) / np.sum(weights, axis=0) ret = sumsqdev.view(ImageArray) ret.metadata = self.metadata.common_metadata return self._type(ret[ret._box(_box)])
[docs] def stderr(self, weights=None, _box=False, _metadata="first"): """Calculate standard error in the stack average. Keyword Arguments: _box (crop box): Specifies the region of the array to be averaged. Default - entire image _metadata (str): Specifies how to generate metadata for the averaged image. - "first": Just ise the first image's metadata - "common": Find the common metadata across all images - "none': no metadata from images. """ serr = self.stddev(weights=weights, _box=_box, _metadata=_metadata) / np.sqrt(len(self)) return serr
[docs] def to_tiff(self, filename): """Save the ImageArray as a tiff image with metadata. Args: filename (str): Filename to save file as. Note: PIL can save in modes "L" (8bit unsigned int), "I" (32bit signed int), or "F" (32bit signed float). In general max info is preserved for "F" type so if forcetype is not specified then this is the default. For boolean type data mode "L" will suffice and this is chosen in all cases. The type name is added as a string to the metadata before saving. """ metadata_export = [] imlist = [] for d in self._marshall(): dtype = np.dtype(d.dtype).name # string representation of dtype we can save d["ImageArray.dtype"] = dtype # add the dtype to the metadata for saving. metadata_export.append(d.metadata.export_all()) if d.dtype.kind == "b": # boolean we're not going to lose data by saving as unsigned int imlist.append(Image.fromarray(d.image, mode="L")) else: try: imlist.append(Image.fromarray(d.image)) except TypeError: imlist.append(Image.fromarray(d.image.astype("float32"))) ifd = ImageFileDirectory_v2() ifd[270] = dumps( { "type": type(self).__name__, "module": type(self).__module__, "layout": self.layout, "metadata": metadata_export, } ) ext = path.splitext(filename)[1] if ext in [".tif", ".tiff"]: # ensure extension is preserved in save pass else: # default to tiff ext = ".tiff" tiffname = path.splitext(filename)[0] + ext imlist[0].save(tiffname, save_all=True, append_images=imlist[1:], tiffinfo=ifd) return self
class ImageFolder(ImageFolderMixin, DiskBasedFolderMixin, baseFolder):
    """Disk-based folder object for collections of images.

    ImageFolder combines :py:class:`ImageFolderMixin`,
    :py:class:`DiskBasedFolderMixin` and :py:class:`baseFolder`, and adds
    nothing of its own. It is intended to behave much like a DataFolder, but
    with functions and loaders appropriate for image files.

    Attributes:
        type (:py:class:`Stoner.Image.core.ImageArray`):
            The type of object stored in the folder.
        extra_args (dict):
            Extra arguments used when instantiating the contents of the folder
            from a file on disk.
        pattern (str or regexp):
            A filename globbing pattern that matches the contents of the
            folder. If a regular expression is given, any named groups are used
            to construct additional metadata entries from the filename.
        read_means (bool):
            If True, additional metadata keys are added that return the mean
            value of each column of the data. This can help in grouping files
            where one column holds a constant value for the experimental state.
            Default is False.
        recursive (bool):
            Whether to search recursively through a whole directory tree.
            Default is True.
        flatten (bool):
            Whether to present subdirectories as separate groups in the folder
            (False) or as a single group (True). Default is False. The
            :py:meth:`DiskBasedFolderMixin.flatten` method has the equivalent
            effect and :py:meth:`DiskBasedFolderMixin.unflatten` reverses it.
        directory (str):
            The root directory on disc for the folder - by default the current
            working directory.
        multifile (bool):
            Whether to select individual files manually that are not
            (necessarily) in a common directory structure.
        readlist (bool):
            Whether to read the directory immediately on creation.
            Default is True.
    """