Source code for stsci.imagestats

"""
Compute desired statistics values for input array objects.
"""

import time

import numpy as np

from ._version import version as __version__  # noqa: F401
from .computeMean import computeMean
from .histogram1d import histogram1d

__all__ = ["ImageStats", "computeMean", "histogram1d"]


[docs] class ImageStats: """ Class to compute desired statistics from array objects. Examples -------- This class can can be instantiated using the following syntax:: >>> import stsci.imagestats as imagestats >>> i = imagestats.ImageStats(image, fields="npix,min,max,mean,stddev", nclip=3, lsig=3.0, usig=3.0, binwidth=0.1 ) >>> i.printStats() >>> i.mean The statistical quantities specified by the parameter *fields* are computed and printed for the input *image* array. The results are available as attributes of the class object as well. Parameters ---------- image : str input image data array. fields : str comma-separated list of values to be computed. The following are the available fields: ====== ====== ====== ====== image image data array npix the number of pixels used to do the statistics mean the mean of the pixel distribution midpt estimate of the median of the pixel distribution mode the mode of the pixel distribution stddev the standard deviation of the pixel distribution min the minimum pixel value max the maximum pixel value ====== ====== **WARNING** Only those fields specified upon instantiation will be computed and available as an output value. lower : float Lowest valid value in the input array to be used for computing the statistical values upper : float Largest valid value in the input array to be used in computing the statistical values nclip : int Number of clipping iterations to apply in computing the results lsig : float Lower sigma clipping limit (in sigma) usig : float Upper sigma clipping limit (in sigma) binwidth : float Width of bins (in sigma) to use in generating histograms for computing median-related values Notes ----- The mean, standard deviation, min and max are computed in a single pass through the image using the expressions listed below. Only the quantities selected by the fields parameter are actually computed. :: mean = sum (x1,...,xN) / N y = x - mean variance = sum (y1 ** 2,...,yN ** 2) / (N-1) stddev = sqrt (variance) The midpoint and mode are computed in two passes through the image. In the first pass the standard deviation of the pixels is calculated and used with the *binwidth* parameter to compute the resolution of the data histogram. The midpoint is estimated by integrating the histogram and computing by interpolation the data value at which exactly half the pixels are below that data value and half are above it. The mode is computed by locating the maximum of the data histogram and fitting the peak by parabolic interpolation. **Warning** This data will be promoted down to float32 if provided as 64-bit datatype. """ def __init__( self, image, fields="npix,min,max,mean,stddev", lower=None, upper=None, nclip=0, lsig=3.0, usig=3.0, binwidth=0.1, ): # Initialize the start time of the program self.startTime = time.time() self._hist = None image = np.asanyarray(image) if image.size == 0: raise ValueError("Not enough data points to compute statistics.") # Input Value if image.dtype != np.float32: # Warning: Input array is being downcast to a float32 array image = image.astype(np.float32) if nclip < 0: raise ValueError("'nclip' must be a non-negative integer.") if lsig <= 0.0: raise ValueError("'lsig' must be a positive number.") if usig <= 0.0: raise ValueError("'usig' must be a positive number.") if binwidth <= 0.0: raise ValueError("'binwidth' must be a positive number.") self.image = image self.nclip = nclip self.lsig = lsig self.usig = usig self.binwidth = binwidth self.fields = fields.lower() # Initialize some return value attributes self.npix = None self.stddev = None self.mean = None self.mode = None self.bins = None self.median = None # numpy computed median with clipping self.midpt = None # IRAF-based pseudo-median using bins # Compute Global minimum and maximum self.min = np.minimum.reduce(np.ravel(image)) self.max = np.maximum.reduce(np.ravel(image)) # Apply initial mask to data: upper and lower limits if lower is None: lower = self.min elif lower > self.max: raise ValueError( "Lower data cutoff is larger than maximum pixel value.\n" "Not enough data points to compute statistics." ) elif upper is not None and lower > upper: raise ValueError("Lower data cutoff must be smaller than upper cutoff limit.") if upper is None: upper = self.max elif upper < self.min: raise ValueError( "Upper data cutoff is smaller than minimum pixel value.\n" "Not enough data points to compute statistics." ) self.lower = lower self.upper = upper # Compute the image statistics self._computeStats() # Initialize the end time of the program self.stopTime = time.time() self.deltaTime = self.stopTime - self.startTime def _error_no_valid_pixels(self, clipiter, minval, maxval, minclip, maxclip): errormsg = "\n##############################################\n" errormsg += "# #\n" errormsg += "# ERROR: #\n" errormsg += "# Unable to compute image statistics. No #\n" errormsg += "# valid pixels exist within the defined #\n" errormsg += "# pixel value range. #\n" errormsg += "# #\n" errormsg += " Image MIN pixel value: " + str(minval) + "\n" errormsg += " Image MAX pixel value: " + str(maxval) + "\n\n" errormsg += "# Current Clipping Range #\n" errormsg += " for iteration " + str(clipiter) + "\n" errormsg += " Excluding pixel values above: " + str(maxclip) + "\n" errormsg += " Excluding pixel values below: " + str(minclip) + "\n" errormsg += "# #\n" errormsg += "##############################################\n" return errormsg def _computeStats(self): """Compute all the basic statistics from the array object.""" # Initialize the local max and min _clipmin = self.lower _clipmax = self.upper # Compute the clipped mean iterating the user specified number of iterations for iter in range(self.nclip + 1): try: _npix, _mean, _stddev, _min, _max = computeMean(self.image, _clipmin, _clipmax) except Exception: raise RuntimeError( "An error processing the array object information occurred " "in the computeMean module of imagestats." ) if _npix <= 0: # Compute Global minimum and maximum errormsg = self._error_no_valid_pixels(iter, self.min, self.max, _clipmin, _clipmax) print(errormsg) raise ValueError("Not enough data points to compute statistics.") if iter < self.nclip: # Re-compute limits for iterations _clipmin = max(self.lower, _mean - self.lsig * _stddev) _clipmax = min(self.upper, _mean + self.usig * _stddev) if self.fields.find("median") != -1: # Use the clip range to limit the data before computing # the median value using numpy if self.nclip > 0: _image = self.image[(self.image <= _clipmax) & (self.image >= _clipmin)] else: _image = self.image self.median = np.median(_image) # clean-up intermediate product since it is no longer needed del _image if (self.fields.find("mode") != -1) or (self.fields.find("midpt") != -1): # Populate the histogram _hwidth = self.binwidth * _stddev _drange = _max - _min _minfloatval = 10.0 * np.finfo(dtype=np.float32).eps if _hwidth < _minfloatval or abs(_drange) < _minfloatval or _hwidth > _drange: _nbins = 1 _dz = _drange print("! WARNING: Clipped data falls within 1 histogram bin") else: _nbins = int((_max - _min) / _hwidth) + 1 _dz = float(_max - _min) / float(_nbins - 1) _hist = histogram1d(self.image, _nbins, _dz, _min) self._hist = _hist _bins = _hist.histogram if self.fields.find("mode") != -1: # Compute the mode, taking into account special cases if _nbins == 1: _mode = _min + 0.5 * _hwidth elif _nbins == 2: if _bins[0] > _bins[1]: _mode = _min + 0.5 * _hwidth elif _bins[0] < _bins[1]: _mode = _min + 1.5 * _hwidth else: _mode = _min + _hwidth else: # TODO: perform a better analysis and pick the middle when # there are multiple picks: _peakindex = np.where(_bins == np.maximum.reduce(_bins))[0].tolist()[0] if _peakindex == 0: _mode = _min + 0.5 * _hwidth elif _peakindex == (_nbins - 1): _mode = _min + (_nbins - 0.5) * _hwidth else: _dh1 = _bins[_peakindex] - _bins[_peakindex - 1] _dh2 = _bins[_peakindex] - _bins[_peakindex + 1] _denom = _dh1 + _dh2 if _denom == 0: _mode = _min + (_peakindex + 0.5) * _hwidth else: _mode = _peakindex + 1 + (0.5 * (int(_dh1) - int(_dh2)) / _denom) _mode = _min + ((_mode - 0.5) * _hwidth) # Return the mode self.mode = _mode if self.fields.find("midpt") != -1: # Compute a pseudo-Median Value using IRAF's algorithm if _bins.size > 1: _binSum = np.cumsum(_bins).astype(np.float32) _binSum = _binSum / _binSum[-1] _lo = np.where(_binSum >= 0.5)[0][0] _hi = _lo + 1 _h1 = _min + _lo * _hwidth if _lo == 0: _hdiff = _binSum[_hi - 1] else: _hdiff = _binSum[_hi - 1] - _binSum[_lo - 1] if _hdiff == 0: _midpt = _h1 elif _lo == 0: _midpt = _h1 + 0.5 / _hdiff * _hwidth else: _midpt = _h1 + ((0.5 - _binSum[_lo - 1]) / _hdiff * _hwidth) self.midpt = _midpt else: self.midpt = _bins[0] # These values will only be returned if the histogram is computed. self.hmin = _min + 0.5 * _hwidth self.hwidth = _hwidth self.histogram = _bins # Return values self.stddev = _stddev self.mean = _mean self.npix = _npix self.min = _min self.max = _max
[docs] def getCenters(self): """Compute the array of bin center positions.""" if self._hist is not None: return self._hist.centers
[docs] def printStats(self): """Print the requested statistics values for those fields specified on input. """ print("--- Imagestats Results ---") if self.fields.find("npix") != -1: print("Number of pixels : ", self.npix) if self.fields.find("min") != -1: print("Minimum value : ", self.min) if self.fields.find("max") != -1: print("Maximum value : ", self.max) if self.fields.find("stddev") != -1: print("Standard Deviation: ", self.stddev) if self.fields.find("mean") != -1: print("Mean : ", self.mean) if self.fields.find("mode") != -1: print("Mode : ", self.mode) if self.fields.find("median") != -1: print("Median : ", self.median) if self.fields.find("midpt") != -1: print("Midpt : ", self.midpt)