#!/usr/bin/env python
u"""
fit.py
Written by Tyler Sutterley (05/2024)
Utilities for creating models from surface elevation data
PYTHON DEPENDENCIES:
numpy: Scientific Computing Tools For Python (https://numpy.org)
scipy: Scientific Tools for Python (https://docs.scipy.org/doc/)
UPDATE HISTORY:
Updated 05/2024: add function to build the complete design matrix
add function to build the constraints for the least-squares fit
add function to validate the columns in the design matrix
add functions to give the number of spatial and temporal terms
optionally use a bounded least-squares fit for the model runs
add grounding zone, elastic bending and breakpoint fits
Updated 04/2024: rewritten for python3 and added function docstrings
add optional TERMS argument to augment the design matrix
add spline design matrix option for time-variable fit
Updated 09/2017: using rcond=-1 in numpy least-squares algorithms
use median statistics for reducing to valid points
Written 03/2014
"""
from __future__ import print_function, annotations
import numpy as np
import scipy.stats
import scipy.optimize
from scipy.interpolate import BSpline
# PURPOSE: iteratively fit a polynomial surface to the elevation data to
# reduce to within a valid window
[docs]
def iterative_surface(t_in, x_in, y_in, d_in, TERMS=[], **kwargs):
"""
Iteratively fit a polynomial surface to the elevation data to reduce to
within a valid surface window [Schenk2012]_ [Smith2019]_ [Sutterley2014]_
Parameters
----------
t_in: np.ndarray
input time array
x_in: np.ndarray
x-coordinate array
y_in: np.ndarray
y-coordinate array
d_in: np.ndarray
input data array
FIT_TYPE: str
type of time-variable polynomial fit to apply
- ``'polynomial'``
- ``'chebyshev'``
- ``'spline'``
ITERATIONS: int, default 25
maximum number of iterations to use in fit
ORDER_TIME: int
maximum polynomial order in time-variable fit
ORDER_SPACE: int
maximum polynomial order in spatial fit
TERMS: list
list of extra terms
BOUNDED: bool
use bounded least-squares fit
STDEV: float
standard deviation of output error
CONF: float
confidence interval of output error
AICc: bool
use second order AIC
kwargs: dict
keyword arguments for the fit type
Returns
-------
beta: np.ndarray
regressed coefficients array
error: np.ndarray
regression fit error for each coefficient
data: np.ndarray
modeled elevation at centroid
model: np.ndarray
modeled surface time-series at input points
std_error: np.ndarray
standard error for each coefficient
R2: float
coefficient of determination (r^2)
R2Adj: float
adjusted r^2 value
MSE: float
mean square error
WSSE: float
weighted sum of squares error
NRMSE: float
normalized root mean square error
AIC: float
Akaike information criterion (Second-Order, AICc)
BIC: float
Bayesian information criterion (Schwarz criterion)
LOGLIK: float
log likelihood
residual: np.ndarray
model residual
DOF: int
degrees of freedom
count: int
final number of points used in the fit
indices: np.ndarray
indices of valid points
iterations: int
number of iterations performed
window: float
final window size for the fit
RDE: float
robust dispersion estimate
centroid: dict
centroid point used in the fit
References
----------
.. [Schenk2012] T. Schenk and B. M. Csatho, "A New Methodology for Detecting
Ice Sheet Surface Elevation Changes From Laser Altimetry Data",
*IEEE Transactions on Geoscience and Remote Sensing*, 50(9),
3302--3316, (2012). `doi:10.1109/TGRS.2011.2182357
<https://doi.org/10.1109/TGRS.2011.2182357>`_
.. [Smith2019] B. E. Smith el al., "Land ice height-retrieval
algorithm for NASA's ICESat-2 photon-counting laser altimeter",
*Remote Sensing of Environment*, 233, 111352, (2019).
`doi:10.1016/j.rse.2019.111352
<https://doi.org/10.1016/j.rse.2019.111352>`_
.. [Sutterley2014] T. C. Sutterley, I. Velicogna, E. J. Rignot, J. Mouginot,
T. Flament, M. R. van den Broeke, J. M. van Wessem, C. H. Reijmer,
"Mass loss of the Amundsen Sea Embayment of West Antarctica from
four independent techniques", *Geophysical Research Letters*,
41(23), 8421--8428, (2014). `doi:10.1002/2014GL061940
<https://doi.org/10.1002/2014GL061940>`_
"""
kwargs.setdefault('FIT_TYPE', 'polynomial')
kwargs.setdefault('ITERATIONS', 25)
kwargs.setdefault('MINIMUM_WINDOW', 1.0)
kwargs.setdefault('MAXIMUM_RDE', 20.0)
kwargs.setdefault('ORDER_TIME', 3)
kwargs.setdefault('ORDER_SPACE', 3)
kwargs.setdefault('KNOTS', [])
kwargs.setdefault('THRESHOLD', 10)
# number of points for fit
n_max = len(d_in)
# total number of spatial and temporal terms
n_space = _spatial_terms(**kwargs)
n_time = _temporal_terms(**kwargs)
# total number of terms in fit
n_terms = n_space + n_time + len(TERMS)
# threshold for minimum number of points for fit
# run only if number of points is above number of terms
FLAG1 = ((n_max - n_terms) > kwargs['THRESHOLD'])
# set initial window to the full data range
window = d_in.max() - d_in.min()
window_p1 = np.copy(window)
h_min_win = np.copy(kwargs['MINIMUM_WINDOW'])
# initial indices for reducing to window
filt = np.arange(n_max)
filt_p1 = np.copy(filt)
filt_p2 = -1*np.ones_like(filt)
# indices of valid points
ind = np.arange(n_max)
if FLAG1:
# save initial indices
indices = ind.copy()
# run fit program for fit types
s = polynomial_surface(t_in, x_in, y_in, d_in, **kwargs)
# number of iterations performed
n_iter = 1
# save beta coefficients
beta_mat = np.copy(s['beta'])
error_mat = np.copy(s['error'])
data = np.copy(s['data'])
model = np.copy(s['model'])
# residuals of model fit
resid = s['residual']
# standard deviation of the residuals
resid_std = np.std(resid)
# standard error
std_error = np.copy(s['std_error'])
# coefficients of determination
rsquare = np.copy(s['R2'])
rsq_adj = np.copy(s['R2Adj'])
# save MSE and DOF for error analysis
MSE = np.copy(s['MSE'])
DOF = np.copy(s['DOF'])
# Root mean square error
RMSE = np.sqrt(s['MSE'])
# Normalized root mean square error
NRMSE = RMSE/(np.max(d_in)-np.min(d_in))
# fit criterion
AIC = np.copy(s['AIC'])
BIC = np.copy(s['BIC'])
log_lik = np.copy(s['LOGLIK'])
# IQR pass: residual-(median value) is within 75% of IQR
# RDE pass: residual-(median value) is within 50% of P84-P16
IQR, RDE, MEDIAN = median_filter(resid)
# checking if any residuals are outside of the window
window = np.max([h_min_win, 6.0*RDE, 0.75*window_p1])
filt, = np.nonzero(np.abs(resid - MEDIAN) <= (window/2.0))
# save iteration of window
window_p1 = np.copy(window)
# run only if number of points is above number of terms
n_rem = np.count_nonzero(np.abs(resid - MEDIAN) <= (window/2.0))
FLAG1 = ((n_rem - n_terms) > kwargs['THRESHOLD'])
# maximum number of iterations to prevent infinite loops
FLAG2 = (n_iter <= kwargs['ITERATIONS'])
# compare indices over two iterations to prevent false stoppages
FLAG3 = (set(filt) != set(filt_p1)) | (set(filt_p1) != set(filt_p2))
# compare robust dispersion estimate with maximum allowable
FLAG4 = (RDE >= kwargs['MAXIMUM_RDE'])
# iterate until there are no additional removed data points
while FLAG1 & FLAG2 & (FLAG3 | FLAG4):
# fit selected data for window
t_filt = t_in[filt]
x_filt = x_in[filt]
y_filt = y_in[filt]
d_filt = d_in[filt]
indices = ind[filt]
# reduce
terms = [t[filt] for t in TERMS]
# run fit program for polynomial type
s = polynomial_surface(t_filt, x_filt, y_filt, d_filt,
TERMS=terms, **kwargs)
# add to number of iterations performed
n_iter += 1
# save model coefficients
beta_mat = np.copy(s['beta'])
error_mat = np.copy(s['error'])
data = np.copy(s['data'])
model = np.copy(s['model'])
# save number of points
n_max = len(d_filt)
# residuals of model fit
resid = s['residual']
# standard deviation of the residuals
resid_std = np.std(resid)
# standard error
std_error = np.copy(s['std_error'])
# coefficients of determination
rsquare = np.copy(s['R2'])
rsq_adj = np.copy(s['R2Adj'])
# save MSE and DOF for error analysis
MSE = np.copy(s['MSE'])
DOF = np.copy(s['DOF'])
# Root mean square error
RMSE = np.sqrt(s['MSE'])
# Normalized root mean square error
NRMSE = RMSE/(np.max(d_filt)-np.min(d_filt))
# fit criterion
AIC = np.copy(s['AIC'])
BIC = np.copy(s['BIC'])
log_lik = np.copy(s['LOGLIK'])
# IQR pass: residual-(median value) is within 75% of IQR
# RDE pass: residual-(median value) is within 50% of P84-P16
IQR, RDE, MEDIAN = median_filter(resid)
# checking if any residuals are outside of the window
window = np.max([h_min_win, 6.0*RDE, 0.75*window_p1])
# filter out using median statistics and refit
filt_p2 = np.copy(filt_p1)
filt_p1 = np.copy(filt)
filt, = np.nonzero(np.abs(resid - MEDIAN) <= (window/2.0))
# save iteration of window
window_p1 = np.copy(window)
# run only if number of points is above number of terms
n_rem = np.count_nonzero(np.abs(resid - MEDIAN) <= (window/2.0))
FLAG1 = ((n_rem - n_terms) > kwargs['THRESHOLD'])
# maximum number of iterations to prevent infinite loops
FLAG2 = (n_iter <= kwargs['ITERATIONS'])
# compare indices over two iterations to prevent false stoppages
FLAG3 = (set(filt) != set(filt_p1)) | (set(filt_p1) != set(filt_p2))
# compare robust dispersion estimate with maximum allowable
FLAG4 = (RDE >= kwargs['MAXIMUM_RDE'])
# return reduced model fit
FLAG3 = (set(filt) == set(filt_p1))
if FLAG1 & FLAG3 & np.logical_not(FLAG4):
return {'beta':beta_mat, 'error':error_mat, 'data':data,
'model':model, 'std_error':std_error, 'R2':rsquare,
'R2Adj':rsq_adj, 'MSE':MSE, 'NRMSE':NRMSE,
'AIC':AIC, 'BIC':BIC, 'LOGLIK':log_lik,
'residual':resid, 'DOF':DOF, 'count':n_max,
'indices':indices, 'iterations':n_iter,
'window':window, 'RDE':RDE, 'centroid':s['centroid']}
else:
raise Exception(f'No valid fit found after {n_iter} iterations')
[docs]
def polynomial_surface(t_in, x_in, y_in, d_in,
STDEV=0,
CONF=0,
AICc=True,
**kwargs
):
"""
Fits a polynomial surface to a set of points [Schenk2012]_ [Sutterley2014]_
Parameters
----------
t_in: np.ndarray
input time array
x_in: np.ndarray
x-coordinate array
y_in: np.ndarray
y-coordinate array
d_in: np.ndarray
input data array
FIT_TYPE: str
type of time-variable polynomial fit to apply
- ``'polynomial'``
- ``'chebyshev'``
- ``'spline'``
ORDER_TIME: int
maximum polynomial order in time-variable fit
ORDER_SPACE: int
maximum polynomial order in spatial fit
KNOTS: list or np.ndarray
Sorted 1D array of knots for time-variable spline fit
TERMS: list
list of extra terms
BOUNDED: bool
use bounded least-squares fit
STDEV: float
standard deviation of output error
CONF: float
confidence interval of output error
AICc: bool
use second order AIC
kwargs: dict
keyword arguments for the fit type
Returns
-------
beta: np.ndarray
regressed coefficients array
error: np.ndarray
regression fit error for each coefficient
data: np.ndarray
modeled elevation at centroid
model: np.ndarray
modeled surface time-series at input points
std_error: np.ndarray
standard error for each coefficient
R2: float
coefficient of determination (r^2)
R2Adj: float
adjusted r^2 value
MSE: float
mean square error
WSSE: float
weighted sum of squares error
NRMSE: float
normalized root mean square error
AIC: float
Akaike information criterion (Second-Order, AICc)
BIC: float
Bayesian information criterion (Schwarz criterion)
LOGLIK: float
log likelihood
residual: np.ndarray
model residual
N: int
number of terms in the model
DOF: int
degrees of freedom
cov_mat: np.ndarray
covariance matrix
centroid: dict
centroid point used in the fit
References
----------
.. [Schenk2012] T. Schenk and B. M. Csatho, "A New Methodology for Detecting
Ice Sheet Surface Elevation Changes From Laser Altimetry Data",
*IEEE Transactions on Geoscience and Remote Sensing*, 50(9),
3302--3316, (2012). `doi:10.1109/TGRS.2011.2182357
<https://doi.org/10.1109/TGRS.2011.2182357>`_
.. [Sutterley2014] T. C. Sutterley, I. Velicogna, E. J. Rignot, J. Mouginot,
T. Flament, M. R. van den Broeke, J. M. van Wessem, C. H. Reijmer,
"Mass loss of the Amundsen Sea Embayment of West Antarctica from
four independent techniques", *Geophysical Research Letters*,
41(23), 8421--8428, (2014). `doi:10.1002/2014GL061940
<https://doi.org/10.1002/2014GL061940>`_
"""
# set default keyword arguments
kwargs.setdefault('FIT_TYPE', 'polynomial')
kwargs.setdefault('ORDER_TIME', 3)
kwargs.setdefault('ORDER_SPACE', 3)
kwargs.setdefault('KNOTS', [])
kwargs.setdefault('BOUNDED', True)
# remove singleton dimensions from input variables
t_in = np.squeeze(t_in)
x_in = np.squeeze(x_in)
y_in = np.squeeze(y_in)
d_in = np.squeeze(d_in)
# check that input dimensions match
assert (len(x_in) == len(t_in)) and (len(y_in) == len(t_in)) and \
(len(d_in) == len(t_in)), 'Input dimensions do not match'
# create design matrix for fit
M, centroid = _build_design_matrix(t_in, x_in, y_in, **kwargs)
# validate the design matrix
DMAT, indices = _validate_design_matrix(M)
# total number of temporal terms
n_time = _temporal_terms(**kwargs)
# total number of terms
n_max, n_total = M.shape
n_max, n_terms = DMAT.shape
# nu = Degrees of Freedom
nu = n_max - n_terms
# build the constraints for the fit
if kwargs['BOUNDED']:
bounds = _build_constraints(t_in, x_in, y_in, d_in,
INDICES=indices, **kwargs)
max_iter = None
else:
bounds = (-np.inf, np.inf)
max_iter = 1
# use linear least-squares (with bounds on the variables)
results = scipy.optimize.lsq_linear(DMAT, d_in,
bounds=bounds, max_iter=max_iter)
beta_mat = np.zeros((n_total))
beta_mat[indices] = np.copy(results['x'])
# estimated mean square error
MSE = np.sum(results['fun']**2)/np.float64(nu)
# Weights are equal
wi = 1.0
# modeled surface time-series
mod = np.dot(DMAT, results['x'])
# modeled data at centroid
data = np.dot(M[:,:n_time], beta_mat[:n_time])
# residual of fit
res = d_in - np.dot(DMAT, results['x'])
# calculating R^2 values
# SStotal = sum((Y-mean(Y))**2)
SStotal = np.dot(np.transpose(d_in[0:n_max] - np.mean(d_in[0:n_max])),
(d_in[0:n_max] - np.mean(d_in[0:n_max])))
# SSerror = sum((Y-X*B)**2)
SSerror = np.dot(np.transpose(d_in[0:n_max] - np.dot(DMAT,results['x'])),
(d_in[0:n_max] - np.dot(DMAT,results['x'])))
# R**2 term = 1- SSerror/SStotal
rsquare = 1.0 - (SSerror/SStotal)
# Adjusted R**2 term: weighted by degrees of freedom
rsq_adj = 1.0 - (SSerror/SStotal)*np.float64((n_max-1.0)/nu)
# Fit Criterion
# number of parameters including the intercept and the variance
K = np.float64(n_terms + 1)
# Log-Likelihood with weights (if unweighted, weight portions == 0)
# log(L) = -0.5*n*log(sigma^2) - 0.5*n*log(2*pi) - 0.5*n
log_lik = 0.5*(np.sum(np.log(wi)) - n_max*(np.log(2.0 * np.pi) + 1.0 -
np.log(n_max) + np.log(np.sum(wi * (res**2)))))
# Aikaike's Information Criterion
AIC = -2.0*log_lik + 2.0*K
if AICc:
# Second-Order AIC correcting for small sample sizes (restricted)
# Burnham and Anderson (2002) advocate use of AICc where
# ratio num/K is small
# A small ratio is defined in the definition at approximately < 40
AIC += (2.0*K*(K+1.0))/(n_max - K - 1.0)
# Bayesian Information Criterion (Schwarz Criterion)
BIC = -2.0*log_lik + np.log(n_max)*K
# Root mean square error
RMSE = np.sqrt(MSE)
# Normalized root mean square error
NRMSE = RMSE/(np.max(d_in[0:n_max]) - np.min(d_in[0:n_max]))
# Covariance Matrix
# Multiplying the design matrix by itself
Hinv = np.linalg.inv(np.dot(np.transpose(DMAT), DMAT))
# Taking the diagonal components of the covariance matrix
hdiag = np.zeros((n_total))
hdiag[indices] = np.diag(Hinv)
# set either the standard deviation or the confidence interval
if (STDEV != 0):
# Setting the standard deviation of the output error
alpha = 1.0 - scipy.special.erf(STDEV/np.sqrt(2.0))
elif (CONF != 0):
# Setting the confidence interval of the output error
alpha = 1.0 - CONF
else:
# Default is 95% confidence interval
alpha = 1.0 - (0.95)
# Student T-Distribution with D.O.F. nu
# t.ppf parallels tinv in matlab
tstar = scipy.stats.t.ppf(1.0-(alpha/2.0),nu)
# beta_err is the error for each coefficient
# beta_err = t(nu,1-alpha/2)*standard error
std_error = np.sqrt(MSE*hdiag)
beta_err = tstar*std_error
# return the modeled surface time-series and the coefficients
return {'beta':beta_mat, 'error':beta_err, 'data':data,
'model':mod, 'std_error':std_error, 'R2':rsquare,
'R2Adj':rsq_adj, 'MSE':MSE, 'NRMSE':NRMSE,
'AIC':AIC, 'BIC':BIC, 'LOGLIK':log_lik,
'residual':res, 'N':n_terms, 'DOF':nu,
'cov_mat':Hinv, 'centroid':centroid}
# Derivation of Sharp Breakpoint Piecewise Regression:
# http://www.esajournals.org/doi/abs/10.1890/02-0472
# y = beta_0 + beta_1*t + e (for x <= alpha)
# y = beta_0 + beta_1*t + beta_2*(t-alpha) + e (for x > alpha)
[docs]
def piecewise_bending(x, y, STEP=1, CONF=None):
"""
Fits a piecewise linear regression to elevation data
to find two sharp breakpoints
Parameters
----------
x: np.ndarray
input x-coordinate array
y: np.ndarray
input y-coordinate array
STEP: int, default 1
step size for regridding the input data
CONF: float or None, default None
confidence interval of output error
Returns
-------
point1: list
first breakpoint and confidence interval
point2: list
second breakpoint and confidence interval
model: np.ndarray
modeled surface from the piecewise fit
"""
# regrid x and y to STEP
XI = x[::STEP]
YI = y[::STEP]
# Creating Design matrix based on chosen input fit_type parameters:
nmax = len(XI)
P_x0 = np.ones((nmax))# Constant Term
P_x1a = XI[0:nmax]# Linear Term 1
# Calculating the number parameters to search
n_param = (nmax**2 - nmax)//2
# R^2 and Log-Likelihood
rsquare_array = np.zeros((n_param))
loglik_array = np.zeros((n_param))
# output cutoff and fit parameters
cutoff_array = np.zeros((n_param,2),dtype=int)
beta_matrix = np.zeros((n_param,4))
# counter variable
c = 0
# SStotal = sum((Y-mean(Y))^2)
SStotal = np.dot(np.transpose(YI - np.mean(YI)),(YI - np.mean(YI)))
# uniform distribution over entire range
for n in range(0,nmax):
# Linear Term 2 (= change from linear term1: trend2 = beta1+beta2)
P_x1b = np.zeros((nmax))
P_x1b[n:nmax] = XI[n:nmax] - XI[n]
for nn in range(n+1,nmax):
# Linear Term 3 (= change from linear term2)
P_x1c = np.zeros((nmax))
P_x1c[nn:nmax] = XI[nn:nmax] - XI[nn]
DMAT = np.transpose([P_x0, P_x1a, P_x1b, P_x1c])
# Calculating Least-Squares Coefficients
# Least-Squares fitting (the [0] denotes coefficients output)
beta_mat = np.linalg.lstsq(DMAT,YI,rcond=-1)[0]
# number of terms in least-squares solution
n_terms = len(beta_mat)
# nu = Degrees of Freedom
# number of measurements-number of parameters
nu = nmax - n_terms
# residual of data-model
residual = YI - np.dot(DMAT,beta_mat)
# CALCULATING R_SQUARE VALUES
# SSerror = sum((Y-X*B)^2)
SSerror = np.dot(np.transpose(residual),residual)
# R^2 term = 1- SSerror/SStotal
rsquare_array[c] = 1 - (SSerror/SStotal)
# Log-Likelihood
loglik_array[c] = 0.5*(-nmax*(np.log(2.0 * np.pi) + 1.0 - \
np.log(nmax) + np.log(np.sum(residual**2))))
# save cutoffs and beta matrix
cutoff_array[c,:] = [n,nn]
beta_matrix[c,:] = beta_mat
# add 1 to counter
c += 1
# find where Log-Likelihood is maximum
ind, = np.nonzero(loglik_array == loglik_array.max())
n,nn = cutoff_array[ind,:][0]
# create matrix of likelihoods
likelihood = np.zeros((nmax,nmax))
likelihood[:,:] = np.nan
likelihood[cutoff_array[:,0],cutoff_array[:,1]] = np.exp(loglik_array) / \
np.sum(np.exp(loglik_array))
# probability distribution functions of each cutoff
PDF1 = np.zeros((nmax))
PDF2 = np.zeros((nmax))
for i in range(nmax):
# PDF for cutoff 1 for all cutoff 2
PDF1[i] = np.nansum(likelihood[i,:])
# PDF for cutoff 2 for all cutoff 1
PDF2[i] = np.nansum(likelihood[:,i])
# calculate confidence intervals
if CONF is not None:
CI1 = _confidence_interval(XI, PDF1/np.sum(PDF1), CONF)
CI2 = _confidence_interval(XI, PDF2/np.sum(PDF2), CONF)
else:
# use a default confidence interval
CI1 = 5e3
CI2 = 5e3
# confidence interval for cutoffs
CMN1,CMX1 = (XI[n]-CI1, XI[n]+CI1)
CMN2,CMX2 = (XI[nn]-CI2, XI[nn]+CI2)
# calculate model using best fit coefficients
P_x0 = np.ones_like(x)
P_x1a = np.copy(x)
P_x1b = np.zeros_like(x)
P_x1c = np.zeros_like(x)
P_x1b[n*STEP:] = x[n*STEP:] - XI[n]
P_x1c[nn*STEP:] = x[nn*STEP:] - XI[nn]
DMAT = np.transpose([P_x0, P_x1a, P_x1b, P_x1c])
beta_mat, = beta_matrix[ind,:]
MODEL = np.dot(DMAT, beta_mat)
# return the cutoff points, their confidence interval and the model
point1 = [XI[n], CMN1, CMX1]
point2 = [XI[nn], CMN2, CMX2]
return (point1, point2, MODEL)
# PURPOSE: run a physical elastic bending model with Levenberg-Marquardt
# D. G. Vaughan, Journal of Geophysical Research Solid Earth, 1995
# A. M. Smith, Journal of Glaciology, 1991
[docs]
def elastic_bending(XI, YI,
METHOD='trf',
GRZ=[0,0,0],
TIDE=[0,0,0],
ORIENTATION=False,
THICKNESS=None,
CONF=0.95,
XOUT=None
):
"""
Fits an elastic bending model to the grounding zone of an
ice shelf [Smith1991]_ [Vaughan1995]_
Parameters
----------
XI: np.ndarray
input x-coordinate array
YI: np.ndarray
input y-coordinate array
METHOD: str
optimization algorithm to use in curve_fit
GRZ: np.ndarray
initial guess for the grounding line location
TIDE: np.ndarray
initial guess for the tidal amplitude
ORIENTATION: bool
reorient input parameters to go from land ice to floating
THICKNESS: np.ndarray or None
initial guess for ice thickness
CONF: float, default 0.95
confidence interval of output error
XOUT: np.ndarray or None
output x-coordinates for model fit
Returns
-------
GZ: np.ndarray
grounding line location and confidence interval
A: np.ndarray
tidal amplitude and confidence interval
E: np.ndarray
effective elastic modulus of ice and confidence interval
T: np.ndarray
ice thickness of ice shelf and confidence interval
dH: np.ndarray
mean height change and confidence interval
MODEL: np.ndarray
modeled surface from the elastic bending model
References
----------
.. [Smith1991] A. M. Smith, "The use of tiltmeters to study the
dynamics of Antarctic ice-shelf grounding lines",
*Journal of Glaciology*, 37(125), 51--58, (1991).
`doi:10.3198/1991JoG37-125-51-59
<https://doi.org/10.3198/1991JoG37-125-51-59>`_
.. [Vaughan1995] D. G. Vaughan, "Tidal flexure at ice shelf margins",
*Journal of Geophysical Research Solid Earth*, 100(B4),
6213--6224, (1995). `doi:10.1029/94JB02467
<https://doi.org/10.1029/94JB02467>`_
"""
# default output x-coordinates for model fit
if XOUT is None:
XOUT = np.copy(XI)
# reorient input parameters to go from land ice to floating
if ORIENTATION:
Xm1 = XI[-1]
GRZ = Xm1 - GRZ
GRZ[1:] = GRZ[:0:-1]
XI = Xm1 - XI[::-1]
YI = YI[::-1]
XOUT = Xm1 - XOUT[::-1]
# calculate thickness mean, min and max
if THICKNESS is not None:
# only use positive thickness values
# ocean points could be negative with tides
ii, = np.nonzero(THICKNESS > 0.0)
MTH = np.mean(THICKNESS[ii])
MNTH = np.min(THICKNESS[ii])
MXTH = np.max(THICKNESS[ii])
else:
MTH = 1000.0
MNTH = 100.0
MXTH = 1900.0
# elastic model parameters
# G0: location of grounding line
# A0: tidal amplitude (values from Padman 2002)
# E0: Effective Elastic modulus of ice [Pa]
# T0: ice thickness of ice shelf [m]
# dH0: mean height change (thinning/thickening)
p0 = [GRZ[0], TIDE[0], 1e9, MTH, 0.0]
# tuple for parameter bounds (lower and upper)
# G0: 95% confidence interval of initial fit
# A0: greater than +/- 2.4m value from Padman (2002)
# E0: Range from Table 1 of Vaughan (1995)
# T0: Range of ice thicknesses from Chuter (2015)
# dH0: mean height change +/- 10 m/yr
bounds = ([GRZ[1], TIDE[1], 8.3e8, MNTH, -10],
[GRZ[2], TIDE[2], 1e10, MXTH, 10])
# optimized curve fit with Levenberg-Marquardt algorithm
popt,pcov = scipy.optimize.curve_fit(_elastic, XI, YI,
p0=p0, bounds=bounds, method=METHOD)
MODEL = _elastic(XOUT, *popt)
# elasticmodel function outputs and 1 standard deviation uncertainties
GZ = np.zeros((2))
A = np.zeros((2))
E = np.zeros((2))
T = np.zeros((2))
dH = np.zeros((2))
GZ[0],A[0],E[0],T[0],dH[0] = popt[:]
# Error analysis
# nu = Degrees of Freedom = number of measurements-number of parameters
nu = len(XI) - len(p0)
# Setting the confidence interval of the output error
alpha = 1.0 - CONF
# Student T-Distribution with D.O.F. nu
# t.ppf parallels tinv in matlab
tstar = scipy.stats.t.ppf(1.0-(alpha/2.0),nu)
# error for each coefficient = t(nu,1-alpha/2)*standard error
perr = np.sqrt(np.diag(pcov))
GZ[1],A[1],E[1],T[1],dH[1] = tstar*perr[:]
# reverse the reorientation
if ORIENTATION:
GZ[0] = Xm1 - GZ[0]
MODEL = MODEL[::-1]
# return the model outputs
return (GZ, A, E, T, dH, MODEL)
# PURPOSE: calculate the interquartile range (Pritchard et al, 2009) and
# robust dispersion estimator (Smith et al, 2017) of the model residuals
[docs]
def _build_design_matrix(t_in, x_in, y_in,
FIT_TYPE='polynomial',
ORDER_SPACE=3,
TERMS=[],
**kwargs,
):
"""
Builds the complete design matrix for the surface fit
Parameters
----------
t_in: np.ndarray
input time array
x_in: np.ndarray
x-coordinate array
y_in: np.ndarray
y-coordinate array
FIT_TYPE: str
type of time-variable polynomial fit to apply
- ``'polynomial'``
- ``'chebyshev'``
- ``'spline'``
ORDER_TIME: int
maximum polynomial order in time-variable fit
ORDER_SPACE: int
maximum polynomial order in spatial fit
KNOTS: list or np.ndarray
Sorted 1D array of knots for time-variable spline fit
TERMS: list
list of extra terms
kwargs: dict
keyword arguments for the fit type
Returns
-------
DMAT: np.ndarray
Design matrix for the fit type
centroid: dict
centroid point of input coordinates
"""
# output design matrix
DMAT = []
# time-variable design matrix
if (FIT_TYPE.lower() == 'polynomial'):
TMAT, t_rel = _polynomial(t_in, **kwargs)
elif (FIT_TYPE.lower() == 'chebyshev'):
TMAT = _chebyshev(t_in, **kwargs)
elif (FIT_TYPE.lower() == 'spline'):
TMAT = _spline(t_in, **kwargs)
else:
raise ValueError(f'Fit type {FIT_TYPE} not recognized')
# append the time-variable design matrix
DMAT.extend(TMAT)
# surface design matrix
SMAT, centroid = _surface(x_in, y_in,
ORDER_SPACE=ORDER_SPACE, **kwargs)
DMAT.extend(SMAT)
# add additional terms to the design matrix
for t in TERMS:
DMAT.append(t)
# return the transpose of the design matrix and the centroid
return np.transpose(DMAT), centroid
[docs]
def _validate_design_matrix(DMAT):
"""
Validates the design matrix for the surface fit
Parameters
----------
DMAT: np.ndarray
Design matrix for the fit type
Returns
-------
DMAT: np.ndarray
Design matrix for the fit type
indices: np.ndarray
indices of valid columns in the design matrix
"""
# indices of valid columns in the design matrix
indices, = np.nonzero(np.any(DMAT != 0, axis=0))
# return the design matrix and the indices
return DMAT[:,indices], indices
[docs]
def _build_constraints(t_in, x_in, y_in, d_in, **kwargs):
"""
Builds the constraints for the surface fit
Parameters
----------
t_in: np.ndarray
input time array
x_in: np.ndarray
x-coordinate array
y_in: np.ndarray
y-coordinate array
d_in: np.ndarray
input data array
FIT_TYPE: str
type of time-variable polynomial fit to apply
- ``'polynomial'``
- ``'chebyshev'``
- ``'spline'``
ORDER_TIME: int
maximum polynomial order in time-variable fit
ORDER_SPACE: int
maximum polynomial order in spatial fit
KNOTS: list or np.ndarray
Sorted 1D array of knots for time-variable spline fit
TERMS: list
list of extra terms
INDICES: np.ndarray
indices of valid columns in the design matrix
kwargs: dict
keyword arguments for the fit type
Returns
-------
lb: np.ndarray
Lower bounds for the fit
ub: dict
Upper bounds for the fit
"""
# default keyword arguments
kwargs.setdefault('INDICES', Ellipsis)
kwargs.setdefault('TERMS', [])
# indices of valid columns in the design matrix
indices = kwargs['INDICES'].copy()
# total number of spatial and temporal terms
n_space = _spatial_terms(**kwargs)
n_time = _temporal_terms(**kwargs)
# total number of terms in fit
n_terms = n_space + n_time + len(kwargs['TERMS'])
# parameter bounds
lb = np.full((n_terms), -np.inf)
ub = np.full((n_terms), np.inf)
# minimum and maximum values for data and time
dmin = np.min(d_in)
dmax = np.max(d_in)
dsigma = np.std(d_in)
tmin = np.min(t_in)
tmax = np.max(t_in)
# bounds for surface
lb[0] = dmin - dsigma
ub[0] = dmax + dsigma
# time-variable constraints
FIT_TYPE = kwargs['FIT_TYPE'].lower()
if (FIT_TYPE == 'polynomial') and (n_time > 1):
lb[1] = (dmin - dmax - 2.0*dsigma)/(tmax - tmin)
ub[1] = (dmax - dmin + 2.0*dsigma)/(tmax - tmin)
elif (FIT_TYPE == 'chebyshev'):
pass
elif (FIT_TYPE == 'spline'):
# bounds for spline fit
for i in range(1, n_time):
lb[i] = dmin - dsigma
ub[i] = dmax + dsigma
else:
raise ValueError(f'Fit type {FIT_TYPE} not recognized')
# return the constraints
return (lb[indices], ub[indices])
[docs]
def _temporal_terms(**kwargs):
"""
Calculates the number of temporal terms for a given fit
Parameters
----------
FIT_TYPE: str
type of time-variable polynomial fit to apply
- ``'polynomial'``
- ``'chebyshev'``
- ``'spline'``
ORDER_TIME: int
maximum polynomial order in time-variable fit
KNOTS: list or np.ndarray
Sorted 1D array of knots for time-variable spline fit
Returns
-------
n_time: int
Number of time-variable terms in fit
"""
# calculate the number of temporal terms for a given fit
if kwargs['FIT_TYPE'] in ('spline', ):
n_time = len(kwargs['KNOTS']) - 2
else:
n_time = (kwargs['ORDER_TIME'] + 1)
# return the number of temporal terms for fit
return n_time
[docs]
def _spatial_terms(**kwargs):
"""
Calculates the number of spatial terms for a given fit
Parameters
----------
ORDER_SPACE: int
maximum polynomial order in spatial fit
Returns
-------
n_space: int
Number of spatial terms in fit
"""
n_space = np.sum(np.arange(2, kwargs['ORDER_SPACE'] + 2))
# return the number of temporal terms for fit
return n_space
[docs]
def _polynomial(t_in, RELATIVE=Ellipsis, ORDER_TIME=3, **kwargs):
"""
Create a polynomial design matrix for a time-series
Parameters
----------
t_in: np.ndarray
input time array
RELATIVE: int or np.ndarray
relative period
ORDER_TIME: int
maximum polynomial order in time-variable fit
Returns
-------
TMAT: list
time-variable design matrix based on polynomial order
t_rel: float
relative time
"""
# calculate epoch for calculating relative times
if isinstance(RELATIVE, (list, np.ndarray)):
t_rel = np.mean(RELATIVE)
elif isinstance(RELATIVE, (float, int, np.float64, np.int_)):
t_rel = np.copy(RELATIVE)
elif RELATIVE in (Ellipsis, None):
t_rel = t_in[RELATIVE].mean()
# time-variable design matrix based on polynomial order
TMAT = []
# add polynomial orders (0=constant, 1=linear, 2=quadratic, etc)
for o in range(ORDER_TIME+1):
TMAT.append((t_in-t_rel)**o)
# return the design matrix and the relative time
return (TMAT, t_rel)
[docs]
def _chebyshev(t_in, RELATIVE=None, ORDER_TIME=3, **kwargs):
"""
Create a Chebyshev design matrix for a time-series
Parameters
----------
t_in: np.ndarray
input time array
RELATIVE: list or np.ndarray
relative period
ORDER_TIME: int
maximum polynomial order in time-variable fit
Returns
-------
TMAT: list
time-variable design matrix based on polynomial order
"""
# scale time-series to be [-1,1]
# using either max and min of time-series or relative dates
if RELATIVE is None:
tmin = np.nanmin(t_in)
tmax = np.nanmax(t_in)
t_norm = ((t_in - tmin) - (tmax - t_in))/(tmax - tmin)
elif isinstance(RELATIVE, (list, np.ndarray)):
t_norm = ((t_in - RELATIVE[0]) - (RELATIVE[1]-t_in)) / \
(RELATIVE[1] - RELATIVE[0])
# time-variable design matrix based on polynomial order
TMAT = []
TMAT.append(np.ones_like(t_in))
TMAT.append(t_norm)
for o in range(2, ORDER_TIME+1):
TMAT.append(2.0*t_norm*TMAT[o-1] - TMAT[o-2])
# return the design matrix
return TMAT
def _spline(t_in, KNOTS=[], ORDER_TIME=3, **kwargs):
"""
Create a B-spline design matrix for a time-series
Parameters
----------
t_in: np.ndarray
input time array
KNOTS: list or np.ndarray
Sorted 1D array of knots
ORDER_TIME: int
B-spline degree
Returns
-------
TMAT: list
time-variable design matrix based on polynomial order
"""
# transpose the design matrix and add constant term
TMAT = BSpline.design_matrix(t_in, KNOTS, ORDER_TIME,
extrapolate=True).transpose().toarray()
TMAT[0,:] = np.ones_like(t_in)
# return the design matrix as a list
return TMAT.tolist()
[docs]
def _surface(x_in, y_in, ORDER_SPACE=3, **kwargs):
"""
Create a surface design matrix for fit
Parameters
----------
x_in: np.ndarray
x-coordinate array
y_in: np.ndarray
y-coordinate array
ORDER_SPACE: int
maximum polynomial order in spatial fit
kwargs: dict
keyword arguments for the fit type
Returns
-------
SMAT: list
surface design matrix
centroid: dict
centroid point of input coordinates
"""
# calculate centroid point
kwargs.setdefault('CX', np.mean(x_in))
kwargs.setdefault('CY', np.mean(y_in))
# calculate x and y relative to centroid point
centroid = dict(x=kwargs['CX'], y=kwargs['CY'])
rel_x = x_in - centroid['x']
rel_y = y_in - centroid['y']
# surface design matrix
SMAT = []
for o in range(1,ORDER_SPACE+1):
for p in range(o+1):
SMAT.append(rel_x**(o-p) * rel_y**p)
# return the design matrix and the centroid
return (SMAT, centroid)
# PURPOSE: create physical elastic bending model with a mean height change
[docs]
def _elastic(x, GZ, A, E, T, dH):
"""
Physical elastic bending model with a mean height change
Parameters
----------
x: np.ndarray
x-coordinate array
GZ: float
grounding line location
A: float
tidal amplitude
E: float
effective elastic modulus of ice
T: float
ice thickness of ice shelf
dH: float
mean height change
Returns
-------
model: np.ndarray
modeled surface from the elastic bending model
"""
# density of water [kg/m^3]
rho_w = 1030.0
# gravitational constant [m/s^2]
g = 9.806
# Poisson's ratio of ice
nu = 0.3
# structural rigidity of ice
D = (E*T**3)/(12.0*(1.0-nu**2))
# beta elastic damping parameter
b = (0.25*rho_w*g/D)**0.25
# distance of points from grounding line (R0 = 0 at grounding line)
R0 = (x[x >= GZ] - GZ)
# deflection of ice beyond the grounding line (elastic)
eta = np.zeros_like(x)
eta[x >= GZ] = A*(1.0-np.exp(-b*R0)*(np.cos(b*R0) + np.sin(b*R0)))
# model = large scale height change + tidal deflection
model = (dH + eta)
return model
# PURPOSE: calculate the confidence interval in the retrieval
[docs]
def _confidence_interval(x, f, p):
"""
Calculate the confidence interval
Parameters
----------
x: np.ndarray
input x-coordinate array
f: np.ndarray
input probability distribution
p: float
confidence interval
"""
# sorting probability distribution from smallest probability to largest
ii = np.argsort(f)
# compute the sorted cumulative probability distribution
cdf = np.cumsum(f[ii])
# linearly interpolate to confidence interval
J = np.interp(p, cdf, x[ii])
# position with maximum probability
K = x[ii[-1]]
return np.abs(K-J)