# This version is for Python, tested with Python 2.7.10 and 3.6.0
# It requires scipy and its dependencies
# based on example from nbviewer.ipython.org/gist/cdeil/5030045 on 4 March 2015

import matplotlib
import matplotlib.pyplot as plt
import pylab
import numpy as np
from scipy.optimize import curve_fit
from scipy.stats import chisqprob

# can run with
# python runtestfit.py
# or within the python shell
# import runtestfit

# example data
xdata = np.array( [1.0, 2.0, 3.0, 5.0, 7.0, 10.0] )
ydata = np.array( [1.64, 3.22, 3.87, 6.85, 10.96, 14.37] )
sigma = np.array( [0.20, 0.40, 0.50, 0.60, 0.70, 0.80] )

# Alternate, less precise data gives larger uncertainties on slope
#ydata = np.array( [1.43, 2.10, 3.20, 6.43, 12.33, 12.75] )
#sigma = np.array( [0.40, 0.80, 1.00, 1.20, 1.40, 1.60] )

print('fit coefficients from polyfit unweighted')
p = np.polyfit(xdata,ydata,1)

print(p)

print('The above is the incorrect fit from an unweighted fit to data')
print('If you tweak the function to return an error, it would be wrong too.')
print(' ')

print('Now start a correct weighted fit, printing out stuff as we go')

# define an arbitrary function.  Here its a slope+intercept linear function.

def modelfun(x,slope,intercept):
    """Linear function (two fit parameters)"""
    return slope * x + intercept

# Here's what curve_fit gives by default, should be same as polyfit.
popt, pcov = curve_fit(modelfun, xdata, ydata, p0=[2,0])
# suppress it for now, but draw it later, just for kicks.
# Here is the weighted one we want.  One syntax is to give sigmas.

p0 = np.array( [2.0,0.0] )  # start guess for the fit
wpopt, wpcov = curve_fit(modelfun, xdata, ydata, p0, sigma=sigma)
# note, wants us to give sigma (like from a Gaussian error estimate) directly
# other statistics packages want weight = 1.0 / (sigma)^2


wperr = np.sqrt(np.diag(wpcov))
#print('slope:     {0:.5f} +- {1:.5f}'.format(popt[0], perr[0]))
#print('intercept: {0:.5f} +- {1:.5f}'.format(popt[1], perr[1]))

print(' ')
print('The correct fit but incorrect error directly from covariance matrix')
print('Doesnt yet know we had assigned uncertainties to the data points')
print('popt best fit parameters:')
print(wpopt)
print('perr parameter errors unscaled:')
print(wpopt)

print(' ')
print('Here is the unscaled covariance matrix')
print('It wont change if you change the sigmas')
print('You can verify the errors above are just sqrt(diagonal elements)')
print('wpcov:')
print(wpcov)

# But the above is not what a physicist wants.  We have to do one more step.
# The above best fit is right, but the uncertainties are not right.
# you can figure that out by artificially making sigma smaller
# and contrary to what you expect, the parameter uncertainties don't shrink.
# curve_fit is thinking of sigma as weights (relative size is important)
# and not as error bars (absolute size is also important).
# this will scale everything to the right absolute size.
chi = (ydata - modelfun(xdata, *wpopt)) / sigma  # compute chi for every data point by hand
chi2 = (chi **2).sum()  #add them up.
dof = len(ydata) - len(wpopt)  #degrees of freedom.
# print 'dof: ', dof
 
perr_scale_factor = 1.0 / np.sqrt((chi2/dof))

print(' ')
print('The correct fit and approximate errors from diagonals of covariance matrix')
print('Scaled the covariance matrix by this chisquare/dof based on our sigmas ')
print('best fit slope and intercept:')
print(wpopt)
print('correct uncertainties on the parameters:')
print(perr_scale_factor * wperr)
print('scale factor', perr_scale_factor)
print("weighted chisquare ", chi2, " for dof ", dof, " probability ", chisqprob(chi2,dof))

# now make a pretty picture.

# set up some formatting things first
pylab.rc('font', family='sans', size=16)

# make a plot from the data, with some formatting and titles.
plt.figure(facecolor="white")
plt.errorbar(xdata, ydata, yerr=sigma, fmt='o', linewidth = 2.0, color="black")
plt.axis([0,12,0,18])
plt.title("illustration of a weighted fit")
plt.ylabel("distance (micrometers)", fontsize=16)  # the fontsize here is gratuitious
plt.xlabel("time (milliseconds)", fontsize=16)

# make a plot from the best fit function, and also the wrong fit.
plt.plot(xdata,modelfun(xdata,*popt),'-',linewidth = 2.0, color="gray")
plt.plot(xdata,modelfun(xdata,*wpopt),'-',linewidth = 2.0, color="blue")

plt.show()


# would be cool to draw parameters with higher and lower values.
# there is still a funny thing here.  The two fit parameters are correlated
# some uses of fits to data need to incorporate that information too.

# raw_input()