# This version is for Python, tested with Python 2.7.10 and 3.6.0 # It requires scipy and its dependencies # based on example from nbviewer.ipython.org/gist/cdeil/5030045 on 4 March 2015 import matplotlib import matplotlib.pyplot as plt import pylab import numpy as np from scipy.optimize import curve_fit from scipy.stats import chisqprob # can run with # python runtestfit.py # or within the python shell # import runtestfit # example data xdata = np.array( [1.0, 2.0, 3.0, 5.0, 7.0, 10.0] ) ydata = np.array( [1.64, 3.22, 3.87, 6.85, 10.96, 14.37] ) sigma = np.array( [0.20, 0.40, 0.50, 0.60, 0.70, 0.80] ) # Alternate, less precise data gives larger uncertainties on slope #ydata = np.array( [1.43, 2.10, 3.20, 6.43, 12.33, 12.75] ) #sigma = np.array( [0.40, 0.80, 1.00, 1.20, 1.40, 1.60] ) print('fit coefficients from polyfit unweighted') p = np.polyfit(xdata,ydata,1) print(p) print('The above is the incorrect fit from an unweighted fit to data') print('If you tweak the function to return an error, it would be wrong too.') print(' ') print('Now start a correct weighted fit, printing out stuff as we go') # define an arbitrary function. Here its a slope+intercept linear function. def modelfun(x,slope,intercept): """Linear function (two fit parameters)""" return slope * x + intercept # Here's what curve_fit gives by default, should be same as polyfit. popt, pcov = curve_fit(modelfun, xdata, ydata, p0=[2,0]) # suppress it for now, but draw it later, just for kicks. # Here is the weighted one we want. One syntax is to give sigmas. p0 = np.array( [2.0,0.0] ) # start guess for the fit wpopt, wpcov = curve_fit(modelfun, xdata, ydata, p0, sigma=sigma) # note, wants us to give sigma (like from a Gaussian error estimate) directly # other statistics packages want weight = 1.0 / (sigma)^2 wperr = np.sqrt(np.diag(wpcov)) #print('slope: {0:.5f} +- {1:.5f}'.format(popt[0], perr[0])) #print('intercept: {0:.5f} +- {1:.5f}'.format(popt[1], perr[1])) print(' ') print('The correct fit but incorrect error directly from covariance matrix') print('Doesnt yet know we had assigned uncertainties to the data points') print('popt best fit parameters:') print(wpopt) print('perr parameter errors unscaled:') print(wpopt) print(' ') print('Here is the unscaled covariance matrix') print('It wont change if you change the sigmas') print('You can verify the errors above are just sqrt(diagonal elements)') print('wpcov:') print(wpcov) # But the above is not what a physicist wants. We have to do one more step. # The above best fit is right, but the uncertainties are not right. # you can figure that out by artificially making sigma smaller # and contrary to what you expect, the parameter uncertainties don't shrink. # curve_fit is thinking of sigma as weights (relative size is important) # and not as error bars (absolute size is also important). # this will scale everything to the right absolute size. chi = (ydata - modelfun(xdata, *wpopt)) / sigma # compute chi for every data point by hand chi2 = (chi **2).sum() #add them up. dof = len(ydata) - len(wpopt) #degrees of freedom. # print 'dof: ', dof perr_scale_factor = 1.0 / np.sqrt((chi2/dof)) print(' ') print('The correct fit and approximate errors from diagonals of covariance matrix') print('Scaled the covariance matrix by this chisquare/dof based on our sigmas ') print('best fit slope and intercept:') print(wpopt) print('correct uncertainties on the parameters:') print(perr_scale_factor * wperr) print('scale factor', perr_scale_factor) print("weighted chisquare ", chi2, " for dof ", dof, " probability ", chisqprob(chi2,dof)) # now make a pretty picture. # set up some formatting things first pylab.rc('font', family='sans', size=16) # make a plot from the data, with some formatting and titles. plt.figure(facecolor="white") plt.errorbar(xdata, ydata, yerr=sigma, fmt='o', linewidth = 2.0, color="black") plt.axis([0,12,0,18]) plt.title("illustration of a weighted fit") plt.ylabel("distance (micrometers)", fontsize=16) # the fontsize here is gratuitious plt.xlabel("time (milliseconds)", fontsize=16) # make a plot from the best fit function, and also the wrong fit. plt.plot(xdata,modelfun(xdata,*popt),'-',linewidth = 2.0, color="gray") plt.plot(xdata,modelfun(xdata,*wpopt),'-',linewidth = 2.0, color="blue") plt.show() # would be cool to draw parameters with higher and lower values. # there is still a funny thing here. The two fit parameters are correlated # some uses of fits to data need to incorporate that information too. # raw_input()