We can modify the procedure for fitting data from here : Fitting a Line
import numpy import matplotlib.pyplot as plt from scipy.optimize import fmin #============================================== ## Generating data to fit n = 20 xmin = 1 xmax = 11 a=2 b=4 funct=lambda x,a,b:a*x+b x = linspace(xmin,xmax,4) y = funct(x,a,b) #=============================================== #scores fit of functions for a set of parameters def score((parms)): #1 pull off parameters from optimizer input A,B=parms #2 data you are matching to funct=lambda x,a,b:a*x+b x = linspace(1,11,20) y = funct(x,2,4) #3 write function to fit data my_guess_function=lambda x,A,B:A*x+B #4 write cost function to see how well it fits ss=lambda x,y,A,B:((my_guess_function(x,A,B)-y)**2).sum() #5 return the fit score for this try return ss(x,y,A,B) #find the parameters that give us the lowest score answ=fmin(score,(3,1),full_output=1,maxiter=1000) #Create function from lowest scoring parameters function=lambda x,A,B:A*x+B A=answ[0][0] B=answ[0][1] X1 = linspace(xmin,xmax,n*5) Y1= funct(X1,A,B) #=============================================== #plotting the data and lowest scoring (aka best)fit plt.plot(x,y,'ro',X1,Y1,'-') plt.legend(('data', 'the lowest scoring fit'),'upper center') graphname='optimized fit' plt.title(graphname) plt.xlabel('time') plt.show() #===============================================
1) Figure out what the distribution of your data is at each timepoint:
Lets say we know that each of those red dots has a normal distribution andwe have the standard deviation for each sample
Python lets you create distributions and randomly sample from them
using the random module.
Here is an example: let's say we have a mean of 5 and a standard deviation of 2
for a timepoint and we know our population is normally distributed.
Using this knowledge, we can resample from the hypothetical population 1000
times... see below.
import numpy as np import matplotlib.pyplot as plt import matplotlib.mlab as mlab import random dist=[] for i in range(1,1000): v=random.normalvariate(5.0, 2.0) print v dist.append(v) x=np.array(dist) #Create Histogram of Sampled Data #============================================================== fig = plt.figure() ax = fig.add_subplot(111) binnum=20 #number of bins you want to sort the data into n, bins, patches = ax.hist(x,binnum, normed=1, facecolor='purple', alpha=0.75) ax.set_xlabel('x values') ax.set_ylabel('Probability') ax.set_title('Histogram') ax.set_xlim(0,11) ax.set_ylim(0,1) ax.grid(True) plt.show() #==========================================================================
This creates a resampled population normally distributed around 5.
2) Create Pseudo-Data Sets from Distributions
Now that we know what the distribution looks like for each point we can
randomly resample each data point and create new pseudodata sets that
could represent the larger population.
I have created 9 such pseudo-data sets for the line fit using the code below:
import numpy import matplotlib.pyplot as plt from scipy.optimize import fmin #============================================== ## Generating data to fit n = 20 xmin = 1 xmax = 11 a=2 b=4 funct=lambda x,a,b:a*x+b x = linspace(xmin,xmax,4) y = funct(x,a,b) #=============================================== #create resampled data sets #==================================================================== #create vector that will hold shape parameters for each distribution #(in this case our shape parameter is the standard deviation) sp=[2,4,5,3] rs=[] #resample matrix for i in range(1,10): test=[random.normalvariate(y[i],sp[i]) for i in range(0,len(y))] rs.append(test) for i in rs: print i #===================================================================== >>> [4.7843939442434049, 16.335795443248493, 23.923415020827772, 28.926647319876341] [1.4550122401530485, 11.938249400807138, 18.58514566588746, 28.753592173175928] [8.4116306190572594, 13.739921412912709, 15.298217154055491, 25.072461269093058] [10.804490320068146, 11.70660584443795, 9.3519437241967207, 22.426218200316015] [8.1523361887669168, 15.342873727826307, 17.955868012168718, 27.418223311717338] [4.6100643262417726, 14.725450585717125, 12.801632690166691, 27.866769533245787] [8.1626813397096498, 11.975600098300783, 21.663261724594363, 28.361739176867761] [7.774551346614186, 11.701822045751257, 17.199540728133663, 26.062178459008841] [4.7840722792105623, 18.40165646924935, 25.292649340312746, 20.674055879381079] >>>
3) Fit Pseudo-Data
import numpy import matplotlib.pyplot as plt from scipy.optimize import fmin ## Generating data to fit #=================================================================== n = 20 xmin = 1 xmax = 11 a=2 b=4 funct=lambda x,a,b:a*x+b x = linspace(xmin,xmax,4) y = funct(x,a,b) ##Create resampled data sets #==================================================================== #create vector that will hold shape parameters for each distribution #(in this case our shape parameter is the standard deviation) sp=[2,4,5,3] rs=[] #resample matrix for i in range(1,10): test=[random.normalvariate(y[i],sp[i]) for i in range(0,len(y))] rs.append(test) ## Scoring Function #===================================================================== def score((parms)): #1 pull off parameters from optimizer input A,B=parms #2 data you are matching to funct=lambda x,a,b:a*x+b #3 write function to fit data my_guess_function=lambda x,A,B:A*x+B #4 write cost function to see how well it fits ss=lambda x,y,A,B:((my_guess_function(x,A,B)-y)**2).sum() #5 return the fit score for this try return ss(x,y,A,B) ## Fit Pseudo Data #==================================================================== #find the parameters that give us the lowest score for each set of #pseudo-data FitMat=[] for ps in rs: y=ps answ=fmin(score,(3,1),full_output=1,maxiter=1000) print answ #Create function from lowest scoring parameters function=lambda x,A,B:A*x+B A=answ[0][0] B=answ[0][1] X1 = linspace(xmin,xmax,n*5) Y1= funct(X1,A,B) FitMat.append(Y1) ##Plot Pseudo Data and Fits #=================================================================== #plotting the data and lowest scoring (aka best)fit for i in range(0,len(rs)): plt.plot(x,rs[i],'o',X1,FitMat[i],'-') plt.legend(('data', 'the lowest scoring fit'),'upper center') graphname='optimized fit' plt.title(graphname) plt.xlabel('time') plt.show()
4) Examine the variation in the Parameter fits on the Pseudo-data
import numpy import matplotlib.pyplot as plt from scipy.optimize import fmin ## Generating data to fit #=================================================================== n = 20 xmin = 1 xmax = 11 a=2 b=4 funct=lambda x,a,b:a*x+b x = linspace(xmin,xmax,4) y = funct(x,a,b) ##Create resampled data sets #==================================================================== #create vector that will hold shape parameters for each distribution #(in this case our shape parameter is the standard deviation) sp=[2,4,5,3] rs=[] #resample matrix for i in range(1,10): test=[random.normalvariate(y[i],sp[i]) for i in range(0,len(y))] rs.append(test) ## Scoring Function #===================================================================== def score((parms)): #1 pull off parameters from optimizer input A,B=parms #2 data you are matching to funct=lambda x,a,b:a*x+b #3 write function to fit data my_guess_function=lambda x,A,B:A*x+B #4 write cost function to see how well it fits ss=lambda x,y,A,B:((my_guess_function(x,A,B)-y)**2).sum() #5 return the fit score for this try return ss(x,y,A,B) ## Fit Pseudo Data #==================================================================== #find the parameters that give us the lowest score for each set of #pseudo-data FitMat=[] parmA=[] parmB=[] for ps in rs: y=ps answ=fmin(score,(3,1),full_output=1,maxiter=1000) print answ #Create function from lowest scoring parameters function=lambda x,A,B:A*x+B A=answ[0][0] B=answ[0][1] X1 = linspace(xmin,xmax,n*5) Y1= funct(X1,A,B) FitMat.append(Y1) parmA.append(A) parmB.append(B) ##Figure out Stats on Parm #================================================================== print "slope is on average " + str(np.average(parmA)) print "with a standard deviation of " +str(np.std(parmA)) ''' ##Plot Pseudo Data and Fits #=================================================================== #plotting the data and lowest scoring (aka best)fit for i in range(0,len(rs)): plt.plot(x,rs[i],'o',X1,FitMat[i],'-') plt.legend(('data', 'the lowest scoring fit'),'upper center') graphname='optimized fit' plt.title(graphname) plt.xlabel('time') plt.show() ''' >>> slope is on average 1.92806743556 with a standard deviation of 0.269858064123 >>>
No comments:
Post a Comment