We can modify the procedure for fitting data from here : Fitting a Line
import numpy
import matplotlib.pyplot as plt
from scipy.optimize import fmin
#==============================================
## Generating data to fit
n = 20
xmin = 1
xmax = 11
a=2
b=4
funct=lambda x,a,b:a*x+b
x = linspace(xmin,xmax,4)
y = funct(x,a,b)
#===============================================
#scores fit of functions for a set of parameters
def score((parms)):
#1 pull off parameters from optimizer input
A,B=parms
#2 data you are matching to
funct=lambda x,a,b:a*x+b
x = linspace(1,11,20)
y = funct(x,2,4)
#3 write function to fit data
my_guess_function=lambda x,A,B:A*x+B
#4 write cost function to see how well it fits
ss=lambda x,y,A,B:((my_guess_function(x,A,B)-y)**2).sum()
#5 return the fit score for this try
return ss(x,y,A,B)
#find the parameters that give us the lowest score
answ=fmin(score,(3,1),full_output=1,maxiter=1000)
#Create function from lowest scoring parameters
function=lambda x,A,B:A*x+B
A=answ[0][0]
B=answ[0][1]
X1 = linspace(xmin,xmax,n*5)
Y1= funct(X1,A,B)
#===============================================
#plotting the data and lowest scoring (aka best)fit
plt.plot(x,y,'ro',X1,Y1,'-')
plt.legend(('data', 'the lowest scoring fit'),'upper center')
graphname='optimized fit'
plt.title(graphname)
plt.xlabel('time')
plt.show()
#===============================================
1) Figure out what the distribution of your data is at each timepoint:
Lets say we know that each of those red dots has a normal distribution andwe have the standard deviation for each sample
Python lets you create distributions and randomly sample from them
using the random module.
Here is an example: let's say we have a mean of 5 and a standard deviation of 2
for a timepoint and we know our population is normally distributed.
Using this knowledge, we can resample from the hypothetical population 1000
times... see below.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import random
dist=[]
for i in range(1,1000):
v=random.normalvariate(5.0, 2.0)
print v
dist.append(v)
x=np.array(dist)
#Create Histogram of Sampled Data
#==============================================================
fig = plt.figure()
ax = fig.add_subplot(111)
binnum=20 #number of bins you want to sort the data into
n, bins, patches = ax.hist(x,binnum, normed=1, facecolor='purple', alpha=0.75)
ax.set_xlabel('x values')
ax.set_ylabel('Probability')
ax.set_title('Histogram')
ax.set_xlim(0,11)
ax.set_ylim(0,1)
ax.grid(True)
plt.show()
#==========================================================================
This creates a resampled population normally distributed around 5.
2) Create Pseudo-Data Sets from Distributions
Now that we know what the distribution looks like for each point we can
randomly resample each data point and create new pseudodata sets that
could represent the larger population.
I have created 9 such pseudo-data sets for the line fit using the code below:
import numpy
import matplotlib.pyplot as plt
from scipy.optimize import fmin
#==============================================
## Generating data to fit
n = 20
xmin = 1
xmax = 11
a=2
b=4
funct=lambda x,a,b:a*x+b
x = linspace(xmin,xmax,4)
y = funct(x,a,b)
#===============================================
#create resampled data sets
#====================================================================
#create vector that will hold shape parameters for each distribution
#(in this case our shape parameter is the standard deviation)
sp=[2,4,5,3]
rs=[] #resample matrix
for i in range(1,10):
test=[random.normalvariate(y[i],sp[i]) for i in range(0,len(y))]
rs.append(test)
for i in rs:
print i
#=====================================================================
>>>
[4.7843939442434049, 16.335795443248493, 23.923415020827772, 28.926647319876341]
[1.4550122401530485, 11.938249400807138, 18.58514566588746, 28.753592173175928]
[8.4116306190572594, 13.739921412912709, 15.298217154055491, 25.072461269093058]
[10.804490320068146, 11.70660584443795, 9.3519437241967207, 22.426218200316015]
[8.1523361887669168, 15.342873727826307, 17.955868012168718, 27.418223311717338]
[4.6100643262417726, 14.725450585717125, 12.801632690166691, 27.866769533245787]
[8.1626813397096498, 11.975600098300783, 21.663261724594363, 28.361739176867761]
[7.774551346614186, 11.701822045751257, 17.199540728133663, 26.062178459008841]
[4.7840722792105623, 18.40165646924935, 25.292649340312746, 20.674055879381079]
>>>
3) Fit Pseudo-Data
import numpy
import matplotlib.pyplot as plt
from scipy.optimize import fmin
## Generating data to fit
#===================================================================
n = 20
xmin = 1
xmax = 11
a=2
b=4
funct=lambda x,a,b:a*x+b
x = linspace(xmin,xmax,4)
y = funct(x,a,b)
##Create resampled data sets
#====================================================================
#create vector that will hold shape parameters for each distribution
#(in this case our shape parameter is the standard deviation)
sp=[2,4,5,3]
rs=[] #resample matrix
for i in range(1,10):
test=[random.normalvariate(y[i],sp[i]) for i in range(0,len(y))]
rs.append(test)
## Scoring Function
#=====================================================================
def score((parms)):
#1 pull off parameters from optimizer input
A,B=parms
#2 data you are matching to
funct=lambda x,a,b:a*x+b
#3 write function to fit data
my_guess_function=lambda x,A,B:A*x+B
#4 write cost function to see how well it fits
ss=lambda x,y,A,B:((my_guess_function(x,A,B)-y)**2).sum()
#5 return the fit score for this try
return ss(x,y,A,B)
## Fit Pseudo Data
#====================================================================
#find the parameters that give us the lowest score for each set of
#pseudo-data
FitMat=[]
for ps in rs:
y=ps
answ=fmin(score,(3,1),full_output=1,maxiter=1000)
print answ
#Create function from lowest scoring parameters
function=lambda x,A,B:A*x+B
A=answ[0][0]
B=answ[0][1]
X1 = linspace(xmin,xmax,n*5)
Y1= funct(X1,A,B)
FitMat.append(Y1)
##Plot Pseudo Data and Fits
#===================================================================
#plotting the data and lowest scoring (aka best)fit
for i in range(0,len(rs)):
plt.plot(x,rs[i],'o',X1,FitMat[i],'-')
plt.legend(('data', 'the lowest scoring fit'),'upper center')
graphname='optimized fit'
plt.title(graphname)
plt.xlabel('time')
plt.show()
4) Examine the variation in the Parameter fits on the Pseudo-data
import numpy
import matplotlib.pyplot as plt
from scipy.optimize import fmin
## Generating data to fit
#===================================================================
n = 20
xmin = 1
xmax = 11
a=2
b=4
funct=lambda x,a,b:a*x+b
x = linspace(xmin,xmax,4)
y = funct(x,a,b)
##Create resampled data sets
#====================================================================
#create vector that will hold shape parameters for each distribution
#(in this case our shape parameter is the standard deviation)
sp=[2,4,5,3]
rs=[] #resample matrix
for i in range(1,10):
test=[random.normalvariate(y[i],sp[i]) for i in range(0,len(y))]
rs.append(test)
## Scoring Function
#=====================================================================
def score((parms)):
#1 pull off parameters from optimizer input
A,B=parms
#2 data you are matching to
funct=lambda x,a,b:a*x+b
#3 write function to fit data
my_guess_function=lambda x,A,B:A*x+B
#4 write cost function to see how well it fits
ss=lambda x,y,A,B:((my_guess_function(x,A,B)-y)**2).sum()
#5 return the fit score for this try
return ss(x,y,A,B)
## Fit Pseudo Data
#====================================================================
#find the parameters that give us the lowest score for each set of
#pseudo-data
FitMat=[]
parmA=[]
parmB=[]
for ps in rs:
y=ps
answ=fmin(score,(3,1),full_output=1,maxiter=1000)
print answ
#Create function from lowest scoring parameters
function=lambda x,A,B:A*x+B
A=answ[0][0]
B=answ[0][1]
X1 = linspace(xmin,xmax,n*5)
Y1= funct(X1,A,B)
FitMat.append(Y1)
parmA.append(A)
parmB.append(B)
##Figure out Stats on Parm
#==================================================================
print "slope is on average " + str(np.average(parmA))
print "with a standard deviation of " +str(np.std(parmA))
'''
##Plot Pseudo Data and Fits
#===================================================================
#plotting the data and lowest scoring (aka best)fit
for i in range(0,len(rs)):
plt.plot(x,rs[i],'o',X1,FitMat[i],'-')
plt.legend(('data', 'the lowest scoring fit'),'upper center')
graphname='optimized fit'
plt.title(graphname)
plt.xlabel('time')
plt.show()
'''
>>>
slope is on average 1.92806743556
with a standard deviation of 0.269858064123
>>>



No comments:
Post a Comment