2016-12-28 330 views
0

我有一个240个月的最大潮汐阵列,我试图将GEV曲线拟合为该数据用于返回期间计算。然而,拟合的GEV曲线并不像输入到GEV函数的潮汐直方图。Python scipy GEV拟合不匹配分布

import numpy as np 
import matplotlib.pyplot as plt 
from scipy.stats import genextreme as gev 

tides = np.array([204.25, 184.87, 164.15, 158.54, 194.47, 206.31, 212.04, 
209.24, 186.28, 176.27, 181.72, 199.49, 205.97, 198.42, 187.2, 170.42, 188.22, 
193.66, 206.12, 204.03, 187.64, 188.66, 190.92, 191.3, 196.25, 191.91, 166.42, 
188.73, 192.57, 199.81, 193.57, 193.28, 198.45, 192.17, 200.9, 212.57, 205.65, 
188.84, 175.5, 180.52, 199.2, 202.07, 209.27, 202.07, 187.95, 199.11, 206.81, 
235.44, 204.04, 195.15, 173.85, 163.17, 191.7, 201.87, 212.38, 207.92, 171.61, 
186.32, 201.58, 222.89, 206.96, 200.68, 178.82, 183.91, 198.82, 209.23, 
224.03, 230.06, 199.87, 201.07, 205.59, 211.58, 210.78, 205.9, 182.66, 199.49, 
195.04, 196.12, 197.82, 203.91, 188.28, 196.81, 200.88, 201.25, 212.27, 
178.33, 173.86, 185.71, 191.83, 202.56, 195.54, 189.08, 184.48, 199.92, 
206.66, 198.95, 188.12, 176.24, 161.95, 172.67, 196.1, 207.34, 208.96, 209.65, 
178.95, 188.49, 211.91, 218.64, 201.82, 193.37, 170.33, 185.98, 201.05, 
212.28, 213.93, 204.78, 195.17, 196.68, 210.0, 211.09, 208.75, 191.5, 201.17, 
190.19, 195.78, 197.68, 209.58, 205.62, 190.79, 198.04, 206.89, 210.84, 
202.58, 180.44, 178.58, 191.25, 209.43, 205.74, 194.24, 192.74, 193.11, 
209.92, 214.03, 220.04, 187.46, 191.46, 161.37, 180.56, 192.58, 205.59, 208.1, 
192.8, 180.27, 195.74, 201.17, 209.86, 201.87, 179.38, 167.11, 179.99, 208.07, 
212.23, 205.14, 201.21, 180.63, 176.36, 190.89, 206.73, 205.34, 188.07, 
169.57, 176.18, 191.82, 194.07, 205.99, 204.98, 200.29, 190.52, 189.14, 
194.65, 188.97, 198.19, 178.03, 182.65, 194.29, 196.0, 193.19, 194.43, 179.63, 
197.73, 204.24, 199.32, 209.48, 204.62, 193.44, 181.99, 196.02, 204.84, 209.4, 
194.12, 175.39, 194.88, 208.65, 205.94, 197.69, 184.47, 172.59, 183.86, 
199.14, 213.82, 206.46, 194.48, 175.3, 176.1, 194.91, 208.59, 209.01, 190.92, 
191.17, 175.59, 195.32, 206.8, 217.82, 212.64, 195.08, 180.13, 190.87, 203.0, 
196.91, 189.42, 170.31, 170.07, 181.7, 187.96, 194.01, 207.64, 194.11, 192.11, 
202.95, 197.85]) 

gev_fit = gev.fit(tides) 

x = np.linspace(np.min(tides)-10,np.max(tides)+10,1000) 
gev_pdf = gev.pdf(x, gev_fit[0], gev_fit[1], gev_fit[2]) 

plt.subplot(1,2,1) 
plt.hist(tides, normed=True, alpha=0.2, label='Data') 
plt.xlabel('Tides (cm)') 

plt.subplot(1,2,2) 
plt.hist(tides, normed=True, alpha=0.2, label='Data') 
plt.plot(x, gev_pdf, 'r--', label='GEV Fit') 
plt.xlabel('Tides (cm)') 
plt.legend(loc='upper left') 
plt.show() 

fit image

正如你所看到的,GEV配合完全不匹配原始数据的分布。你有任何改善健康的建议吗?

+0

我复制并粘贴了代码,然后只运行它,直到'gev_fit = gev.fit(tides)',因此它发出了这个诊断:'C:\ Python34 \ lib \ site-packages \ scipy \ stats \ _distn_infrastructure .py:1608:RuntimeWarning:在日志中遇到零除以 return log(self._pdf(x,* args))''。我一直希望能找到一些ML估计的特点。 –

+0

@BillBell感谢您的评论,你有什么想法为什么提出这个警告或如何解决它? – N1B4

+0

我的怀疑是,这是一个众所周知的MLE应用程序失败的例子(请参阅https://en.wikipedia.org/wiki/Maximum_spacing_estimation)。我正在寻找一种MPS算法(这是免费的!),目前为止没有取得太大的成功。 –

回答

1

我承认:经过几个小时的汗流effort背,这就是我所拥有的。这是一个粗略的示例最大间距估计似乎确实收敛到合理的估计向量。

import numpy as np 
import matplotlib.pyplot as plt 
from scipy.stats import genextreme 
from scipy.optimize import minimize 
from collections import Counter 

tides = [204.25, 184.87, 164.15, 158.54, 194.47, 206.31, 212.04, 209.24, 186.28, 176.27, 181.72, 199.50, 205.97, 198.42, 187.2, 170.42, 188.22, 193.66, 206.12, 204.03, 187.64, 188.66, 190.92, 191.3, 196.25, 191.91, 166.42, 188.73, 192.57, 199.81, 193.57, 193.28, 198.45, 192.17, 200.9, 212.57, 205.65, 188.84, 175.5, 180.52, 199.2, 202.08, 209.27, 202.07, 187.95, 199.11, 206.81, 235.44, 204.04, 195.15, 173.85, 163.17, 191.7, 201.88, 212.38, 207.92, 171.61, 186.32, 201.58, 222.89, 206.96, 200.68, 178.82, 183.91, 198.82, 209.23, 224.03, 230.06, 199.87, 201.07, 205.60, 211.58, 210.78, 205.9, 182.66, 199.49, 195.04, 196.12, 197.82, 203.91, 188.28, 196.81, 200.88, 201.25, 212.27, 178.33, 173.86, 185.71, 191.83, 202.56, 195.54, 189.08, 184.48, 199.92, 206.66, 198.95, 188.12, 176.24, 161.95, 172.67, 196.1, 207.34, 208.96, 209.65, 178.95, 188.49, 211.91, 218.64, 201.82, 193.37, 170.33, 185.98, 201.05, 212.28, 213.93, 204.78, 195.17, 196.68, 210.0, 211.09, 208.75, 191.5, 201.17, 190.19, 195.78, 197.68, 209.58, 205.62, 190.79, 198.04, 206.89, 210.84, 202.58, 180.44, 178.58, 191.25, 209.43, 205.74, 194.24, 192.74, 193.11, 209.92, 214.03, 220.04, 187.46, 191.46, 161.37, 180.56, 192.58, 205.59, 208.1, 192.8, 180.27, 195.74, 201.18, 209.86, 201.87, 179.38, 167.11, 179.99, 208.07, 212.23, 205.14, 201.21, 180.63, 176.36, 190.89, 206.73, 205.34, 188.07, 169.57, 176.18, 191.82, 194.07, 205.99, 204.98, 200.29, 190.52, 189.14, 194.65, 188.97, 198.19, 178.03, 182.65, 194.29, 196.0, 193.19, 194.43, 179.63, 197.73, 204.24, 199.32, 209.48, 204.62, 193.44, 181.99, 196.02, 204.84, 209.4, 194.12, 175.39, 194.88, 208.65, 205.94, 197.69, 184.47, 172.59, 183.86, 199.14, 213.82, 206.46, 194.48, 175.3, 176.1, 194.91, 208.59, 209.01, 190.93, 191.17, 175.59, 195.32, 206.8, 217.82, 212.64, 195.08, 180.13, 190.87, 203.0, 196.91, 189.42, 170.31, 170.07, 181.7, 187.96, 194.01, 207.64, 194.11, 192.11, 202.95, 197.85] 
tides.sort() 

#~ counts = Counter(tides) 
#~ print (counts) 

def fun(params,tides): 
    F = lambda x: genextreme.cdf(x,*params) 
    result = -sum([np.log(d) for d in np.diff([0]+[F(_) for _ in tides]+[1])]) 
    return result 

shape = 0.5 
location = 234 
scale = 50 
x0 = (shape, location, scale) 

#~ fun(x0,tides) 

result = minimize(fun, x0, args=tides, method='Nelder-Mead') 
print (result) 

x = np.linspace(np.min(tides)-10,np.max(tides)+10,1000) 
#~ gev_pdf = gev.pdf(x, gev_fit[0], gev_fit[1], gev_fit[2]) 
f = lambda x: genextreme.pdf(x,*result.x) 

plt.subplot(1,2,1) 
plt.hist(tides, normed=True, alpha=0.2, label='Data') 
plt.xlabel('Tides (cm)') 

plt.subplot(1,2,2) 
plt.hist(tides, normed=True, alpha=0.2, label='Data') 
plt.plot(x, [f(_) for _ in x], 'r--', label='GEV Fit') 
plt.xlabel('Tides (cm)') 
plt.legend(loc='upper left') 
plt.show() 

代码中的碎片(例如计数器)表示我放入的用于识别重复数据的工作。它们导致产品间间距为零,导致试图取零对数。我所做的就是轻微干扰物品。例如,像201.17这样的项目之一成为201.18。

这是结果图。

enter image description here

无论如何,我很高兴你问这个问题。有趣的问题。

+0

感谢您的帮助,@BillBell! – N1B4

+0

@ N1B4,非常欢迎您! –