2016-02-05 179 views
0

我正在尝试用 Python 进行随机森林分析。一切看起来都没有问题,但是当我运行代码时,得到了下面的错误信息(随机森林中的 ValueError):

enter image description here

你们有人遇到过这个 ValueError 异常吗?

干杯

数据集:https://www.dropbox.com/s/ehyccl8kubazs8x/test.csv?dl=0&preview=test.csv

代码:

from sklearn.ensemble import RandomForestRegressor as RF 
import numpy as np 
import pylab as pl 


# Read the header row to map column names to column indices. The first
# column is dropped both here ([1:]) and in loadtxt below (usecols starts
# at 1), so positions in `headers` line up with the columns of `data`.
with open("test.csv") as csv_file:
    # split('\r')[0] keeps only the text before the first carriage return,
    # if any; it is a no-op when the line contains none.
    headers = csv_file.readline().strip().split('\r')[0].split(',')[1:]

data = np.loadtxt("test.csv", delimiter=',', skiprows=1, usecols=range(1, 14))

# Explanatory variables
# (plot colours: yellow==PAR, green==VPD, blue==Tsoil and orange==Tair)
PAR = data[:, headers.index("PAR")]
VPD = data[:, headers.index("VPD")]
Tsoil = data[:, headers.index("Tsoil")]
Tair = data[:, headers.index("Tair")]

# One row per sample, one column per explanatory variable.
drivers = np.column_stack([PAR, VPD, Tsoil, Tair])

# NOTE(review): this takes the last loaded column as the hour of day. If that
# column holds something else (e.g. the year), no value in 0-23 matches and
# every per-hour subset is empty -- verify against the CSV header.
hour = data[:, -1].astype("int")


# Fit one random forest per hour of day for each of the NEE, GPP and Reco
# fluxes and record how much each explanatory variable contributes.
#
# Axes of `importances`:
#   0: hour of day (24)
#   1: 0 = forest feature importance, 1 = std of importance across trees
#   2: observed flux (3)
#   3: explanatory variable (4)
importances = np.zeros([24, 2, 3, 4])

for ff, flux in enumerate(["NEE_f", "GPP_f", "Reco"]):
    fid = headers.index(flux)
    obs = data[:, fid]

    for hh in range(24):
        mask = hour == hh
        # Fitting on zero samples fails inside scikit-learn with an opaque
        # ValueError; fail early with a message that points at the cause.
        if not mask.any():
            raise ValueError(
                "no samples with hour == %d; check that `hour` is read from "
                "the hour-of-day column" % hh
            )

        forest = RF(n_estimators=1000)
        forest.fit(drivers[mask], obs[mask])

        importances[hh, 0, ff] = forest.feature_importances_
        # Spread of the per-tree importances around the forest value.
        importances[hh, 1, ff] = np.std(
            [tree.feature_importances_ for tree in forest.estimators_],
            axis=0,
        )

# Plot the hourly feature importances: one panel per flux, one line per
# explanatory variable, with a shaded band of +/- one std across trees.
fig = pl.figure('importances', figsize=(15, 5))
fig.clf()
xx = range(24)

colors = ["#F0E442", "#009E73", "#56B4E9", "#E69F00"]
labels = ['PAR', 'VPD', 'Tsoil', 'Tair']

for ff, flux in enumerate(["NEE_f", "GPP_f", "Reco"]):
    ax = fig.add_subplot(1, 3, ff + 1)

    for vv in range(drivers.shape[1]):
        mean = importances[:, 0, ff, vv]
        spread = importances[:, 1, ff, vv]
        ax.fill_between(xx, mean + spread, mean - spread,
                        color=colors[vv], alpha=.35, edgecolor="none")
        ax.plot(xx, mean, color=colors[vv], ls='-', lw=2, label=labels[vv])

    # Per-axes decoration only needs to run once, after all lines exist.
    ax.set_title(flux)
    ax.set_xlim(0, 23)
    if ff == 0:
        # A single legend on the first panel serves all three panels.
        ax.legend(ncol=2, fontsize='medium', loc='upper center')

fig.show()
fig.savefig('importance-hourly.png')
+1

请把代码直接复制到您的帖子中。 – k4ppa

回答

2

问题在于,我选取的是存储年份的那一列,而不是存储小时的那一列。因此,RF 是在空数组上进行训练的。