我正在尝试对每半小时一次的碳通量数据进行填补。我想通过“训练-测试-验证”交叉验证来找出最简约的LSTM模型:先用所有可用输入训练模型,然后逐步剔除输入,直到得分不再改善。对于每个模型,我使用k折交叉验证把数据划分为90%的训练集和10%的验证集,然后在model.fit()中把训练集进一步划分为训练集和测试集。我使用提前停止(EarlyStopping)来缩短运行时间,并使用ModelCheckpoint保存最佳权重(即“val_loss”最低的那个epoch)。然后,我想加载这些权重,并在预留的10%数据上计算验证得分(MSE),也就是用在测试集上表现最好的权重,对模型从未见过的数据进行验证。问题是:Keras在使用MXNet后端时无法加载模型权重。
下面是我用来训练LSTM的代码,模型使用9个输入因子和13个时间步(即每个观测值之前的6小时):
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations
from functools import partial
from multiprocessing import Pool
from sklearn.neural_network import MLPRegressor as MPR
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import RepeatedKFold
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping,ModelCheckpoint
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
# Create a TensorFlow session whose GPU options cap this process at a 0.9
# fraction of GPU memory (see TensorFlow's GPUOptions documentation for the
# exact semantics of per_process_gpu_memory_fraction).
# NOTE(review): `session` is not referenced again in the visible code —
# confirm whether it should be registered with Keras, and whether this setup
# is needed at all when the MXNet backend is selected.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
session = tf.Session(config=config)
def TimeShape(rolls, X1):
    """Stack lagged copies of a 2-D array into a 3-D array.

    Slice ``[:, 0, :]`` of the result is ``X1`` itself, and slice
    ``[:, k, :]`` is ``X1`` rolled forward by ``k`` rows along axis 0, for
    ``k = 1 .. rolls``.

    ``np.roll`` wraps around, so in the first ``rolls`` rows of the output
    the lagged slices hold values taken from the END of ``X1``. Callers that
    need windows made only of earlier rows must discard those leading rows.

    Args:
        rolls: Number of lagged copies to append (non-negative integer).
        X1: 2-D array-like of shape ``(n_rows, n_columns)``.

    Returns:
        A float ``numpy.ndarray`` of shape ``(n_rows, rolls + 1, n_columns)``.

    Raises:
        ValueError: If ``rolls`` is negative.
    """
    if rolls < 0:
        raise ValueError("rolls must be non-negative, got %r" % (rolls,))
    # Accept lists / DataFrames as well as ndarrays.
    X1 = np.asarray(X1)
    X = np.zeros(shape=(X1.shape[0], rolls + 1, X1.shape[1]))
    for lag in range(rolls + 1):
        # lag == 0 is the unshifted data; np.roll(..., 0) is a plain copy.
        X[:, lag, :] = np.roll(X1, lag, axis=0)
    return X
def LSTM_Model(time_steps, inputs, load=None):
    """Build and compile the two-layer LSTM regression network.

    The network is LSTM(12, returning sequences) -> LSTM(6) -> Dense(1,
    linear), compiled with mean-squared-error loss and the Adam optimizer.

    Args:
        time_steps: Number of lagged steps; the input window length is
            ``time_steps + 1``.
        inputs: Number of input features per time step.
        load: Accepted but not used anywhere in this function.

    Returns:
        The compiled ``Sequential`` model.
    """
    n_gpus = 1  # or the number of GPUs available on your machine
    contexts = ['gpu(%d)' % idx for idx in range(n_gpus)]

    window = (time_steps + 1, inputs)
    net = Sequential()
    net.add(LSTM(12, input_shape=window, return_sequences=True,
                 init='normal', activation='tanh'))
    net.add(LSTM(6, init='normal', activation='tanh'))
    net.add(Dense(1, init='normal', activation='linear'))

    # The `context` keyword is the MXNet-specific part of this call
    # (original note: "Add if using MXNET").
    net.compile(loss='mean_squared_error', optimizer='adam', context=contexts)
    return net
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records the per-epoch training and validation loss.

    After training, ``train_losses`` holds the ``'loss'`` value and
    ``test_losses`` the ``'val_loss'`` value reported at the end of each
    epoch. Both lists are reset whenever a new training run begins.
    """

    def on_train_begin(self, logs=None):
        """Start a fresh pair of loss lists for this training run."""
        self.train_losses = []
        self.test_losses = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's losses (``None`` for any key that is absent).

        This hook fires once per epoch, so ``batch`` actually receives the
        epoch index; the parameter name is kept for compatibility.
        """
        # Avoid a shared mutable default and tolerate logs=None.
        logs = logs or {}
        self.train_losses.append(logs.get('loss'))
        self.test_losses.append(logs.get('val_loss'))
class LSTM_Optimize:
    """Load a flux data set and score LSTM models with repeated k-fold CV.

    ``__init__`` reads and cleans the data and standardizes the target;
    ``Run`` trains and scores the LSTM for one chosen set of input columns.
    ``Run`` may be called repeatedly with different input sets on the same
    instance.
    """

    def __init__(self, Path, y_var):
        """Read the CSV at ``Path`` and prepare the target column ``y_var``.

        Args:
            Path: Path of a comma-separated file with a header row and a
                ``datetime`` column; ``-9999`` is read as missing.
            y_var: Name of the target column.
        """
        # **Read and prep data**
        self.Master = pd.read_csv(Path, delimiter=',', header=0, na_values=-9999)
        self.Master = self.Master.set_index(
            pd.DatetimeIndex(pd.to_datetime(self.Master['datetime'])))
        self.Master['DOY'] = self.Master.index.dayofyear * 1.0
        self.Master['HR'] = self.Master.index.hour * 1.0
        # Keep only rows with a finite target, then fill gaps in the rest.
        self.Data = self.Master[np.isfinite(self.Master[y_var])]
        self.Data = self.Data.interpolate().bfill()
        self.Data = self.Data.interpolate().ffill()
        # ** Normalize Y variable **
        # ** X is scaled in Run(); the author found the models work better
        #    when Y is normalized as well **
        self.y = self.Data[y_var].values
        self.YStandard = StandardScaler()
        self.YScaled = self.YStandard.fit(self.y.reshape(-1, 1))
        Yscale = self.YScaled.transform(self.y.reshape(-1, 1))
        self.y = np.ndarray.flatten(Yscale)
        self.Ytru = self.YScaled.inverse_transform(self.y.reshape(-1, 1))

    def Run(self, Inputs):
        """Cross-validate the LSTM on the given input columns.

        Uses 10-fold CV repeated twice. For each fold a new model is trained
        on the training part (Keras holds out 10% of it as the ``val_loss``
        split), the checkpointed best weights are reloaded, and the MSE on
        the held-out fold is recorded in standardized target units.

        Args:
            Inputs: List of column names in ``self.Data`` to use as inputs.

        Returns:
            The mean held-out MSE over all folds (also printed).
        """
        # Preparing the input data
        time_steps = 12
        batch_size = 25
        X = self.Data[Inputs]
        input_shape = len(Inputs)
        self.XStandard = StandardScaler()
        self.XScaled = self.XStandard.fit(X)
        Xscale = self.XScaled.transform(X)
        Xscale = TimeShape(time_steps, Xscale)
        # Drop the leading rows whose lag windows wrapped around the series.
        Xscale = Xscale[time_steps + 1:, :, :]
        # Work on a local view: truncating self.y in place would shrink it
        # again on every later Run() call and misalign it with X.
        y = self.y[time_steps + 1:]

        MSE = []
        kf = RepeatedKFold(n_splits=10, n_repeats=2)
        plt.figure(figsize=(7, 7))
        Mod = None
        for train, test in kf.split(Xscale, y):
            # New model per fold, so weights learned on earlier folds (which
            # saw this fold's held-out rows) cannot leak into the score.
            Mod = LSTM_Model(time_steps, input_shape)
            # New callbacks per fold, so the checkpoint's best-so-far score
            # starts over and 'weights.hdf5' always belongs to this fold.
            ES = EarlyStopping(monitor='val_loss', min_delta=0.0, patience=25,
                               verbose=1, mode='auto')
            CH = ModelCheckpoint(filepath='weights.hdf5', monitor='val_loss',
                                 verbose=0, save_best_only=True)
            HS = LossHistory()
            Mod.fit(Xscale[train], y[train], batch_size=batch_size,
                    nb_epoch=1000, validation_split=0.1,
                    shuffle=True, callbacks=[ES, CH, HS], verbose=0)
            # Score with the best-val_loss weights, not the last epoch's.
            Mod.load_weights('weights.hdf5')
            Y = Mod.predict(Xscale[test], batch_size=batch_size)
            MSE.append(metrics.mean_squared_error(y[test], Y))
            plt.plot(HS.test_losses, linestyle='--')
            plt.plot(HS.train_losses)
        # summary() prints the table itself; wrapping it in print() only
        # added a stray "None" line.
        Mod.summary()
        mean_mse = np.asanyarray(MSE).mean()
        print(mean_mse)
        return mean_mse
Path = 'FluxData.csv'
% matplotlib inline
start_time = time.time()
if __name__ == '__main__':
CH4_Model = ['Sedge','Shrubby','Temp','VWC','ustar','wind_speed','air_pressure',
'PPFD_Avg','NR_Wm2_Avg','AirTC_Avg']
y_var = 'ch4_flux'
Model = CH4_Model
Best = LSTM_Optimize(Path,y_var)
Best.Run(Model)
print()
print("--- %s seconds ---" % (time.time() - start_time))
下面是我的数据集中的几行,可作为可运行的示例——实际的序列有数千条观测值:
datetime,co2_flux,ch4_flux,ustar,wind_speed,AirTC_Avg,air_pressure,AirTC_Min,RH,PPFD_Avg,NR_Wm2_Avg,VWC,Temp,Sedge,Shrubby
7/11/2016 8:00,-0.337747167,0.011732699,0.404379747,3.887986435,15.07,101118.6513,15.03,92.7,414.2,225.1,0.5895,7.950660426,0.001292044,0.823794007
7/11/2016 8:30,-1.021087283,0.010256442,0.424094541,3.94983083,14.89,101144.0926,14.84,92.8,339.7,177.1,0.5895,8.24119905,0.001058732,0.826866339
7/11/2016 9:00,-0.146511388,0.008503355,0.456274817,4.687202214,14.71,101177.3176,14.63,93.4,354.4,183.7,0.5895,8.146344257,0.000474955,0.84272365
7/11/2016 9:30,0.144368521,0.009458078,0.462915317,4.810986576,14.27,101203.9191,14.2,93.3,370.2,188.4,0.5895,7.995179025,0.00147768,0.854715683
7/11/2016 10:00,1.471425801,0.014895985,0.47095652,5.098075355,13.7,1.9171,13.62,94.3,462.9,233.9,0.5895,7.521166721,4.64E-05,0.871581919
7/11/2016 10:30,0.889911286,0.01564225,0.487227522,4.969666239,13.13,101277.0195,13.04,96,309.9,155.2,0.5895,7.923818563,8.14E-06,0.880709962
当我使用TensorFlow后端运行这段代码时,一切顺利,能够得到结果。但是,如果我尝试使用MXNet后端运行它,它就无法加载已保存的模型权重,并且我会得到如下回溯信息:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-14c6597a2feb> in <module>()
114 Model = CH4_Model
115 Best = LSTM_Optimize(Path,y_var)
--> 116 Best.Run(Model)
117 print()
118 print("--- %s seconds ---" % (time.time() - start_time))
<ipython-input-1-14c6597a2feb> in Run(self, Inputs)
96 shuffle=True,callbacks=[ES,CH,HS],verbose=0)
97 Y = Mod.predict(Xscale[test],batch_size = batch_size)
---> 98 Mod.load_weights('weights.hdf5')
99 Y = Mod.predict(Xscale[test],batch_size = batch_size)
100 MSE.append(metrics.mean_squared_error(self.y[test],Y))
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights(self, filepath, by_name)
2718 self.load_weights_from_hdf5_group_by_name(f)
2719 else:
-> 2720 self.load_weights_from_hdf5_group(f)
2721
2722 if hasattr(f, 'close'):
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights_from_hdf5_group(self, f)
2804 weight_values[0] = w
2805 weight_value_tuples += zip(symbolic_weights, weight_values)
-> 2806 K.batch_set_value(weight_value_tuples)
2807
2808 def load_weights_from_hdf5_group_by_name(self, f):
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in batch_set_value(tuples)
2205 """
2206 for p, w in tuples:
-> 2207 set_value(p, w)
2208
2209
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in set_value(x, value)
2193 if isinstance(value, Number):
2194 value = [value]
-> 2195 x.bind(mx.nd.array(value))
2196
2197
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in array(source_array, ctx, dtype)
1295 raise TypeError('source_array must be array like object')
1296 arr = empty(source_array.shape, ctx, dtype)
-> 1297 arr[:] = source_array
1298 return arr
1299
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in __setitem__(self, key, value)
384 _internal._set_value(float(value), out=self)
385 elif isinstance(value, (np.ndarray, np.generic)):
--> 386 self._sync_copyfrom(value)
387 else:
388 raise TypeError(
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in _sync_copyfrom(self, source_array)
556 print(self.shape)
557 raise ValueError('Shape inconsistent: expected %s vs got %s'%(
--> 558 str(self.shape), str(source_array.shape)))
559 check_call(_LIB.MXNDArraySyncCopyFromCPU(
560 self.handle,
ValueError: Shape inconsistent: expected () vs got (1,)
为什么我要使用MXNet?它似乎比TensorFlow更快,而我需要对许多具有不同输入、不同节点数和不同超参数的模型进行“训练-测试-验证”。通过使用多进程(multiprocessing)并行训练多个不同的模型,我已经能够显著提高使用MXNet后端的Keras模型的训练速度。但是,使用TensorFlow后端时,尝试进行多进程处理会出现线程锁定错误。
作为背景信息:我在p2.xlarge实例上使用Deep Learning AMI Ubuntu Linux - 2.3_Sep2017(ami-d6ee1dae)环境。
任何想法将不胜感激!
嘿,这个问题你解决了吗? – HalaKuwatly
没有,在MXNet后端下没能解决,我放弃了它并改用了TensorFlow。我已经弄清楚了如何用TensorFlow实现多进程,从而同时对多个模型进行“训练-测试-验证”。 –