2015-07-20 139 views
2

打开损坏的PyTables HDF5文件:我希望能获得打开一个损坏的HDF5文件方面的帮助。我通过Pandas访问PyTables,但 pd.read_hdf() 调用产生了以下错误。我不了解PyTables的内部工作原理。

我认为出错的原因是:负责保存到该文件的进程(每10秒左右追加一次数据)被意外复制了,导致当时有两个相同的进程在同时向文件追加。我不确定为什么这会损坏文件而不只是产生重复数据,但这两个问题是同时出现的,所以我认为它们之间存在因果关系。

--------------- 
HDF5ExtError        Traceback (most recent call last) 
<ipython-input-37-99558b43d768> in <module>() 
----> 1 ES2 = h.read('./ES_201509-1') 

/Users/AFK/Desktop/fastback/historical_store.pyc in read(self, path, key, **kwargs) 
    53   frame. Extra keyword args are all passed down to pandas.read_hdf(). 
    54   """ 
---> 55   df = pd.read_hdf(path, key, **kwargs) 
    56   df.index = pd.to_datetime(df.Time) 
    57   del df['Time'] 

//anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in read_hdf(path_or_buf, key, **kwargs) 
    326    # if there is an error, close the store 
    327    try: 
--> 328     store.close() 
    329    except: 
    330     pass 

//anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in close(self) 
    566   """ 
    567   if self._handle is not None: 
--> 568    self._handle.close() 
    569   self._handle = None 
    570 

//anaconda/lib/python2.7/site-packages/tables/file.pyc in close(self) 
    2726 
    2727   # Close all loaded nodes. 
-> 2728   self.root._f_close() 
    2729 
    2730   self._node_manager.shutdown() 

//anaconda/lib/python2.7/site-packages/tables/group.pyc in _f_close(self) 
    907   # this is not an explicit close issued by the user. 
    908   if not (self._v__deleting or self._v_objectid is None): 
--> 909    self._g_close_descendents() 
    910 
    911   # When all the descendents have been closed, close this group. 

//anaconda/lib/python2.7/site-packages/tables/group.pyc in _g_close_descendents(self) 
    870 
    871   node_manager = self._v_file._node_manager 
--> 872   node_manager.close_subtree(self._v_pathname) 
    873 
    874  _g_closeDescendents = previous_api(_g_close_descendents) 

//anaconda/lib/python2.7/site-packages/tables/file.pyc in close_subtree(self, prefix) 
    540    if path.startswith(prefix) and '/_i_' not in path 
    541   ] 
--> 542   self._close_nodes(paths, cache.pop) 
    543 
    544   # Close everything else (i.e. indices) 

//anaconda/lib/python2.7/site-packages/tables/file.pyc in _close_nodes(nodepaths, get_node) 
    515       node._g_close() 
    516      else: 
--> 517       node._f_close() 
    518      del node 
    519     except ClosedNodeError: 

//anaconda/lib/python2.7/site-packages/tables/table.pyc in _f_close(self, flush) 
    3034   # Flush right now so the row object does not get in the middle. 
    3035   if flush: 
-> 3036    self.flush() 
    3037 
    3038   # Some warnings can be issued after calling `self._g_set_location()` 

//anaconda/lib/python2.7/site-packages/tables/table.pyc in flush(self) 
    2969   if self.indexed and self.autoindex: 
    2970    # Flush any unindexed row 
-> 2971    rowsadded = self.flush_rows_to_index(_lastrow=True) 
    2972    assert rowsadded <= 0 or self._indexedrows == self.nrows, \ 
    2973     ("internal error: the number of indexed rows (%d) " 

//anaconda/lib/python2.7/site-packages/tables/table.pyc in flush_rows_to_index(self, _lastrow) 
    2578      if nrows > 0 and not col.index.dirty: 
    2579       rowsadded = self._add_rows_to_index(
-> 2580        colname, start, nrows, _lastrow, update=True) 
    2581    self._unsaved_indexedrows -= rowsadded 
    2582    self._indexedrows += rowsadded 

//anaconda/lib/python2.7/site-packages/tables/table.pyc in _add_rows_to_index(self, colname, start, nrows, lastrow, update) 
    2609   if lastrow and startLR < self.nrows: 
    2610    index.append_last_row(
-> 2611     [self._read(startLR, self.nrows, 1, colname)], 
    2612     update=update) 
    2613    indexedrows += self.nrows - startLR 

//anaconda/lib/python2.7/site-packages/tables/table.pyc in _read(self, start, stop, step, field, out) 
    1895    self._read_field_name(result, start, stop, step, field) 
    1896   else: 
-> 1897    self.row._fill_col(result, start, stop, step, field) 
    1898 
    1899   if select_field: 

//anaconda/lib/python2.7/site-packages/tables/tableextension.so in tables.tableextension.Row._fill_col (tables/tableextension.c:12653)() 

//anaconda/lib/python2.7/site-packages/tables/tableextension.so in tables.tableextension.Table._read_records (tables/tableextension.c:6721)() 

HDF5ExtError: HDF5 error back trace 

    File "H5Dio.c", line 174, in H5Dread 
    can't read data 
    File "H5Dio.c", line 449, in H5D_read 
    can't read data 
    File "H5Dchunk.c", line 1729, in H5D_chunk_read 
    unable to read raw data chunk 
    File "H5Dchunk.c", line 2760, in H5D_chunk_lock 
    data pipeline read failed 
    File "H5Z.c", line 1120, in H5Z_pipeline 
    filter returned failure during read 
    File "H5Zdeflate.c", line 125, in H5Z_filter_deflate 
    inflate() failed 

End of HDF5 error back trace 

Problems reading records. 

回答

3

你的文件已经损坏了,无法从中恢复数据。文档中对此有专门的警告(不要使用多个线程/进程同时写入)。请参阅相关文档。

HDF5在多线程/多进程并发写入时不是安全的(写入端没有线程/进程安全保证)。

+0

该死。谢谢... – Wapiti