2011-01-11 95 views
20

覆盖 ziparchive 中的文件:我有一个 archive.zip,其中包含两个文件:hello.txt 和 world.txt。

我想用下面的代码,以新文件覆盖 hello.txt:

但它不会覆盖该文件,而是不知何故又创建了另一个 hello.txt 实例——请看 WinZip 截图:

alt text

由于没有类似 zipfile.remove() 的方法,处理此问题的最佳方法是什么?

+1

尚未解决的问题:https://bugs.python.org/issue6818 – denfromufa 2016-05-25 16:50:22

回答

26

用python zipfile模块没有办法做到这一点。您必须创建一个新的zip文件并从第一个文件再次压缩所有内容,再加上新的修改后的文件。

下面是一些实现该功能的代码。但请注意,它的效率不高,因为它会解压缩并重新压缩所有数据。

import tempfile 
import zipfile 
import shutil 
import os 

def remove_from_zip(zipfname, *filenames):
    """Rewrite the archive at *zipfname* without the entries in *filenames*.

    The zipfile module cannot delete members in place, so every surviving
    entry is copied into a fresh archive in a temporary directory, which
    then replaces the original file. All data is decompressed and
    recompressed, so this is not efficient for large archives.

    Args:
        zipfname: Path of the zip archive to modify.
        *filenames: Archive member names to drop. Names that are not
            present in the archive are ignored.
    """
    # Set membership keeps the per-entry check O(1) for long name lists.
    excluded = set(filenames)
    tempdir = tempfile.mkdtemp()
    try:
        tempname = os.path.join(tempdir, 'new.zip')
        with zipfile.ZipFile(zipfname, 'r') as zipread:
            with zipfile.ZipFile(tempname, 'w') as zipwrite:
                for item in zipread.infolist():
                    if item.filename in excluded:
                        continue
                    # Read via the ZipInfo object rather than the name: an
                    # archive may hold several entries with the same name,
                    # and a lookup by name always returns the last one.
                    zipwrite.writestr(item, zipread.read(item))
        # Only replace the original once the new archive is complete.
        shutil.move(tempname, zipfname)
    finally:
        shutil.rmtree(tempdir)

用法:

# Drop the existing 'hello.txt' entry by rewriting the archive ...
remove_from_zip('archive.zip', 'hello.txt') 
# ... then reopen it in append mode and add the new 'hello.txt' from disk.
with zipfile.ZipFile('archive.zip', 'a') as z: 
    z.write('hello.txt') 
+0

所以,没有任何高效的方法可以覆盖文件吗?也许可以用另一个 zip 模块?无论如何,谢谢你 – nukl 2011-01-11 03:32:49

+0

@ cru3l:这正是我在回答中所说的。 – nosklo 2011-01-11 03:33:30

10

基于 nosklo 的答案:UpdateableZipFile 是一个继承自 ZipFile 的类,基本提供相同的接口,但增加了覆盖文件(通过 write 或 writestr)和删除文件的功能。

import os 
import shutil 
import tempfile 
from zipfile import ZipFile, ZIP_STORED, ZipInfo 


class UpdateableZipFile(ZipFile):
    """ZipFile that can overwrite and delete existing archive members.

    Adds delete (via ``remove_file``) and update (via ``writestr`` and
    ``write``) on top of the regular ZipFile interface. The update features
    are only active when the object is used in a ``with`` statement: pending
    changes are collected while the block runs and, on ``__exit__``, a new
    archive containing the changes replaces the existing one.

    Outside a ``with`` block ``writestr`` and ``write`` behave exactly like
    the ZipFile methods, and deletions requested through ``remove_file`` are
    never applied.
    """

    class DeleteMarker(object):
        """Sentinel stored in ``_replace`` for entries that must be dropped."""

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the archive; arguments are passed through to ZipFile."""
        super(UpdateableZipFile, self).__init__(file, mode=mode,
                                                compression=compression,
                                                allowZip64=allowZip64)
        # Maps archive name -> open temp file holding the replacement data,
        # or a DeleteMarker instance when the entry is to be removed.
        self._replace = {}
        # Whether the with statement was entered.
        self._allow_updates = False

    @staticmethod
    def _to_bytes(data):
        """Return *data* as bytes, encoding text as UTF-8."""
        # The replacement temp files are binary. The second check keeps
        # Python 2 byte strings (where str is bytes) untouched.
        if isinstance(data, str) and not isinstance(data, bytes):
            return data.encode("utf-8")
        return data

    def _new_replacement(self, name):
        """Register and return an empty temp file as the new data for *name*.

        Any replacement previously staged for *name* is closed and
        discarded, so the latest write wins instead of being appended to
        the earlier one. A pending DeleteMarker is simply superseded.
        """
        previous = self._replace.get(name)
        if hasattr(previous, 'close'):
            previous.close()
        temp_file = self._replace[name] = tempfile.TemporaryFile()
        return temp_file

    def _cancel_delete(self, name):
        """Forget a pending deletion of *name* (a later write wins)."""
        if isinstance(self._replace.get(name), self.DeleteMarker):
            del self._replace[name]

    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
        """Write *bytes* under the given name, overwriting an existing entry.

        Args:
            zinfo_or_arcname: Archive name or ZipInfo of the entry.
            bytes: Entry content (bytes, or text which is UTF-8 encoded
                when staged as a replacement).
            compress_type: Passed to ZipFile.writestr for new entries.
        """
        if isinstance(zinfo_or_arcname, ZipInfo):
            name = zinfo_or_arcname.filename
        else:
            name = zinfo_or_arcname
        # If the entry exists it must be overridden: stage the data in a
        # temp file. This is only allowed inside a with statement, because
        # the rebuild happens in __exit__.
        if self._allow_updates and name in self.namelist():
            self._new_replacement(name).write(self._to_bytes(bytes))
        # Otherwise just act normally.
        else:
            self._cancel_delete(name)
            super(UpdateableZipFile, self).writestr(
                zinfo_or_arcname, bytes, compress_type=compress_type)

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file *filename*, overwriting an existing entry *arcname*.

        Args:
            filename: Path of the file on disk.
            arcname: Name inside the archive; defaults to *filename*.
            compress_type: Passed to ZipFile.write for new entries.
        """
        arcname = arcname or filename
        # Same staging rule as writestr: existing entries are replaced at
        # rebuild time, and only inside a with statement.
        if self._allow_updates and arcname in self.namelist():
            temp_file = self._new_replacement(arcname)
            with open(filename, "rb") as source:
                shutil.copyfileobj(source, temp_file)
        # Otherwise just act normally.
        else:
            self._cancel_delete(arcname)
            super(UpdateableZipFile, self).write(
                filename, arcname=arcname, compress_type=compress_type)

    def __enter__(self):
        """Enable the update features and return the archive object."""
        self._allow_updates = True
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the archive, then apply any staged updates/deletions."""
        try:
            # Let the base class close (and flush) the zip file first, so
            # the rebuild reads a complete archive from disk.
            super(UpdateableZipFile, self).__exit__(exc_type, exc_val, exc_tb)
            if self._replace:
                self._rebuild_zip()
        finally:
            # Even if the rebuild failed, release all the temp files.
            self._close_all_temp_files()
            self._allow_updates = False

    def _close_all_temp_files(self):
        """Close every staged temp file and forget all pending changes."""
        # .values() works on both Python 2 and 3 (itervalues is 2-only).
        for temp_file in self._replace.values():
            # DeleteMarker instances have nothing to close.
            if hasattr(temp_file, 'close'):
                temp_file.close()
        self._replace.clear()

    def remove_file(self, path):
        """Mark the entry *path* for removal when the with block exits."""
        # Release replacement data staged earlier for the same entry.
        previous = self._replace.get(path)
        if hasattr(previous, 'close'):
            previous.close()
        self._replace[path] = self.DeleteMarker()

    def _rebuild_zip(self):
        """Write a new archive with the staged changes and swap it in."""
        tempdir = tempfile.mkdtemp()
        try:
            temp_zip_path = os.path.join(tempdir, 'new.zip')
            # Names whose replacement data has already been written; used
            # to drop stale duplicate entries of a replaced name.
            replaced = set()
            with ZipFile(self.filename, 'r') as zip_read:
                # Create the new zip with the same properties.
                with ZipFile(temp_zip_path, 'w', compression=self.compression,
                             allowZip64=self._allowZip64) as zip_write:
                    for item in zip_read.infolist():
                        replacement = self._replace.get(item.filename)
                        # Marked for deletion: do not copy to the new zip.
                        if isinstance(replacement, self.DeleteMarker):
                            continue
                        if replacement is not None:
                            if item.filename in replaced:
                                continue
                            replaced.add(item.filename)
                            # Use the staged data instead of the old entry.
                            replacement.seek(0)
                            data = replacement.read()
                        else:
                            # Read by ZipInfo so duplicate names keep
                            # their own data.
                            data = zip_read.read(item)
                        zip_write.writestr(item, data)
            # Override the archive with the updated one.
            shutil.move(temp_zip_path, self.filename)
        finally:
            shutil.rmtree(tempdir)

使用示例:

# Raw strings keep the Windows backslashes literal; "\T" and "\e" are
# invalid escape sequences in a normal string literal.
with UpdateableZipFile(r"C:\Temp\Test2.docx", "a") as o:
    # Overwrite an existing entry with a string
    o.writestr("word/document.xml", "Some data")
    # Exclude an existing entry from the zip
    o.remove_file("word/fontTable.xml")
    # Write a new entry (with no conflict) to the zip
    o.writestr("new_file", "more data")
    # Overwrite an existing entry with a file from disk
    o.write(r"C:\Temp\example.png", "word/settings.xml")