
Problem uploading files from a local directory to AWS S3 using Python 2.7 and boto 2

I am doing something simple: I download gzip archives from an S3 bucket to a local directory, extract them into another local directory, and then upload them back to the S3 bucket under a different folder path. While doing this I want to be sure I am handling the same set of files I originally downloaded from S3, which is (f_name) in the code below. Right now the code below does not upload anything to S3, and that is where I am stuck; downloading from S3 and extracting into the local directory both work. Can you help me understand what is wrong with the _uploadFile function?

from boto.s3.connection import S3Connection 
from boto.s3.key import * 
import os 
import os.path 
aws_bucket= "event-logs-dev” ## S3 Bucket name 
local_download_directory= "/Users/TargetData/Download/test_queue1/“ ## local    directory to download the gzip files from S3. 
Target_directory_to_extract = "/Users/TargetData/unzip” ##local directory to gunzip the downloaded files. 
Target_s3_path_to_upload= "event-logs-dev/data/clean/xact/logs/archive/“ ## S3 bucket path to upload the files. 

def decompressAllFilesFromNetfiler(self,aws_bucket,local_download_directory,Target_directory_to_extract,Target_s3_path_to_upload):
zipFiles = [f for f in os.listdir(local_download_directory) if re.match(r'.*\.tar\.gz', f)] 
for f_name in zipFiles: 
    if os.path.exists(Target_directory_to_extract+"/"+f_name[:-len('.tar.gz')]) and os.access(Target_directory_to_extract+"/"+f_name[:-len('.tar.gz')], os.R_OK): 
     print ('File {} already exists!'.format(f_name)) 
    else: 
     f_name_with_path = os.path.join(local_download_directory, f_name) 
     os.system('mkdir -p {} && tar vxzf {} -C {}'.format(Target_directory_to_extract, f_name_with_path, Target_directory_to_extract)) 
     print ('Extracted file {}'.format(f_name)) 
     self._uploadFile(aws_bucket,f_name,Target_s3_path_to_upload,Target_directory_to_extract) 

def _uploadFile(self, aws_bucket, f_name,Target_s3_path_to_upload,Target_directory_to_extract): 
full_key_name = os.path.expanduser(os.path.join(Target_s3_path_to_upload, f_name)) 
path = os.path.expanduser(os.path.join(Target_directory_to_extract, f_name)) 
try: 
    print "Uploaded extracted file to: %s" % (full_key_name) 
    key = aws_bucket.new_key(full_key_name) 
    key.set_contents_from_filename(path) 
except: 
    if full_key_name is None: 
     print "Error uploading” 

Currently the output prints Uploaded extracted file to: event-logs-dev/data/clean/xact/logs/archive/1442235602129200000.tar.gz, but nothing is actually uploaded to the S3 bucket. Any help is greatly appreciated. Thanks in advance!

Answer


It looks like you cut and pasted part of your code, and perhaps the formatting was lost, because the code above cannot work as pasted. I took the liberty of making it (mostly) PEP8-compliant, but there is still some code missing to create the S3 objects. Since you import the modules, I assume you have that code and just didn't paste it.

Here is a cleaned-up, correctly formatted version of your code. I also added an Exception handler to your try: block that prints out the error you hit. You should update it to catch the specific exceptions thrown by new_key or set_contents_from_filename, but the general Exception will get you started. If nothing else this is more readable, but you should also include your S3 connection code, with anything specific to your domain removed (e.g. keys, trade secrets, etc.).

#!/usr/bin/env python
"""
do some download
some extract
and some upload
"""

from boto.s3.connection import S3Connection
from boto.s3.key import *
import os
import os.path
import re  # needed for the .tar.gz filename match below


aws_bucket = 'event-logs-dev'
local_download_directory = '/Users/TargetData/Download/test_queue1/'
Target_directory_to_extract = '/Users/TargetData/unzip'
Target_s3_path_to_upload = 'event-logs-dev/data/clean/xact/logs/archive/'

'''
MUST BE SOME MAGIC HERE TO GET AN S3 CONNECTION ???
aws_bucket IS NOT A BUCKET OBJECT ...
'''


def decompressAllFilesFromNetfiler(self,
                                   aws_bucket,
                                   local_download_directory,
                                   Target_directory_to_extract,
                                   Target_s3_path_to_upload):
    '''
    decompress stuff
    '''
    zipFiles = [f for f in os.listdir(local_download_directory)
                if re.match(r'.*\.tar\.gz', f)]
    for f_name in zipFiles:
        # Skip archives whose extracted directory already exists and is readable
        extracted = os.path.join(Target_directory_to_extract,
                                 f_name[:-len('.tar.gz')])
        if os.path.exists(extracted) and os.access(extracted, os.R_OK):
            print ('File {} already exists!'.format(f_name))
        else:
            f_name_with_path = os.path.join(local_download_directory, f_name)
            os.system('mkdir -p {} && tar vxzf {} -C {}'.format(
                Target_directory_to_extract,
                f_name_with_path,
                Target_directory_to_extract))
            print ('Extracted file {}'.format(f_name))
            self._uploadFile(aws_bucket,
                             f_name,
                             Target_s3_path_to_upload,
                             Target_directory_to_extract)


def _uploadFile(self,
                aws_bucket,
                f_name,
                Target_s3_path_to_upload,
                Target_directory_to_extract):
    full_key_name = os.path.expanduser(os.path.join(Target_s3_path_to_upload,
                                                    f_name))
    path = os.path.expanduser(os.path.join(Target_directory_to_extract, f_name))
    try:
        # The bucket *name* must be resolved to a Bucket object before
        # new_key can be called on it
        S3CONN = S3Connection()
        BUCKET = S3CONN.get_bucket(aws_bucket)
        key = BUCKET.new_key(full_key_name)
        key.set_contents_from_filename(path)
        print "Uploaded extracted file to: {}".format(full_key_name)
    except Exception as UploadERR:
        if full_key_name is None:
            print 'Error uploading'
        else:
            print "Error : {}".format(UploadERR)

Thank you so much for taking the time to correct this code; I really appreciate your help. Yes, I already have code that connects to the S3 bucket. I ran your code above and got the output below. With this code the file extraction works fine, but the files are not uploaded to S3 and an error is raised. Please advise on next steps. 'Extracted file 1440190802314590000.tar.gz Error : 'unicode' object has no attribute 'new_key' x 144019080590001/V.data Extracted file 1440190802314590001.tar.gz Error : 'unicode' object has no attribute 'new_key'' – Guddi


That call is invalid unless aws_bucket is a bucket object. – cgseller
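To illustrate cgseller's point, a minimal sketch (the bucket name comes from the question; credentials are assumed to come from the environment or ~/.boto; the key name is made up):

from boto.s3.connection import S3Connection

conn = S3Connection()                       # picks up AWS credentials
bucket = conn.get_bucket('event-logs-dev')  # a Bucket object, not a string
key = bucket.new_key('data/clean/xact/logs/archive/example.tar.gz')
# Calling new_key on the bucket *name* (a unicode string) rather than on a
# Bucket object is what produced the AttributeError in the comment above.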


Thanks, that fixed the problem. Have a great day! – Guddi
