2016-07-27 50 views
0

我正在编写一个程序,该程序会搜索数据库中的所有文件,并根据文件名中的数字(从001到100)对它们进行分组。如何让正则表达式识别前导零?

唯一的问题是,Python 会把 '001' 解释为 '1',而文件名中的确切数字是 '001';由于我是用正则表达式来搜索的,它无法按我想要的方式识别这些数字。
真的很感谢一些帮助!这里是我到目前为止的代码:

import sys 
import os 
import re 
import glob 

# Output files, opened for writing (existing contents are truncated).
# stack_at_time_point() writes to time_data; stack_at_zlocation() writes to
# space_data.
time_data = open("time_data.txt", "w")
space_data = open("space_data.txt", "w")


# Directories whose files are scanned by the grouping functions.
# NOTE(review): the first entry has no "C:" drive prefix, unlike the other
# two -- confirm this is intentional.
folder_list = [
    '/Users/fenyolab/Downloads/root images/pet week img seq - removed 621 and after - ch1 registered',
    'C:/Users/fenyolab/Downloads/root images/0329 to 033116 - WERSCR regen - STELLAR - registered',
    'C:/Users/fenyolab/Downloads/root images/0406 to 040816 - H2BIAAWOX regen - GOOD - pt II - REGISTERED',
]


def stack_at_time_point(direc, time_point):
    """Write the paths in *direc* that match *time_point* to ``time_data``.

    A path from ``glob.glob('<direc>/*')`` is kept when the whole path
    matches both ``t.*<time_point>_z`` and ``_c1.*``.  Kept paths are written
    one per line, in glob order.

    *time_point* is interpolated with ``%s``, i.e. without zero padding, so
    a value of 1 matches any name whose digits end in 1 right before ``_z``
    (001, 011, 021, ...).
    """
    time_pattern = "t.*%s_z" % time_point
    time_list = [
        x for x in glob.glob('%s/*' % direc)
        if re.search(time_pattern, x) is not None
        and re.search('_c1.*', x) is not None
    ]
    time_data.writelines("%s\n" % i for i in time_list)

def stack_at_zlocation(direc, location):
    """Write the paths in *direc* that match z *location* to ``space_data``.

    A path from ``glob.glob('<direc>/*')`` is kept when the whole path
    matches both ``_z.*<location>_.`` and ``_c1.*``.  Kept paths are written
    one per line, in glob order.

    *location* is interpolated with ``%s``, i.e. without zero padding, so a
    value of 1 matches ``_z001_`` as well as ``_z011_``, ``_z021_``, ...
    """
    location_pattern = "_z.*%s_." % location
    location_list = [
        x for x in glob.glob('%s/*' % direc)
        if re.search(location_pattern, x) is not None
        and re.search('_c1.*', x) is not None
    ]
    space_data.writelines("%s\n" % i for i in location_list)

for i in folder_list: 
    for x in xrange(100): 
     stack_at_zlocation(i, x) 
     space_data.write("\n\n\n") 
     stack_at_time_point(i, x) 
     time_data.write("\n\n\n") 

space_data.close() 
time_data.close() 

print "Done." 

正则表达式"_z.*%s_." % location将匹配到_z023_如果指定的位置是23,但如果指定的位置是1,程序将返回_z001_, _z011_, _z021_, _z031_, _z041_ ... _z091_.

+0

字母'z'后面跟着一个三位数,零填充的数字? –

+0

是的!不幸的是,这些文件交到我手上时就是这样命名的,而且数量有好几千个,没法手动重命名。 –

+0

在这种情况下,@Robᵩ指出使用'%03d'而不是'%s'。 –

回答

0

您使用的是 printf 风格的字符串格式化(printf-style string formatting)。使用 printf 风格的格式,您可以指定前导零和字段宽度。

将您的 %s 替换为 %03d:

re.search("t.*%03d_z" % time_point, x) 

re.search("_z.*%03d_." % location, x) 
0

建议您根据文件名中长度可变的数字部分建立一个索引,并用它来查找相应的文件。例如:

>>> import re 
>>> 
>>> locations = ['_z{0:03d}_'.format(x) for x in range(1,101)] 
>>> 
>>> def create_zindex(names): 
... reg = re.compile('_z(\d+)_') 
... result = {} 
... for name in names: 
...  m = reg.search(name) 
...  if not m: 
...  print "Can't find z index in {0!r}".format(name) 
...  continue 
...  zindex = int(m.groups()[0]) 
...  if zindex in result: 
...  print "Duplicate z-index {0} - {1}".format(name,result[zindex]) 
...  continue 
...  result[zindex] = name 
... return result 
... 
>>> print locations 
['_z001_', '_z002_', '_z003_', '_z004_', '_z005_', '_z006_', '_z007_', '_z008_', '_z009_', '_z010_', '_z011_', '_z012_', '_z013_', '_z014_', '_z015_', '_z016_', '_z017_', '_z018_', '_z019_', '_z020_', '_z021_', '_z022_', '_z023_', '_z024_', '_z025_', '_z026_', '_z027_', '_z028_', '_z029_', '_z030_', '_z031_', '_z032_', '_z033_', '_z034_', '_z035_', '_z036_', '_z037_', '_z038_', '_z039_', '_z040_', '_z041_', '_z042_', '_z043_', '_z044_', '_z045_', '_z046_', '_z047_', '_z048_', '_z049_', '_z050_', '_z051_', '_z052_', '_z053_', '_z054_', '_z055_', '_z056_', '_z057_', '_z058_', '_z059_', '_z060_', '_z061_', '_z062_', '_z063_', '_z064_', '_z065_', '_z066_', '_z067_', '_z068_', '_z069_', '_z070_', '_z071_', '_z072_', '_z073_', '_z074_', '_z075_', '_z076_', '_z077_', '_z078_', '_z079_', '_z080_', '_z081_', '_z082_', '_z083_', '_z084_', '_z085_', '_z086_', '_z087_', '_z088_', '_z089_', '_z090_', '_z091_', '_z092_', '_z093_', '_z094_', '_z095_', '_z096_', '_z097_', '_z098_', '_z099_', '_z100_'] 
>>> print create_zindex(locations) 
{1: '_z001_', 2: '_z002_', 3: '_z003_', 4: '_z004_', 5: '_z005_', 6: '_z006_', 7: '_z007_', 8: '_z008_', 9: '_z009_', 10: '_z010_', 11: '_z011_', 12: '_z012_', 13: '_z013_', 14: '_z014_', 15: '_z015_', 16: '_z016_', 17: '_z017_', 18: '_z018_', 19: '_z019_', 20: '_z020_', 21: '_z021_', 22: '_z022_', 23: '_z023_', 24: '_z024_', 25: '_z025_', 26: '_z026_', 27: '_z027_', 28: '_z028_', 29: '_z029_', 30: '_z030_', 31: '_z031_', 32: '_z032_', 33: '_z033_', 34: '_z034_', 35: '_z035_', 36: '_z036_', 37: '_z037_', 38: '_z038_', 39: '_z039_', 40: '_z040_', 41: '_z041_', 42: '_z042_', 43: '_z043_', 44: '_z044_', 45: '_z045_', 46: '_z046_', 47: '_z047_', 48: '_z048_', 49: '_z049_', 50: '_z050_', 51: '_z051_', 52: '_z052_', 53: '_z053_', 54: '_z054_', 55: '_z055_', 56: '_z056_', 57: '_z057_', 58: '_z058_', 59: '_z059_', 60: '_z060_', 61: '_z061_', 62: '_z062_', 63: '_z063_', 64: '_z064_', 65: '_z065_', 66: '_z066_', 67: '_z067_', 68: '_z068_', 69: '_z069_', 70: '_z070_', 71: '_z071_', 72: '_z072_', 73: '_z073_', 74: '_z074_', 75: '_z075_', 76: '_z076_', 77: '_z077_', 78: '_z078_', 79: '_z079_', 80: '_z080_', 81: '_z081_', 82: '_z082_', 83: '_z083_', 84: '_z084_', 85: '_z085_', 86: '_z086_', 87: '_z087_', 88: '_z088_', 89: '_z089_', 90: '_z090_', 91: '_z091_', 92: '_z092_', 93: '_z093_', 94: '_z094_', 95: '_z095_', 96: '_z096_', 97: '_z097_', 98: '_z098_', 99: '_z099_', 100: '_z100_'}