2013-03-05 47 views

回答

2

不,你必须要么使用 socket.setdefaulttimeout() 设置全局默认超时(timeout),要么子类化 RobotFileParser 类以添加自定义超时:

from urllib.robotparser import RobotFileParser 
import urllib.request 

class TimoutRobotFileParser(RobotFileParser):
    """A ``RobotFileParser`` that fetches robots.txt with a timeout.

    The ``timeout`` attribute is passed to ``urllib.request.urlopen``
    each time ``read()`` is called.
    """

    def __init__(self, url='', timeout=60):
        """Create a parser for the robots.txt file at *url*.

        Args:
            url: Location of the robots.txt file to fetch.
            timeout: Value handed to ``urllib.request.urlopen`` as its
                ``timeout`` argument when ``read()`` is called.
        """
        super().__init__(url)
        # Honour the caller-supplied value rather than a fixed constant.
        self.timeout = timeout

    def read(self):
        """Fetch the robots.txt URL and feed its contents to the parser.

        An HTTP 401 or 403 response sets ``disallow_all``; any other HTTP
        error with a status code of 400 or above sets ``allow_all``.
        Exceptions other than ``urllib.error.HTTPError`` raised while
        opening the URL are not caught here.
        """
        try:
            response = urllib.request.urlopen(self.url, timeout=self.timeout)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400:
                self.allow_all = True
        else:
            # Close the response as soon as the body has been read so the
            # underlying connection is not left open.
            with response:
                raw = response.read()
            self.parse(raw.decode("utf-8").splitlines())