2017-08-06 104 views
-1

Python 的 requests-futures 似乎对我不起作用。我有下面这段代码：

import concurrent.futures as cf 
from requests_futures.sessions import FuturesSession 

# Five sample sites, repeated 500 times, giving 2500 requests in total.
urls = [
    'http://www.foxnews.com/',
    'http://www.cnn.com/',
    'http://europe.wsj.com/',
    'http://www.bbc.co.uk/',
    'https://foursquare.com/',
] * 500

# One shared session whose requests run on a pool of at most 8 threads.
pool = cf.ThreadPoolExecutor(max_workers=8)
session = FuturesSession(executor=pool)

# Queue a GET for every URL up front and keep the resulting futures.
futures = [session.get(url) for url in urls]

# Report each response body size in completion order. result() re-raises
# whatever exception the request hit, so one failed request ends the loop.
for future in cf.as_completed(futures):
    response = future.result()
    result = len(response.content)
    print(result)

但是这段代码运行时会出现类似下面的错误消息：

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))

当 URL 列表的长度约为 300 时，它可以正常工作，但是当我有大约 500 个 URL 时，又会出现同样奇怪的错误。

完整的堆栈跟踪如下：

--------------------------------------------------------------------------- 
ConnectionResetError      Traceback (most recent call last) 
c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redi 
rect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 
    599             body=body, headers=headers, 
--> 600             chunked=chunked) 
    601 

c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked 
, **httplib_request_kw) 
    385      # otherwise it looks like a programming error was the cause. 
--> 386      six.raise_from(e, None) 
    387   except (SocketTimeout, BaseSSLError, SocketError) as e: 

c:\python36\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value) 

c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked 
, **httplib_request_kw) 
    381     try: 
--> 382      httplib_response = conn.getresponse() 
    383     except Exception as e: 

c:\python36\lib\http\client.py in getresponse(self) 
    1330    try: 
-> 1331     response.begin() 
    1332    except ConnectionError: 

c:\python36\lib\http\client.py in begin(self) 
    296   while True: 
--> 297    version, status, reason = self._read_status() 
    298    if status != CONTINUE: 

c:\python36\lib\http\client.py in _read_status(self) 
    257  def _read_status(self): 
--> 258   line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") 
    259   if len(line) > _MAXLINE: 

c:\python36\lib\socket.py in readinto(self, b) 
    585    try: 
--> 586     return self._sock.recv_into(b) 
    587    except timeout: 

c:\python36\lib\ssl.py in recv_into(self, buffer, nbytes, flags) 
    1001     self.__class__) 
-> 1002    return self.read(nbytes, buffer) 
    1003   else: 

c:\python36\lib\ssl.py in read(self, len, buffer) 
    864   try: 
--> 865    return self._sslobj.read(len, buffer) 
    866   except SSLError as x: 

c:\python36\lib\ssl.py in read(self, len, buffer) 
    624   if buffer is not None: 
--> 625    v = self._sslobj.read(len, buffer) 
    626   else: 

ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host 

During handling of the above exception, another exception occurred: 

ProtocolError        Traceback (most recent call last) 
c:\python36\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies) 
    422      retries=self.max_retries, 
--> 423      timeout=timeout 
    424    ) 

c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redi 
rect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 
    648    retries = retries.increment(method, url, error=e, _pool=self, 
--> 649           _stacktrace=sys.exc_info()[2]) 
    650    retries.sleep() 

c:\python36\lib\site-packages\requests\packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stack 
trace) 
    346    if read is False or not self._is_method_retryable(method): 
--> 347     raise six.reraise(type(error), error, _stacktrace) 
    348    elif read is not None: 

c:\python36\lib\site-packages\requests\packages\urllib3\packages\six.py in reraise(tp, value, tb) 
    684   if value.__traceback__ is not tb: 
--> 685    raise value.with_traceback(tb) 
    686   raise value 

c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redi 
rect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw) 
    599             body=body, headers=headers, 
--> 600             chunked=chunked) 
    601 

c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked 
, **httplib_request_kw) 
    385      # otherwise it looks like a programming error was the cause. 
--> 386      six.raise_from(e, None) 
    387   except (SocketTimeout, BaseSSLError, SocketError) as e: 

c:\python36\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value) 

c:\python36\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked 
, **httplib_request_kw) 
    381     try: 
--> 382      httplib_response = conn.getresponse() 
    383     except Exception as e: 

c:\python36\lib\http\client.py in getresponse(self) 
    1330    try: 
-> 1331     response.begin() 
    1332    except ConnectionError: 

c:\python36\lib\http\client.py in begin(self) 
    296   while True: 
--> 297    version, status, reason = self._read_status() 
    298    if status != CONTINUE: 

c:\python36\lib\http\client.py in _read_status(self) 
    257  def _read_status(self): 
--> 258   line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") 
    259   if len(line) > _MAXLINE: 

c:\python36\lib\socket.py in readinto(self, b) 
    585    try: 
--> 586     return self._sock.recv_into(b) 
    587    except timeout: 

c:\python36\lib\ssl.py in recv_into(self, buffer, nbytes, flags) 
    1001     self.__class__) 
-> 1002    return self.read(nbytes, buffer) 
    1003   else: 

c:\python36\lib\ssl.py in read(self, len, buffer) 
    864   try: 
--> 865    return self._sslobj.read(len, buffer) 
    866   except SSLError as x: 

c:\python36\lib\ssl.py in read(self, len, buffer) 
    624   if buffer is not None: 
--> 625    v = self._sslobj.read(len, buffer) 
    626   else: 

ProtocolError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', 
None, 10054, None)) 

During handling of the above exception, another exception occurred: 

ConnectionError       Traceback (most recent call last) 
<ipython-input-144-e24ea43223c2> in <module>() 
    15 
    16 for idx,future in enumerate(cf.as_completed(futures)): 
---> 17  result = len(future.result().content) 
    18  print(idx,result) 
    19 

c:\python36\lib\concurrent\futures\_base.py in result(self, timeout) 
    396     raise CancelledError() 
    397    elif self._state == FINISHED: 
--> 398     return self.__get_result() 
    399 
    400    self._condition.wait(timeout) 

c:\python36\lib\concurrent\futures\_base.py in __get_result(self) 
    355  def __get_result(self): 
    356   if self._exception: 
--> 357    raise self._exception 
    358   else: 
    359    return self._result 

c:\python36\lib\concurrent\futures\thread.py in run(self) 
    53 
    54   try: 
---> 55    result = self.fn(*self.args, **self.kwargs) 
    56   except BaseException as e: 
    57    self.future.set_exception(e) 

c:\python36\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeou 
t, allow_redirects, proxies, hooks, stream, verify, cert, json) 
    486   } 
    487   send_kwargs.update(settings) 
--> 488   resp = self.send(prep, **send_kwargs) 
    489 
    490   return resp 

c:\python36\lib\site-packages\requests\sessions.py in send(self, request, **kwargs) 
    607 
    608   # Send the request 
--> 609   r = adapter.send(request, **kwargs) 
    610 
    611   # Total elapsed time of the request (approximately) 

c:\python36\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies) 
    471 
    472   except (ProtocolError, socket.error) as err: 
--> 473    raise ConnectionError(err, request=request) 
    474 
    475   except MaxRetryError as e: 

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host' 
, None, 10054, None)) 

In [145]: 
+0

请求数少于 2500 个时能正常工作吗？先把它缩减到 2 个，再逐步增加。 –

+0

John，太棒了！我按照你的建议做了一些修改。结果很奇怪，但我不知道为什么 – user3249433

回答

2

问题出在你请求的这些网站上：为了防范 DoS 攻击之类的情况，它们会限制来自同一个 IP 的连接数（我不是这方面的专家）。

正如你在错误信息中看到的：ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host

+0

有意思，Reza。我所做的是去掉 requests_futures，改用同步的 requests 库配合 concurrent.futures 的 ThreadPoolExecutor；这种方式虽然慢得多，却没有出现此错误 – user3249433

+1

@user3249433 网站限制的是请求的速率，而不仅仅是请求的数量。使用 8 个工作线程时，你访问每个站点的频率是单线程时的 8 倍。 –

+0

@user3249433 这是因为它比较慢。对每个网站，你每秒发出的请求数不能超过一定数量。 –