Use requests lib instead of low-level urllib calls
With the requests library, users can run stackalytics-processor from environments located behind an HTTP/HTTPS proxy. The proxy address is configured by setting the HTTP_PROXY or HTTPS_PROXY environment variable. Closes-Bug: #1351136 Change-Id: I6a65afb0f99b351dc2183294d9127cbbebc35856
This commit is contained in:
parent
43f38cd4f8
commit
5a8bc5649e
@ -16,5 +16,7 @@ psutil<2.0.0,>=1.1.1
|
||||
PyGithub
|
||||
python-memcached>=1.56
|
||||
PyYAML>=3.1.0
|
||||
requests>=2.5.2
|
||||
requests-file
|
||||
sh
|
||||
six>=1.9.0
|
||||
|
@ -15,8 +15,6 @@
|
||||
|
||||
from oslo_log import log as logging
|
||||
import six
|
||||
from six.moves import http_client
|
||||
from six.moves.urllib import parse
|
||||
|
||||
from stackalytics.processor import utils
|
||||
|
||||
@ -54,14 +52,11 @@ def lp_profile_by_email(email):
|
||||
|
||||
def lp_module_exists(module):
|
||||
uri = LP_URI_DEVEL % module
|
||||
parsed_uri = parse.urlparse(uri)
|
||||
conn = http_client.HTTPConnection(parsed_uri.netloc)
|
||||
conn.request('GET', parsed_uri.path)
|
||||
res = conn.getresponse()
|
||||
request = utils.do_request(uri)
|
||||
|
||||
LOG.debug('Checked uri: %(uri)s, status: %(status)s',
|
||||
{'uri': uri, 'status': res.status})
|
||||
conn.close()
|
||||
return res.status != 404
|
||||
{'uri': uri, 'status': request.status_code})
|
||||
return request.status_code != 404
|
||||
|
||||
|
||||
def lp_blueprint_generator(module):
|
||||
|
@ -18,7 +18,6 @@ import re
|
||||
|
||||
from oslo_log import log as logging
|
||||
import six
|
||||
from six.moves import http_client
|
||||
from six.moves.urllib import parse
|
||||
|
||||
from stackalytics.processor import utils
|
||||
@ -60,31 +59,25 @@ def _get_mail_archive_links(uri):
|
||||
return [parse.urljoin(uri, link) for link in links]
|
||||
|
||||
|
||||
def _link_content_changed(link, runtime_storage_inst):
|
||||
LOG.debug('Check changes for mail archive located at uri: %s', link)
|
||||
parsed_uri = parse.urlparse(link)
|
||||
conn = http_client.HTTPConnection(parsed_uri.netloc)
|
||||
conn.request('HEAD', parsed_uri.path)
|
||||
res = conn.getresponse()
|
||||
last_modified = res.getheader('last-modified')
|
||||
conn.close()
|
||||
def _uri_content_changed(uri, runtime_storage_inst):
|
||||
LOG.debug('Check changes for mail archive located at: %s', uri)
|
||||
last_modified = utils.get_uri_last_modified(uri)
|
||||
|
||||
if last_modified != runtime_storage_inst.get_by_key('mail_link:' + link):
|
||||
if last_modified != runtime_storage_inst.get_by_key('mail_link:' + uri):
|
||||
LOG.debug('Mail archive changed, last modified at: %s', last_modified)
|
||||
runtime_storage_inst.set_by_key('mail_link:' + link, last_modified)
|
||||
runtime_storage_inst.set_by_key('mail_link:' + uri, last_modified)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _retrieve_mails(uri):
|
||||
LOG.debug('Retrieving mail archive from uri: %s', uri)
|
||||
content = utils.read_uri(uri)
|
||||
LOG.debug('Retrieving mail archive from: %s', uri)
|
||||
content = utils.read_gzip_from_uri(uri)
|
||||
if not content:
|
||||
LOG.error('Error reading mail archive from uri: %s', uri)
|
||||
LOG.error('Error reading mail archive from: %s', uri)
|
||||
return
|
||||
|
||||
content = utils.gzip_decompress(content)
|
||||
LOG.debug('Mail archive is loaded, start processing')
|
||||
|
||||
content += TRAILING_RECORD
|
||||
@ -116,7 +109,7 @@ def log(uri, runtime_storage_inst):
|
||||
|
||||
links = _get_mail_archive_links(uri)
|
||||
for link in links:
|
||||
if _link_content_changed(link, runtime_storage_inst):
|
||||
if _uri_content_changed(link, runtime_storage_inst):
|
||||
for mail in _retrieve_mails(link):
|
||||
LOG.debug('New mail: %s', mail['message_id'])
|
||||
yield mail
|
||||
|
@ -17,8 +17,6 @@ import calendar
|
||||
import cgi
|
||||
import datetime
|
||||
import gzip
|
||||
import io
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@ -26,6 +24,8 @@ import time
|
||||
import iso8601
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
import requests
|
||||
import requests_file
|
||||
import six
|
||||
|
||||
|
||||
@ -117,30 +117,31 @@ user_agents = [
|
||||
]
|
||||
|
||||
|
||||
def do_request(uri, method='get'):
    """Send a single request for ``uri`` and return the response object.

    A short-lived ``requests`` session is opened for the call, with a
    ``requests_file.FileAdapter`` mounted on the ``file://`` prefix so such
    URIs go through the same code path. The ``User-Agent`` header is picked
    at random from the module-level ``user_agents`` list.

    :param uri: location to request
    :param method: request method name passed to the session
        (``'get'`` by default)
    :returns: whatever ``session.request`` returns for the call
    """
    # The session is a context manager, so it is closed on the way out.
    with requests.Session() as http:
        http.mount('file://', requests_file.FileAdapter())
        headers = {'User-Agent': random.choice(user_agents)}
        return http.request(method, uri, headers=headers)
|
||||
|
||||
|
||||
def read_uri(uri):
|
||||
try:
|
||||
req = six.moves.urllib.request.Request(
|
||||
url=uri, headers={'User-Agent': random.choice(user_agents)})
|
||||
fd = six.moves.urllib.request.urlopen(req)
|
||||
if six.PY3:
|
||||
fd = io.TextIOWrapper(fd)
|
||||
raw = fd.read()
|
||||
fd.close()
|
||||
return raw
|
||||
return do_request(uri).text
|
||||
except Exception as e:
|
||||
LOG.warn('Error "%(error)s" while reading uri %(uri)s',
|
||||
LOG.warn('Error "%(error)s" retrieving uri %(uri)s',
|
||||
{'error': e, 'uri': uri})
|
||||
|
||||
|
||||
def read_json_from_uri(uri):
|
||||
try:
|
||||
return json.loads(read_uri(uri))
|
||||
return do_request(uri).json()
|
||||
except Exception as e:
|
||||
LOG.warn('Error "%(error)s" parsing json from uri %(uri)s',
|
||||
{'error': e, 'uri': uri})
|
||||
|
||||
|
||||
def gzip_decompress(content):
|
||||
def _gzip_decompress(content):
|
||||
if six.PY3:
|
||||
return gzip.decompress(content).decode('utf8')
|
||||
else:
|
||||
@ -148,6 +149,22 @@ def gzip_decompress(content):
|
||||
return gzip_fd.read()
|
||||
|
||||
|
||||
def read_gzip_from_uri(uri):
    """Fetch ``uri`` and return its gzip-decompressed content.

    Any exception raised while requesting or decompressing is logged as a
    warning and swallowed; in that case the function returns ``None``.

    :param uri: location of the gzip-compressed resource
    :returns: result of ``_gzip_decompress`` on the response body, or
        ``None`` on failure
    """
    try:
        response = do_request(uri)
        return _gzip_decompress(response.content)
    except Exception as e:
        # Best effort: report the problem and fall through to return None.
        log_args = {'error': e, 'uri': uri}
        LOG.warn('Error "%(error)s" retrieving uri %(uri)s', log_args)
|
||||
|
||||
|
||||
def get_uri_last_modified(uri):
    """Return the ``last-modified`` response header for ``uri``.

    A ``head`` request is issued through ``do_request``. Any exception,
    including a missing ``last-modified`` header, is logged as a warning and
    swallowed; in that case the function returns ``None``.

    :param uri: location to probe
    :returns: value of the ``last-modified`` header, or ``None`` on failure
    """
    try:
        response = do_request(uri, method='head')
        return response.headers['last-modified']
    except Exception as e:
        # Best effort: report the problem and fall through to return None.
        log_args = {'error': e, 'uri': uri}
        LOG.warn('Error "%(error)s" retrieving uri %(uri)s', log_args)
|
||||
|
||||
|
||||
def cmp_to_key(mycmp): # ported from python 3
|
||||
"""Convert a cmp= function into a key= function."""
|
||||
class K(object):
|
||||
|
Loading…
x
Reference in New Issue
Block a user