Backend for openstack.org members report
Implemented backend Change-Id: I5c2fbb51eeed3a70f22fa7bde2e77b492e2060a3
This commit is contained in:
parent
81c3df03d9
commit
ed515b4be9
@ -6767,6 +6767,8 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
|
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
|
||||||
|
"member_lists": ["http://www.openstack.org/community/members/profile/"],
|
||||||
|
"voting_date": "2014-Jan-01",
|
||||||
"project_types": [
|
"project_types": [
|
||||||
{
|
{
|
||||||
"id": "all",
|
"id": "all",
|
||||||
|
@ -181,6 +181,15 @@
|
|||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"member_lists": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"voting_date": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
"project_types": {
|
"project_types": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
|
@ -17,6 +17,9 @@
|
|||||||
# Port where dashboard listens on
|
# Port where dashboard listens on
|
||||||
# listen_port = 8080
|
# listen_port = 8080
|
||||||
|
|
||||||
|
# Number of days to update members
|
||||||
|
# days_to_update_members = 7
|
||||||
|
|
||||||
# The address of file with corrections data
|
# The address of file with corrections data
|
||||||
# corrections_uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json
|
# corrections_uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json
|
||||||
|
|
||||||
|
@ -146,6 +146,8 @@
|
|||||||
],
|
],
|
||||||
|
|
||||||
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
|
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
|
||||||
|
"member_lists": ["http://www.openstack.org/community/members/profile/"],
|
||||||
|
"voting_date": "2014-Jan-01",
|
||||||
|
|
||||||
"project_types": [
|
"project_types": [
|
||||||
{
|
{
|
||||||
|
@ -28,6 +28,8 @@ OPTS = [
|
|||||||
help='The address dashboard listens on'),
|
help='The address dashboard listens on'),
|
||||||
cfg.IntOpt('listen-port', default=8080,
|
cfg.IntOpt('listen-port', default=8080,
|
||||||
help='The port dashboard listens on'),
|
help='The port dashboard listens on'),
|
||||||
|
cfg.IntOpt('days_to_update_members', default=7,
|
||||||
|
help='Number of days to update members'),
|
||||||
cfg.StrOpt('corrections-uri',
|
cfg.StrOpt('corrections-uri',
|
||||||
default=('https://raw.github.com/stackforge/stackalytics/'
|
default=('https://raw.github.com/stackforge/stackalytics/'
|
||||||
'master/etc/corrections.json'),
|
'master/etc/corrections.json'),
|
||||||
|
@ -127,6 +127,11 @@ def _store_companies(runtime_storage_inst, companies):
|
|||||||
for company in companies:
|
for company in companies:
|
||||||
for domain in company['domains']:
|
for domain in company['domains']:
|
||||||
domains_index[domain] = company['company_name']
|
domains_index[domain] = company['company_name']
|
||||||
|
|
||||||
|
if 'aliases' in company:
|
||||||
|
for alias in company['aliases']:
|
||||||
|
domains_index[alias] = company['company_name']
|
||||||
|
|
||||||
runtime_storage_inst.set_by_key('companies', domains_index)
|
runtime_storage_inst.set_by_key('companies', domains_index)
|
||||||
|
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@ from stackalytics.processor import config
|
|||||||
from stackalytics.processor import default_data_processor
|
from stackalytics.processor import default_data_processor
|
||||||
from stackalytics.processor import lp
|
from stackalytics.processor import lp
|
||||||
from stackalytics.processor import mls
|
from stackalytics.processor import mls
|
||||||
|
from stackalytics.processor import mps
|
||||||
from stackalytics.processor import rcs
|
from stackalytics.processor import rcs
|
||||||
from stackalytics.processor import record_processor
|
from stackalytics.processor import record_processor
|
||||||
from stackalytics.processor import runtime_storage
|
from stackalytics.processor import runtime_storage
|
||||||
@ -140,10 +141,24 @@ def process_mail_list(uri, runtime_storage_inst, record_processor_inst):
|
|||||||
runtime_storage_inst.set_records(processed_mail_iterator)
|
runtime_storage_inst.set_records(processed_mail_iterator)
|
||||||
|
|
||||||
|
|
||||||
def update_records(runtime_storage_inst):
|
def process_member_list(uri, runtime_storage_inst, record_processor_inst):
|
||||||
|
member_iterator = mps.log(uri, runtime_storage_inst,
|
||||||
|
cfg.CONF.days_to_update_members)
|
||||||
|
member_iterator_typed = _record_typer(member_iterator, 'member')
|
||||||
|
processed_member_iterator = record_processor_inst.process(
|
||||||
|
member_iterator_typed)
|
||||||
|
runtime_storage_inst.set_records(processed_member_iterator)
|
||||||
|
|
||||||
|
|
||||||
|
def update_members(runtime_storage_inst, record_processor_inst):
|
||||||
|
member_lists = runtime_storage_inst.get_by_key('member_lists') or []
|
||||||
|
for member_list in member_lists:
|
||||||
|
process_member_list(member_list, runtime_storage_inst,
|
||||||
|
record_processor_inst)
|
||||||
|
|
||||||
|
|
||||||
|
def update_records(runtime_storage_inst, record_processor_inst):
|
||||||
repos = utils.load_repos(runtime_storage_inst)
|
repos = utils.load_repos(runtime_storage_inst)
|
||||||
record_processor_inst = record_processor.RecordProcessor(
|
|
||||||
runtime_storage_inst)
|
|
||||||
|
|
||||||
for repo in repos:
|
for repo in repos:
|
||||||
process_repo(repo, runtime_storage_inst, record_processor_inst)
|
process_repo(repo, runtime_storage_inst, record_processor_inst)
|
||||||
@ -244,10 +259,16 @@ def main():
|
|||||||
|
|
||||||
update_pids(runtime_storage_inst)
|
update_pids(runtime_storage_inst)
|
||||||
|
|
||||||
update_records(runtime_storage_inst)
|
record_processor_inst = record_processor.RecordProcessor(
|
||||||
|
runtime_storage_inst)
|
||||||
|
|
||||||
|
update_records(runtime_storage_inst, record_processor_inst)
|
||||||
|
|
||||||
apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)
|
apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)
|
||||||
|
|
||||||
|
# long operation should be the last
|
||||||
|
update_members(runtime_storage_inst, record_processor_inst)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
110
stackalytics/processor/mps.py
Normal file
110
stackalytics/processor/mps.py
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
import six
|
||||||
|
|
||||||
|
from stackalytics.openstack.common import log as logging
|
||||||
|
from stackalytics.processor import utils
|
||||||
|
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
NAME_AND_DATE_PATTERN = r'<h3>(?P<member_name>[^<]*)[\s\S]*?' \
|
||||||
|
r'<div class="span-7 last">(?P<date_joined>[^<]*)'
|
||||||
|
COMPANY_PATTERN = r'<strong>Date\sJoined[\s\S]*?<b>(?P<company_draft>[^<]*)' \
|
||||||
|
r'[\s\S]*?From\s(?P<date_from>[\s\S]*?)\(Current\)'
|
||||||
|
|
||||||
|
CNT_EMPTY_MEMBERS = 50
|
||||||
|
|
||||||
|
|
||||||
|
def _convert_str_fields_to_unicode(result):
|
||||||
|
for field, value in result.iteritems():
|
||||||
|
if type(value) is str:
|
||||||
|
try:
|
||||||
|
value = six.text_type(value, 'utf8')
|
||||||
|
result[field] = value
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _retrieve_member(uri, member_id):
|
||||||
|
|
||||||
|
content = utils.read_uri(uri)
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
member = {}
|
||||||
|
|
||||||
|
for rec in re.finditer(NAME_AND_DATE_PATTERN, content):
|
||||||
|
result = rec.groupdict()
|
||||||
|
|
||||||
|
member['member_id'] = member_id
|
||||||
|
member['member_name'] = result['member_name']
|
||||||
|
member['date_joined'] = result['date_joined']
|
||||||
|
member['member_uri'] = uri
|
||||||
|
break
|
||||||
|
|
||||||
|
member['company_draft'] = '*independent'
|
||||||
|
for rec in re.finditer(COMPANY_PATTERN, content):
|
||||||
|
result = rec.groupdict()
|
||||||
|
|
||||||
|
member['company_draft'] = result['company_draft']
|
||||||
|
|
||||||
|
return member
|
||||||
|
|
||||||
|
|
||||||
|
def log(uri, runtime_storage_inst, days_to_update_members):
|
||||||
|
LOG.debug('Retrieving new openstack.org members')
|
||||||
|
|
||||||
|
last_update_members_date = runtime_storage_inst.get_by_key(
|
||||||
|
'last_update_members_date') or 0
|
||||||
|
last_member_index = runtime_storage_inst.get_by_key(
|
||||||
|
'last_member_index') or 0
|
||||||
|
|
||||||
|
end_update_date = int(time.time()) - days_to_update_members * 24 * 60 * 60
|
||||||
|
|
||||||
|
if last_update_members_date <= end_update_date:
|
||||||
|
last_member_index = 0
|
||||||
|
last_update_members_date = int(time.time())
|
||||||
|
|
||||||
|
runtime_storage_inst.set_by_key('last_update_members_date',
|
||||||
|
last_update_members_date)
|
||||||
|
|
||||||
|
cnt_empty = 0
|
||||||
|
cur_index = last_member_index + 1
|
||||||
|
|
||||||
|
while cnt_empty < CNT_EMPTY_MEMBERS:
|
||||||
|
|
||||||
|
profile_uri = uri + str(cur_index)
|
||||||
|
member = _retrieve_member(profile_uri, str(cur_index))
|
||||||
|
|
||||||
|
if 'member_name' not in member:
|
||||||
|
cnt_empty += 1
|
||||||
|
cur_index += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
_convert_str_fields_to_unicode(member)
|
||||||
|
|
||||||
|
cnt_empty = 0
|
||||||
|
last_member_index = cur_index
|
||||||
|
cur_index += 1
|
||||||
|
LOG.debug('New member: %s', member['member_id'])
|
||||||
|
yield member
|
||||||
|
|
||||||
|
LOG.debug('Last_member_index: %s', last_member_index)
|
||||||
|
runtime_storage_inst.set_by_key('last_member_index', last_member_index)
|
@ -397,6 +397,38 @@ class RecordProcessor(object):
|
|||||||
|
|
||||||
yield bpc
|
yield bpc
|
||||||
|
|
||||||
|
def _process_member(self, record):
|
||||||
|
user_id = "member:" + record['member_id']
|
||||||
|
record['primary_key'] = user_id
|
||||||
|
record['date'] = utils.member_date_to_timestamp(record['date_joined'])
|
||||||
|
record['author_name'] = record['member_name']
|
||||||
|
record['module'] = 'unknown'
|
||||||
|
company_draft = record['company_draft']
|
||||||
|
|
||||||
|
company_name = self.domains_index.get(company_draft) or company_draft
|
||||||
|
|
||||||
|
# author_email is a key to create new user
|
||||||
|
record['author_email'] = user_id
|
||||||
|
record['company_name'] = company_name
|
||||||
|
# _update_record_and_user function will create new user if needed
|
||||||
|
self._update_record_and_user(record)
|
||||||
|
record['company_name'] = company_name
|
||||||
|
user = utils.load_user(self.runtime_storage_inst, user_id)
|
||||||
|
del record['author_email']
|
||||||
|
|
||||||
|
user['user_name'] = record['author_name']
|
||||||
|
user['companies'] = [{
|
||||||
|
'company_name': company_name,
|
||||||
|
'end_date': 0,
|
||||||
|
}]
|
||||||
|
user['company_name'] = company_name
|
||||||
|
|
||||||
|
utils.store_user(self.runtime_storage_inst, user)
|
||||||
|
|
||||||
|
record['company_name'] = company_name
|
||||||
|
|
||||||
|
yield record
|
||||||
|
|
||||||
def _apply_type_based_processing(self, record):
|
def _apply_type_based_processing(self, record):
|
||||||
if record['record_type'] == 'commit':
|
if record['record_type'] == 'commit':
|
||||||
for r in self._process_commit(record):
|
for r in self._process_commit(record):
|
||||||
@ -410,6 +442,9 @@ class RecordProcessor(object):
|
|||||||
elif record['record_type'] == 'bp':
|
elif record['record_type'] == 'bp':
|
||||||
for r in self._process_blueprint(record):
|
for r in self._process_blueprint(record):
|
||||||
yield r
|
yield r
|
||||||
|
elif record['record_type'] == 'member':
|
||||||
|
for r in self._process_member(record):
|
||||||
|
yield r
|
||||||
|
|
||||||
def _renew_record_date(self, record):
|
def _renew_record_date(self, record):
|
||||||
record['week'] = utils.timestamp_to_week(record['date'])
|
record['week'] = utils.timestamp_to_week(record['date'])
|
||||||
|
@ -46,6 +46,13 @@ def date_to_timestamp_ext(d):
|
|||||||
return int(d)
|
return int(d)
|
||||||
|
|
||||||
|
|
||||||
|
def member_date_to_timestamp(d):
|
||||||
|
if not d:
|
||||||
|
return 0
|
||||||
|
return int(time.mktime(
|
||||||
|
datetime.datetime.strptime(d, '%B %d, %Y ').timetuple()))
|
||||||
|
|
||||||
|
|
||||||
def iso8601_to_timestamp(s):
|
def iso8601_to_timestamp(s):
|
||||||
return int(time.mktime(iso8601.parse_date(s).timetuple()))
|
return int(time.mktime(iso8601.parse_date(s).timetuple()))
|
||||||
|
|
||||||
|
60
tests/unit/test_mps.py
Normal file
60
tests/unit/test_mps.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# Copyright (c) 2013 Mirantis Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
# implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
import testtools
|
||||||
|
|
||||||
|
from stackalytics.processor import mps
|
||||||
|
|
||||||
|
|
||||||
|
class TestMps(testtools.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
super(TestMps, self).setUp()
|
||||||
|
|
||||||
|
def test_member_parse_regex(self):
|
||||||
|
|
||||||
|
content = '''<h1>Individual Member Profile</h1>
|
||||||
|
<div class="candidate span-14">
|
||||||
|
<div class="span-4">
|
||||||
|
<img src="/themes/openstack/images/generic-profile-photo.png"><p> </p>
|
||||||
|
</div>
|
||||||
|
<a name="profile-10501"></a>
|
||||||
|
<div class="details span-10 last">
|
||||||
|
<div class="last name-and-title">
|
||||||
|
<h3>Jim Battenberg</h3>
|
||||||
|
</div>
|
||||||
|
<hr><div class="span-3"><strong>Date Joined</strong></div>
|
||||||
|
<div class="span-7 last">June 25, 2013 <br><br></div>
|
||||||
|
<div class="span-3"><strong>Affiliations</strong></div>
|
||||||
|
<div class="span-7 last">
|
||||||
|
<div>
|
||||||
|
<b>Rackspace</b> From (Current)
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="span-3"><strong>Statement of Interest </strong></div>
|
||||||
|
<div class="span-7 last">
|
||||||
|
<p>contribute logic and evangelize openstack</p>
|
||||||
|
</div>
|
||||||
|
<p> </p>'''
|
||||||
|
|
||||||
|
match = re.search(mps.NAME_AND_DATE_PATTERN, content)
|
||||||
|
self.assertTrue(match)
|
||||||
|
self.assertEqual('Jim Battenberg', match.group('member_name'))
|
||||||
|
self.assertEqual('June 25, 2013 ', match.group('date_joined'))
|
||||||
|
|
||||||
|
match = re.search(mps.COMPANY_PATTERN, content)
|
||||||
|
self.assertTrue(match)
|
||||||
|
self.assertEqual('Rackspace', match.group('company_draft'))
|
@ -536,6 +536,58 @@ class TestRecordProcessor(testtools.TestCase):
|
|||||||
self.assertEqual(user, utils.load_user(
|
self.assertEqual(user, utils.load_user(
|
||||||
record_processor_inst.runtime_storage_inst, 'john_doe@gmail.com'))
|
record_processor_inst.runtime_storage_inst, 'john_doe@gmail.com'))
|
||||||
|
|
||||||
|
def test_create_member(self):
|
||||||
|
member_record = {'member_id': '123456789',
|
||||||
|
'member_name': 'John Doe',
|
||||||
|
'member_uri': 'http://www.openstack.org/community'
|
||||||
|
'/members/profile/123456789',
|
||||||
|
'date_joined': 'August 01, 2012 ',
|
||||||
|
'company_draft': 'Mirantis'}
|
||||||
|
|
||||||
|
record_processor_inst = self.make_record_processor()
|
||||||
|
result_member = record_processor_inst._process_member(
|
||||||
|
member_record).next()
|
||||||
|
|
||||||
|
self.assertEqual(result_member['primary_key'], 'member:123456789')
|
||||||
|
self.assertEqual(result_member['date'], utils.member_date_to_timestamp(
|
||||||
|
'August 01, 2012 '))
|
||||||
|
self.assertEqual(result_member['author_name'], 'John Doe')
|
||||||
|
self.assertEqual(result_member['company_name'], 'Mirantis')
|
||||||
|
|
||||||
|
result_user = utils.load_user(
|
||||||
|
record_processor_inst.runtime_storage_inst, 'member:123456789')
|
||||||
|
|
||||||
|
self.assertEqual(result_user['user_name'], 'John Doe')
|
||||||
|
self.assertEqual(result_user['company_name'], 'Mirantis')
|
||||||
|
self.assertEqual(result_user['companies'],
|
||||||
|
[{'company_name': 'Mirantis', 'end_date': 0}])
|
||||||
|
|
||||||
|
def test_update_member(self):
|
||||||
|
member_record = {'member_id': '123456789',
|
||||||
|
'member_name': 'John Doe',
|
||||||
|
'member_uri': 'http://www.openstack.org/community'
|
||||||
|
'/members/profile/123456789',
|
||||||
|
'date_joined': 'August 01, 2012 ',
|
||||||
|
'company_draft': 'Mirantis'}
|
||||||
|
|
||||||
|
record_processor_inst = self.make_record_processor()
|
||||||
|
|
||||||
|
updated_member_record = member_record
|
||||||
|
updated_member_record['member_name'] = 'Bill Smith'
|
||||||
|
updated_member_record['company_draft'] = 'Rackspace'
|
||||||
|
|
||||||
|
result_member = record_processor_inst._process_member(
|
||||||
|
updated_member_record).next()
|
||||||
|
self.assertEqual(result_member['author_name'], 'Bill Smith')
|
||||||
|
self.assertEqual(result_member['company_name'], 'Rackspace')
|
||||||
|
|
||||||
|
result_user = utils.load_user(
|
||||||
|
record_processor_inst.runtime_storage_inst, 'member:123456789')
|
||||||
|
|
||||||
|
self.assertEqual(result_user['user_name'], 'Bill Smith')
|
||||||
|
self.assertEqual(result_user['companies'],
|
||||||
|
[{'company_name': 'Rackspace', 'end_date': 0}])
|
||||||
|
|
||||||
def test_process_email_then_review(self):
|
def test_process_email_then_review(self):
|
||||||
# it is expected that the user profile will contain both email and
|
# it is expected that the user profile will contain both email and
|
||||||
# LP id
|
# LP id
|
||||||
|
Loading…
x
Reference in New Issue
Block a user