diff --git a/etc/default_data.json b/etc/default_data.json index a54bc42a7..796f9a0ed 100644 --- a/etc/default_data.json +++ b/etc/default_data.json @@ -6767,6 +6767,8 @@ } ], "mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"], + "member_lists": ["http://www.openstack.org/community/members/profile/"], + "voting_date": "2014-Jan-01", "project_types": [ { "id": "all", diff --git a/etc/default_data.schema.json b/etc/default_data.schema.json index 03692f2ca..596b0e6a6 100644 --- a/etc/default_data.schema.json +++ b/etc/default_data.schema.json @@ -181,6 +181,15 @@ "type": "string" } }, + "member_lists": { + "type": "array", + "items": { + "type": "string" + } + }, + "voting_date": { + "type": "string" + }, "project_types": { "type": "array", "items": { diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf index 73a628c3f..df382141b 100644 --- a/etc/stackalytics.conf +++ b/etc/stackalytics.conf @@ -17,6 +17,9 @@ # Port where dashboard listens on # listen_port = 8080 +# Number of days to update members +# days_to_update_members = 7 + # The address of file with corrections data # corrections_uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json diff --git a/etc/test_default_data.json b/etc/test_default_data.json index da5e2f445..2752dfcad 100644 --- a/etc/test_default_data.json +++ b/etc/test_default_data.json @@ -146,6 +146,8 @@ ], "mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"], + "member_lists": ["http://www.openstack.org/community/members/profile/"], + "voting_date": "2014-Jan-01", "project_types": [ { diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py index 6a8273a0d..9f2eacfb5 100644 --- a/stackalytics/processor/config.py +++ b/stackalytics/processor/config.py @@ -28,6 +28,8 @@ OPTS = [ help='The address dashboard listens on'), cfg.IntOpt('listen-port', default=8080, help='The port dashboard listens on'), + cfg.IntOpt('days_to_update_members', default=7, + help='Number of days to update members'), cfg.StrOpt('corrections-uri', default=('https://raw.github.com/stackforge/stackalytics/' 'master/etc/corrections.json'), diff --git a/stackalytics/processor/default_data_processor.py b/stackalytics/processor/default_data_processor.py index 769bb420c..7e839b573 100644 --- a/stackalytics/processor/default_data_processor.py +++ b/stackalytics/processor/default_data_processor.py @@ -127,6 +127,11 @@ def _store_companies(runtime_storage_inst, companies): for company in companies: for domain in company['domains']: domains_index[domain] = company['company_name'] + + if 'aliases' in company: + for alias in company['aliases']: + domains_index[alias] = company['company_name'] + runtime_storage_inst.set_by_key('companies', domains_index) diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py index 8ae79c47e..5745f5065 100644 --- a/stackalytics/processor/main.py +++ b/stackalytics/processor/main.py @@ -26,6 +26,7 @@ from stackalytics.processor import config from stackalytics.processor import default_data_processor from stackalytics.processor import lp from stackalytics.processor import mls +from stackalytics.processor import mps from stackalytics.processor import rcs from stackalytics.processor import record_processor from stackalytics.processor import runtime_storage @@ -140,10 +141,24 @@ def process_mail_list(uri, runtime_storage_inst, record_processor_inst): runtime_storage_inst.set_records(processed_mail_iterator) -def update_records(runtime_storage_inst): +def process_member_list(uri, runtime_storage_inst, record_processor_inst): + member_iterator = mps.log(uri, runtime_storage_inst, + cfg.CONF.days_to_update_members) + member_iterator_typed = _record_typer(member_iterator, 'member') + processed_member_iterator = record_processor_inst.process( + member_iterator_typed) + runtime_storage_inst.set_records(processed_member_iterator) + + +def update_members(runtime_storage_inst, record_processor_inst): + member_lists = runtime_storage_inst.get_by_key('member_lists') or [] + for member_list in member_lists: + process_member_list(member_list, runtime_storage_inst, + record_processor_inst) + + +def update_records(runtime_storage_inst, record_processor_inst): repos = utils.load_repos(runtime_storage_inst) - record_processor_inst = record_processor.RecordProcessor( - runtime_storage_inst) for repo in repos: process_repo(repo, runtime_storage_inst, record_processor_inst) @@ -244,10 +259,16 @@ def main(): update_pids(runtime_storage_inst) - update_records(runtime_storage_inst) + record_processor_inst = record_processor.RecordProcessor( + runtime_storage_inst) + + update_records(runtime_storage_inst, record_processor_inst) apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst) + # long operation should be the last + update_members(runtime_storage_inst, record_processor_inst) + if __name__ == '__main__': main() diff --git a/stackalytics/processor/mps.py b/stackalytics/processor/mps.py new file mode 100644 index 000000000..41cde1f3e --- /dev/null +++ b/stackalytics/processor/mps.py @@ -0,0 +1,110 @@ +# Copyright (c) 2013 Mirantis Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import re +import time + +import six + +from stackalytics.openstack.common import log as logging +from stackalytics.processor import utils + + +LOG = logging.getLogger(__name__) + +NAME_AND_DATE_PATTERN = r'
+
contribute logic and evangelize openstack
+''' + + match = re.search(mps.NAME_AND_DATE_PATTERN, content) + self.assertTrue(match) + self.assertEqual('Jim Battenberg', match.group('member_name')) + self.assertEqual('June 25, 2013 ', match.group('date_joined')) + + match = re.search(mps.COMPANY_PATTERN, content) + self.assertTrue(match) + self.assertEqual('Rackspace', match.group('company_draft')) diff --git a/tests/unit/test_record_processor.py b/tests/unit/test_record_processor.py index e771128b1..019c68d59 100644 --- a/tests/unit/test_record_processor.py +++ b/tests/unit/test_record_processor.py @@ -536,6 +536,58 @@ class TestRecordProcessor(testtools.TestCase): self.assertEqual(user, utils.load_user( record_processor_inst.runtime_storage_inst, 'john_doe@gmail.com')) + def test_create_member(self): + member_record = {'member_id': '123456789', + 'member_name': 'John Doe', + 'member_uri': 'http://www.openstack.org/community' + '/members/profile/123456789', + 'date_joined': 'August 01, 2012 ', + 'company_draft': 'Mirantis'} + + record_processor_inst = self.make_record_processor() + result_member = record_processor_inst._process_member( + member_record).next() + + self.assertEqual(result_member['primary_key'], 'member:123456789') + self.assertEqual(result_member['date'], utils.member_date_to_timestamp( + 'August 01, 2012 ')) + self.assertEqual(result_member['author_name'], 'John Doe') + self.assertEqual(result_member['company_name'], 'Mirantis') + + result_user = utils.load_user( + record_processor_inst.runtime_storage_inst, 'member:123456789') + + self.assertEqual(result_user['user_name'], 'John Doe') + self.assertEqual(result_user['company_name'], 'Mirantis') + self.assertEqual(result_user['companies'], + [{'company_name': 'Mirantis', 'end_date': 0}]) + + def test_update_member(self): + member_record = {'member_id': '123456789', + 'member_name': 'John Doe', + 'member_uri': 'http://www.openstack.org/community' + '/members/profile/123456789', + 'date_joined': 'August 01, 2012 ', + 'company_draft': 'Mirantis'} + + record_processor_inst = self.make_record_processor() + + updated_member_record = member_record + updated_member_record['member_name'] = 'Bill Smith' + updated_member_record['company_draft'] = 'Rackspace' + + result_member = record_processor_inst._process_member( + updated_member_record).next() + self.assertEqual(result_member['author_name'], 'Bill Smith') + self.assertEqual(result_member['company_name'], 'Rackspace') + + result_user = utils.load_user( + record_processor_inst.runtime_storage_inst, 'member:123456789') + + self.assertEqual(result_user['user_name'], 'Bill Smith') + self.assertEqual(result_user['companies'], + [{'company_name': 'Rackspace', 'end_date': 0}]) + def test_process_email_then_review(self): # it is expected that the user profile will contain both email and # LP id