Rewrite maintainers.py functionality
Base it on maintainer-only votes cast within the requisite timeframe
rather than attempting to parse Gerrit ACLs and query groups.

Change-Id: I982cb2e422f267b2834b4b20b11f1fd011516548
parent 3242fadc59
commit fed34c981b
@@ -1,6 +1,4 @@
#!/usr/bin/env python

# Copyright (c) 2015 OpenStack Foundation
# Copyright OpenDev Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -8,154 +6,169 @@
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# Description: When run using OpenStack's Gerrit server, this builds
# JSON and YAML representations of repos with information on the
# official owning project team if any, deliverable tags, and groups
# with approve rights listing the members of each along with their
# Gerrit preferred E-mail addresses and usernames when available.

# Rationale: It was done as a demonstration to a representative of a
# foundation member company who requested a list of the "core
# reviewers" for official projects, optionally broken down by
# integrated vs. other. I'm attempting to show that this data is
# already publicly available and can be extracted/analyzed by anyone
# without needing to request it.

# Use: This needs your Gerrit username passed as the command-line
# parameter, found at https://review.opendev.org/#/settings/ when
# authenticated in the WebUI. It also prompts for an HTTP password
# which https://review.opendev.org/#/settings/http-password will
# allow you to generate. The results end up in files named
# approvers.json and approvers.yaml. At the time of writing, it
# takes approximately 6.5 minutes to run on a well-connected machine
# with 70-80ms round-trip latency to review.opendev.org.

# Example:
#
# $ virtualenv approvers
# [...]
# $ ./approvers/bin/pip install pyyaml requests
# [...]
# $ ./approvers/bin/python tools/who-approves.py fungi
# Password:
# [wait for completion]
# $ ./approvers/bin/python
# >>> import yaml
# >>>
# >>> def get_approvers(repos):
# ...     approvers = set()
# ...     for repo in repos:
# ...         for group in repos[repo]['approvers']:
# ...             for approver in repos[repo]['approvers'][group]:
# ...                 approvers.add(approver)
# ...     return(approvers)
# ...
# >>> p = yaml.safe_load(open('approvers.yaml'))
# >>> print('Total repos: %s' % len(p))
# Total repos: 751
# >>> print('Total approvers: %s' % len(get_approvers(p)))
# Total approvers: 849
# >>>
# >>> o = {k: v for k, v in p.iteritems() if 'team' in v}
# >>> print('Repos for official teams: %s' % len(o))
# Repos for official teams: 380
# >>> print('OpenStack repo approvers: %s' % len(get_approvers(o)))
# OpenStack repo approvers: 456
# >>>
# >>> i = {k: v for k, v in p.iteritems() if 'tags' in v
# ...     and 'release:managed' in v['tags']}
# >>> print('Repos under release management: %s' % len(i))
# Repos under release management: 77
# >>> print('Managed release repo approvers: %s' % len(get_approvers(i)))
# Managed release repo approvers: 245

import getpass
import json
import datetime
import os
import re
import sys

import requests
from engagement.stats import (
    from_gerrit_time,
    get_projects,
    query_gerrit,
    report_times,
    to_gerrit_time,
)

import yaml


def main():
    gerrit_url = 'https://review.opendev.org/'
    try:
        gerrit_auth = requests.auth.HTTPDigestAuth(
            sys.argv[1], getpass.getpass())
    except IndexError:
        sys.stderr.write("Usage: %s USERNAME\n" % sys.argv[0])
        sys.exit(1)
    acl_path = (
        'gitweb?p=%s.git;a=blob_plain;f=project.config;hb=refs/meta/config')
    group_path = 'a/groups/%s/members/?recursive&pp=0'
    projects_file = ('gitweb?p=openstack/governance.git;a=blob_plain;'
                     'f=reference/projects.yaml;hb=%s')
    ref_name = 'refs/heads/master'
    aprv_pattern = r'label-Workflow = .*\.\.\+1 group (.*)'
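    # The pattern above matches project.config ACL lines like
    # "label-Workflow = -1..+1 group foo-core" and captures the name of the
    # Gerrit group granted approve (Workflow +1) rights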
    projects = requests.get(gerrit_url + projects_file % ref_name)
    projects.encoding = 'utf-8'  # Workaround for Gitweb encoding
    projects = yaml.safe_load(projects.text)
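    # Gerrit's REST API prepends a ")]}'" line to its JSON responses as an
    # anti-XSSI measure, which is why the first four characters are sliced
    # off before parsing below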
    repos_dump = json.loads(requests.get(
        gerrit_url + 'projects/?pp=0').text[4:])
    all_groups = json.loads(requests.get(gerrit_url + 'a/groups/',
                                         auth=gerrit_auth).text[4:])
    repos = {}
    aprv_groups = {}
    for repo in repos_dump:
        repos[repo.encode('utf-8')] = {'approvers': {}}
        acl_ini = requests.get(gerrit_url + acl_path % repo).text
        for aprv_group in [str(x) for x in re.findall(aprv_pattern, acl_ini)]:
            if aprv_group not in repos[repo]['approvers']:
                repos[repo]['approvers'][aprv_group] = []
            if aprv_group not in aprv_groups:
                aprv_groups[aprv_group] = []
    for team in projects:
        if 'deliverables' in projects[team]:
            for deli in projects[team]['deliverables']:
                if 'repos' in projects[team]['deliverables'][deli]:
                    drepos = projects[team]['deliverables'][deli]['repos']
                    for repo in drepos:
                        if repo in repos:
                            repos[repo]['team'] = team
                            if 'tags' in projects[team]['deliverables'][deli]:
                                repos[repo]['tags'] = projects[
                                    team]['deliverables'][deli]['tags']
    for aprv_group in aprv_groups.keys():
        # It's possible for built-in metagroups in recent Gerrit releases to
        # appear in ACLs but not in the groups list
        if aprv_group in all_groups:
            aprv_groups[aprv_group] = json.loads(requests.get(
                gerrit_url + group_path % all_groups[aprv_group]['id'],
                auth=gerrit_auth).text[4:])
        else:
            sys.stderr.write('Ignoring nonexistent "%s" group.\n' % aprv_group)
    for repo in repos:
        for aprv_group in repos[repo]['approvers'].keys():
            for approver in aprv_groups[aprv_group]:
                if 'name' in approver:
                    approver_details = '"%s"' % approver['name']
                else:
                    approver_details = ''
                if 'email' in approver:
                    if approver_details:
                        approver_details += ' '
                    approver_details += '<%s>' % approver['email']
                if 'username' in approver:
                    if approver_details:
                        approver_details += ' '
                    approver_details += '(%s)' % approver['username']
                repos[repo]['approvers'][aprv_group].append(
                    approver_details.encode('utf-8'))
    approvers_yaml = open('approvers.yaml', 'w')
    yaml.dump(repos, approvers_yaml, allow_unicode=True, encoding='utf-8',
              default_flow_style=False)
    approvers_json = open('approvers.json', 'w')
    json.dump(repos, approvers_json, indent=2)
def usage_error():
    """Write a generic usage message to stderr and exit nonzero"""

    sys.stderr.write(
        'ERROR: specify report period like YEAR, YEAR-H[1-2], YEAR-Q[1-4],\n'
        ' YEAR-[01-12], or YYYY-MM-DD..YYYY-MM-DD for a date range\n'
        ' (start date is inclusive, end date is exclusive)\n')
    sys.exit(1)


def parse_report_period(when):
    """Parse a supplied report period string, returning a tuple of
    after and before datetime objects"""

    daterange = re.compile(
        r'^(\d{4})-(\d{2})-(\d{2})\.\.(\d{4})-(\d{2})-(\d{2})$')
    monthly = re.compile(r'^(\d{4})-(\d{2})$')
    quarterly = re.compile(r'^(\d{4})-q([1-4])$', re.IGNORECASE)
    halfyearly = re.compile(r'^(\d{4})-h([1-2])$', re.IGNORECASE)
    yearly = re.compile(r'^\d{4}$')
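    # Accepted period forms, per the patterns above: "2022" (year),
    # "2022-h1" (half-year), "2022-q3" (quarter), "2022-04" (month), or an
    # explicit "2022-01-01..2022-04-01" date range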
    # TODO: merge this functionality into engagement.stats.parse_report_period
    if daterange.match(when):
        after = datetime.datetime(
            int(daterange.match(when).group(1)),
            int(daterange.match(when).group(2)),
            int(daterange.match(when).group(3)))
        before = datetime.datetime(
            int(daterange.match(when).group(4)),
            int(daterange.match(when).group(5)),
            int(daterange.match(when).group(6)))
        return after, before
    if monthly.match(when):
        start_year = int(monthly.match(when).group(1))
        start_month = int(monthly.match(when).group(2))
        end_year = start_year + start_month // 12
        end_month = 1 + start_month % 12
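        # For example, "2021-12" yields after=2021-12-01 and
        # before=2022-01-01; the before boundary is exclusive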
    elif quarterly.match(when):
        start_year = int(quarterly.match(when).group(1))
        start_month = 1 + 3 * (int(quarterly.match(when).group(2)) - 1)
        end_year = start_year + (start_month + 2) // 12
        end_month = 1 + (start_month + 2) % 12
    elif halfyearly.match(when):
        start_year = int(halfyearly.match(when).group(1))
        start_month = 1 + 6 * (int(halfyearly.match(when).group(2)) - 1)
        end_year = start_year + (start_month + 5) // 12
        end_month = 1 + (start_month + 5) % 12
    elif yearly.match(when):
        start_year = int(yearly.match(when).group())
        start_month = 1
        end_year = start_year + 1
        end_month = 1
    else:
        usage_error()
    after = datetime.datetime(start_year, start_month, 1)
    before = datetime.datetime(end_year, end_month, 1)
    return after, before


def parse_command_line():
    """Parse the command line to obtain the report period, then return it"""

    if len(sys.argv) == 2:
        return sys.argv[1]
    else:
        usage_error()


def main(verbose=0):
    """Utility entry point"""

    argument = parse_command_line()
    after, before = parse_report_period(argument)
    changes = dict()

    # TODO: deduplicate this and the similar version in stats.main
    # Shard querying by project, to help with the inherent instability of
    # result pagination from the Gerrit API
    for project in get_projects(verbose=verbose):
        if verbose >= 1:
            print("Checking project: %s" % project)
        offset = 0
        # Loop due to unavoidable query result pagination
        while offset >= 0:
            # We only constrain the query by the after date, as changes
            # created between the after and before dates may have been
            # updated more recently with a new revision or comment
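            # DETAILED_ACCOUNTS and DETAILED_LABELS ensure each vote in the
            # response carries the reviewer's name, e-mail address, numeric
            # value and date, which the maintainer matching below depends
            # on, while SKIP_DIFFSTAT avoids computing unneeded diff stats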
            new_changes = query_gerrit("changes/", params={
                "q": "project:%s after:{%s}" % (
                    project, to_gerrit_time(after)),
                "no-limit": "1",
                "start": offset,
                "o": ["DETAILED_ACCOUNTS", "DETAILED_LABELS", "SKIP_DIFFSTAT"],
                }, verbose=verbose)
            # Since we redundantly query ranges with offsets to help combat
            # pagination instability, we must deduplicate results
            for change in new_changes:
                if change["id"] not in changes:
                    changes[change["id"]] = change
            # Offset additional pages by half the returned entry count to help
            # avoid missing changes due to pagination instability
            if new_changes and new_changes[-1].get("_more_changes", False):
                offset += int(len(new_changes) / 2)
            else:
                offset = -1

    report = {"namespaces": dict()}
    report_times(report, after, before)
    maintainers = dict()
    for change in changes.values():
        namespace = change["project"].split("/")[0]
        if namespace not in report["namespaces"]:
            report["namespaces"][namespace] = set()
        if "labels" in change:
            for label, maintvotes in {
                    "Code-Review": (-2, 2), "Workflow": (1,)}.items():
                if label in change["labels"]:
                    for vote in change["labels"][label].get("all", []):
                        when = vote.get("date")
                        if ("name" in vote and "email" in vote
                                and vote.get("value", 0) in maintvotes and when
                                and after < from_gerrit_time(when) < before):
                            if namespace not in maintainers:
                                maintainers[namespace] = set()
                            maintainers[namespace].add('"%s" <%s>' % (
                                vote["name"], vote["email"]))
    for namespace in maintainers:
        report["namespaces"][namespace] = sorted(list(maintainers[namespace]))

    # Operate on a copy of the keys since we'll be altering the dict
    for namespace in list(report["namespaces"].keys()):
        # Cull inactive namespaces from the report
        if not report["namespaces"][namespace]:
            del report["namespaces"][namespace]

    # Write the full YAML structured data report
    os.makedirs("maintainers", exist_ok=True)
    open("maintainers/%s.yaml" % argument, "w").write(yaml.dump(report))

    # Write per-namespace text dumps of names/addresses
    for namespace, maintlist in list(report["namespaces"].items()):
        with open("maintainers/%s_%s.txt" % (
                argument, namespace), "w", encoding="utf-8") as dumpfile:
            for maintainer in maintlist:
                dumpfile.write(maintainer + "\n")