# Copyright OpenDev Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import argparse
import copy
import datetime
import getpass
import math

# Install opensearch-py
from opensearchpy import OpenSearch

# Base query parameters to get human authored changes
BASE_CHANGES_QUERY = {
    "query": {
        "bool": {
            # Should is logical OR
            "should": [
            ],
            # Must is logical AND
            "must": [
            ],
            # NOT(A OR B)
            "must_not": [
            ],
            "filter": [
                {
                    "term": {
                        "type": "changeset"
                    }
                },
                {
                    "term": {
                        "author_bot": False
                    }
                },
            ]
        }
    },
}

CLOSED_STATUS = [
    {
        "term": {
            "status": "MERGED"
        }
    },
    {
        "term": {
            "status": "ABANDONED"
        }
    },
]

MERGED_STATUS = [
    {
        "term": {
            "status": "MERGED"
        }
    },
]


def set_date_range(start_date, end_date):
    date_range = {
        "range": {
            "grimoire_creation_date": {
                "gte": start_date,
                "lte": end_date,
                "format": "strict_date_optional_time"
            }
        }
    }
    return date_range


def calculate_review_efficiency(client, project_name, start_date, end_date):
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }

    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = CLOSED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["track_total_hits"] = True
    closed_query["size"] = 0

    open_query = copy.deepcopy(BASE_CHANGES_QUERY)
    open_query["query"]["bool"]["must_not"] = CLOSED_STATUS
    open_query["query"]["bool"]["filter"].append(date_range)
    open_query["query"]["bool"]["must"].append(project_term)
    open_query["track_total_hits"] = True
    open_query["size"] = 0

    r = client.search(index='openstack_gerrit', body=closed_query)
    closed_total = r['hits']['total']['value']

    r = client.search(index='openstack_gerrit', body=open_query)
    open_total = r['hits']['total']['value']

    rei = float(closed_total / open_total)
    return rei, None


def calculate_merge_time(client, project_name, start_date, end_date):
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }

    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = MERGED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["size"] = 100

    times_to_merge = []
    r = client.search(index='openstack_gerrit', scroll='1m', body=closed_query)
    hits = r['hits']['hits']
    while hits:
        for hit in hits:
            open_date = datetime.datetime.fromisoformat(
                hit["_source"]["grimoire_creation_date"])
            close_date = datetime.datetime.fromisoformat(
                hit["_source"]["last_updated"])
            times_to_merge.append(close_date - open_date)
        # Beware normal pagination. Results are not consistent.
        # Use scroll instead
        r = client.scroll(scroll_id=r["_scroll_id"], scroll="1m")
        hits = r['hits']['hits']

    times_sorted = sorted(times_to_merge)
    middle = math.floor(len(times_to_merge) / 2)
    median = times_sorted[middle]
    average_seconds = \
        sum(map(lambda x: x.total_seconds(), times_sorted)) / len(times_sorted)
    average = datetime.timedelta(seconds=average_seconds)
    return median, average


def calculate_time_to_review(client, project_name, start_date, end_date):
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }

    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = MERGED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["size"] = 100

    times_to_review = []
    r = client.search(index='openstack_gerrit', scroll='1m', body=closed_query)
    hits = r['hits']['hits']
    while hits:
        for hit in hits:
            # Note time_to_first_review appears to be storing a float count
            # of the number of days to the first review. This is an odd
            # way to store the value so I'm documenting it here.
            time_to_first_review = hit["_source"]["time_to_first_review"]
            if time_to_first_review:
                # We can apparently get None values back. Ignore them.
                times_to_review.append(time_to_first_review)
        # Beware normal pagination. Results are not consistent.
        # Use scroll instead
        r = client.scroll(scroll_id=r["_scroll_id"], scroll="1m")
        hits = r['hits']['hits']

    times_sorted = sorted(times_to_review)
    middle = math.floor(len(times_to_review) / 2)
    median = datetime.timedelta(days=times_sorted[middle])
    average = datetime.timedelta(days=sum(times_sorted) / len(times_sorted))
    return median, average


def calculate_patchset_per_review(client, project_name, start_date, end_date):
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }

    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = CLOSED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["size"] = 100

    patchsets_list = []
    r = client.search(index='openstack_gerrit', scroll='1m', body=closed_query)
    hits = r['hits']['hits']
    while hits:
        for hit in hits:
            patchsets = hit["_source"]["patchsets"]
            patchsets_list.append(patchsets)
        # Beware normal pagination. Results are not consistent.
        # Use scroll instead
        r = client.scroll(scroll_id=r["_scroll_id"], scroll="1m")
        hits = r['hits']['hits']

    patchsets_sorted = sorted(patchsets_list)
    middle = math.floor(len(patchsets_list) / 2)
    # Index into the sorted list so we actually get the median value.
    median = patchsets_sorted[middle]
    average = sum(patchsets_sorted) / len(patchsets_sorted)
    return median, average


QUERIES = {
    "rei": calculate_review_efficiency,
    "time-to-merge": calculate_merge_time,
    "time-to-review": calculate_time_to_review,
    "patchset-per-review": calculate_patchset_per_review,
}


def gather_metrics(client, args):
    if args.csv:
        print("metric,project,starttime,endtime,value")
    projects = [p for p in args.project.split(',') if p]
    if args.query == "ALL":
        queries = QUERIES.items()
    else:
        queries = [(args.query, QUERIES[args.query])]
    for query, func in queries:
        for project in projects:
            median, average = func(
                client, project, args.start_date, args.end_date)
            if args.csv:
                if isinstance(median, datetime.timedelta):
                    median = median.total_seconds()
                if isinstance(average, datetime.timedelta):
                    average = average.total_seconds()
                if not average:
                    # Some metrics return a single value; it is carried in
                    # the median slot.
                    print("%s,%s,%s,%s,%s" % (query, project,
                                              args.start_date,
                                              args.end_date, median))
                else:
                    print("median-%s,%s,%s,%s,%s" % (query, project,
                                                     args.start_date,
                                                     args.end_date, median))
                    print("average-%s,%s,%s,%s,%s" % (query, project,
                                                      args.start_date,
                                                      args.end_date, average))
            else:
                if not average:
                    # Some metrics return a single value; it is carried in
                    # the median slot.
                    print("%s %s %s to %s: %s" % (project, query,
                                                  args.start_date,
                                                  args.end_date, median))
                else:
                    print("%s median %s %s to %s: %s" % (project, query,
                                                         args.start_date,
                                                         args.end_date,
                                                         median))
                    print("%s average %s %s to %s: %s" % (project, query,
                                                          args.start_date,
                                                          args.end_date,
                                                          average))


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--host",
                        help="Bitergia opensearch host to connect to",
                        default="openstack.biterg.io")
    parser.add_argument("--port",
                        help="Bitergia opensearch port to connect to",
                        type=int, default=443)
    parser.add_argument("--url-prefix",
                        help="Bitergia opensearch url prefix to connect to",
                        default='data')
    parser.add_argument("--user",
                        help="Bitergia opensearch username",
                        required=True)
    parser.add_argument("--query",
                        help="Metric to query",
                        default='ALL')
    parser.add_argument("--csv",
                        help="Emit csv output",
                        action="store_true")
    parser.add_argument("project",
                        help="Project to filter results for. This must "
                             "match bitergia's idea of a project name. May "
                             "be a comma separated list.")
    parser.add_argument("start_date",
                        help="Start date for results. "
                             "eg 2025-01-01T00:00:00.000Z")
    parser.add_argument("end_date",
                        help="End date for results. "
                             "eg 2025-01-01T00:00:00.000Z")
    args = parser.parse_args()

    host = args.host
    port = args.port
    url_prefix = args.url_prefix
    passwd = getpass.getpass('Password: ')
    auth = (args.user, passwd)

    # indexes appear to be openstack_git, openstack_gerrit, openstack_mbox
    client = OpenSearch(
        hosts=[{'host': host, 'port': port}],
        url_prefix=url_prefix,
        http_compress=True,  # enables gzip compression for request bodies
        http_auth=auth,
        use_ssl=True,
        verify_certs=True,
    )

    gather_metrics(client, args)


if __name__ == "__main__":
    main()
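
# A minimal usage sketch. The script filename and the project name below are
# hypothetical; the project argument must match Bitergia's idea of a project
# name, and any key from QUERIES (or ALL) can be passed to --query:
#
#   python3 bitergia_review_metrics.py --user alice --query time-to-merge \
#       nova 2025-01-01T00:00:00.000Z 2025-04-01T00:00:00.000Z
#
# Passing --csv emits the same values as
# metric,project,starttime,endtime,value rows instead of plain text.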