
Running this command for each different project and metric gets old, particularly when you have to input the password each time. Update the script to collect all metrics for a list of projects. Then, to make that more useful, add support for CSV output.

Change-Id: Id5ee94e046e11813387ad0d3ae4a9a2e8490062d
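
A hypothetical invocation (the script name and project names are invented
for illustration; the flags match the argparse definitions below):

    python3 gerrit_metrics.py --user alice --csv \
        nova,neutron 2025-01-01T00:00:00.000Z 2025-04-01T00:00:00.000Z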

# Copyright OpenDev Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import argparse
import copy
import datetime
import getpass
import math

# Requires the opensearch-py package (pip install opensearch-py)
from opensearchpy import OpenSearch


# Base query parameters to get human authored changes
BASE_CHANGES_QUERY = {
    "query": {
        "bool": {
            # Should is logical OR
            "should": [
            ],
            # Must is logical AND
            "must": [
            ],
            # NOT(A OR B)
            "must_not": [
            ],
            "filter": [
                {
                    "term": {
                        "type": "changeset"
                    }
                },
                {
                    "term": {
                        "author_bot": False
                    }
                },
            ]
        }
    },
}
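
# For illustration: after one of the metric functions below fills in the
# clauses, a count query ends up shaped roughly like this (project name
# hypothetical):
#
#   {"query": {"bool": {"should": [<status terms>],
#                       "must": [{"term": {"project": "nova"}}],
#                       "must_not": [],
#                       "filter": [<changeset/bot terms>, <date range>]}},
#    "track_total_hits": True,
#    "size": 0}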

CLOSED_STATUS = [
    {
        "term": {
            "status": "MERGED"
        }
    },
    {
        "term": {
            "status": "ABANDONED"
        }
    },
]

MERGED_STATUS = [
    {
        "term": {
            "status": "MERGED"
        }
    },
]


def set_date_range(start_date, end_date):
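    """Build a range filter clause for grimoire_creation_date.

    start_date and end_date are strict_date_optional_time strings,
    e.g. 2025-01-01T00:00:00.000Z.
    """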
    date_range = {
        "range": {
            "grimoire_creation_date": {
                "gte": start_date,
                "lte": end_date,
                "format": "strict_date_optional_time"
            }
        }
    }
    return date_range


def calculate_review_efficiency(client, project_name, start_date, end_date):
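    """Calculate the review efficiency index (REI) for a project.

    REI is the count of changes closed (merged or abandoned) in the
    window divided by the count still open. Returns (rei, None); the
    None keeps the signature consistent with the other metric functions.
    """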
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }

    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = CLOSED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["track_total_hits"] = True
    closed_query["size"] = 0

    open_query = copy.deepcopy(BASE_CHANGES_QUERY)
    open_query["query"]["bool"]["must_not"] = CLOSED_STATUS
    open_query["query"]["bool"]["filter"].append(date_range)
    open_query["query"]["bool"]["must"].append(project_term)
    open_query["track_total_hits"] = True
    open_query["size"] = 0

    r = client.search(index='openstack_gerrit', body=closed_query)
    closed_total = r['hits']['total']['value']

    r = client.search(index='openstack_gerrit', body=open_query)
    open_total = r['hits']['total']['value']

    # True division already returns a float; guard against a window with
    # no open changes to avoid a ZeroDivisionError.
    rei = closed_total / open_total if open_total else math.inf
    return rei, None


def calculate_merge_time(client, project_name, start_date, end_date):
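    """Calculate the median and average time from creation to merge.

    Scrolls through every merged change in the window and returns
    (median, average) as datetime.timedelta values.
    """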
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }
    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = MERGED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["size"] = 100

    times_to_merge = []
    r = client.search(index='openstack_gerrit', scroll='1m', body=closed_query)
    hits = r['hits']['hits']
    while hits:
        for hit in hits:
            open_date = datetime.datetime.fromisoformat(
                hit["_source"]["grimoire_creation_date"])
            close_date = datetime.datetime.fromisoformat(
                hit["_source"]["last_updated"])
            times_to_merge.append(close_date - open_date)

        # Beware normal pagination. Results are not consistent.
        # Use scroll instead.
        r = client.scroll(scroll_id=r["_scroll_id"], scroll="1m")
        hits = r['hits']['hits']
    times_sorted = sorted(times_to_merge)
    middle = math.floor(len(times_sorted) / 2)
    median = times_sorted[middle]
    average_seconds = \
        sum(t.total_seconds() for t in times_sorted) / len(times_sorted)
    average = datetime.timedelta(seconds=average_seconds)
    return median, average


def calculate_time_to_review(client, project_name, start_date, end_date):
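    """Calculate the median and average time to first review.

    The index stores time_to_first_review as a float count of days, so
    values are converted to datetime.timedelta before being returned as
    (median, average).
    """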
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }
    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = MERGED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["size"] = 100

    times_to_review = []
    r = client.search(index='openstack_gerrit', scroll='1m', body=closed_query)
    hits = r['hits']['hits']
    while hits:
        for hit in hits:
            # Note time_to_first_review appears to be storing a float count
            # of the number of days to the first review. This is an odd
            # way to store the value so I'm documenting it here.
            time_to_first_review = hit["_source"]["time_to_first_review"]
            if time_to_first_review:
                # We can apparently get None values back. Ignore them.
                times_to_review.append(time_to_first_review)

        # Beware normal pagination. Results are not consistent.
        # Use scroll instead.
        r = client.scroll(scroll_id=r["_scroll_id"], scroll="1m")
        hits = r['hits']['hits']
    times_sorted = sorted(times_to_review)
    middle = math.floor(len(times_sorted) / 2)
    median = datetime.timedelta(days=times_sorted[middle])
    average = datetime.timedelta(days=sum(times_sorted) / len(times_sorted))
    return median, average


def calculate_patchset_per_review(client, project_name, start_date, end_date):
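    """Calculate the median and average patchset count per closed change.

    Covers both merged and abandoned changes in the window and returns
    (median, average) as plain numbers.
    """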
    date_range = set_date_range(start_date, end_date)
    project_term = {
        "term": {
            "project": project_name
        }
    }
    closed_query = copy.deepcopy(BASE_CHANGES_QUERY)
    closed_query["query"]["bool"]["should"] = CLOSED_STATUS
    closed_query["query"]["bool"]["filter"].append(date_range)
    closed_query["query"]["bool"]["must"].append(project_term)
    closed_query["size"] = 100

    patchsets_list = []
    r = client.search(index='openstack_gerrit', scroll='1m', body=closed_query)
    hits = r['hits']['hits']
    while hits:
        for hit in hits:
            patchsets = hit["_source"]["patchsets"]
            patchsets_list.append(patchsets)

        # Beware normal pagination. Results are not consistent.
        # Use scroll instead.
        r = client.scroll(scroll_id=r["_scroll_id"], scroll="1m")
        hits = r['hits']['hits']
    patchsets_sorted = sorted(patchsets_list)
    middle = math.floor(len(patchsets_sorted) / 2)
    # Index the sorted list, not the insertion-ordered one, or this
    # value is not a median.
    median = patchsets_sorted[middle]
    average = sum(patchsets_sorted) / len(patchsets_sorted)
    return median, average


QUERIES = {
    "rei": calculate_review_efficiency,
    "time-to-merge": calculate_merge_time,
    "time-to-review": calculate_time_to_review,
    "patchset-per-review": calculate_patchset_per_review,
}


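# For reference, the --csv output below is one header row followed by one
# row per metric/project pair; metrics that return a single value carry no
# median-/average- prefix (values elided here):
#
#   metric,project,starttime,endtime,value
#   rei,<project>,<start>,<end>,<ratio>
#   median-time-to-merge,<project>,<start>,<end>,<seconds>
#   average-time-to-merge,<project>,<start>,<end>,<seconds>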
def gather_metrics(client, args):
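    """Run the selected metric queries for each project and print results.

    With --csv, a header row is printed first and timedelta values are
    flattened to seconds; otherwise human readable lines are printed.
    """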
    if args.csv:
        print("metric,project,starttime,endtime,value")
    projects = [p for p in args.project.split(',') if p]
    if args.query == "ALL":
        queries = QUERIES.items()
    else:
        queries = [(args.query, QUERIES[args.query])]
    for query, func in queries:
        for project in projects:
            median, average = func(
                client, project, args.start_date, args.end_date)
            if args.csv:
                if isinstance(median, datetime.timedelta):
                    median = median.total_seconds()
                if isinstance(average, datetime.timedelta):
                    average = average.total_seconds()
                if not average:
                    # Some metrics return a single value, which overloads
                    # the median slot.
                    print("%s,%s,%s,%s,%s" %
                          (query, project,
                           args.start_date, args.end_date, median))
                else:
                    print("median-%s,%s,%s,%s,%s" %
                          (query, project,
                           args.start_date, args.end_date, median))
                    print("average-%s,%s,%s,%s,%s" %
                          (query, project,
                           args.start_date, args.end_date, average))
            else:
                if not average:
                    # Some metrics return a single value, which overloads
                    # the median slot.
                    print("%s %s %s to %s: %s" %
                          (project, query,
                           args.start_date, args.end_date, median))
                else:
                    print("%s median %s %s to %s: %s" %
                          (project, query,
                           args.start_date, args.end_date, median))
                    print("%s average %s %s to %s: %s" %
                          (project, query,
                           args.start_date, args.end_date, average))


def main():
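    """Parse arguments, prompt for the opensearch password, and report."""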
    parser = argparse.ArgumentParser()
    parser.add_argument("--host",
                        help="Bitergia opensearch host to connect to",
                        default="openstack.biterg.io")
    parser.add_argument("--port",
                        help="Bitergia opensearch port to connect to",
                        type=int, default=443)
    parser.add_argument("--url-prefix",
                        help="Bitergia opensearch url prefix to connect to",
                        default='data')
    parser.add_argument("--user",
                        help="Bitergia opensearch username", required=True)
    parser.add_argument("--query",
                        help="Metric to query",
                        # Restrict to known metrics so a typo fails with a
                        # usage error instead of a KeyError later.
                        choices=["ALL"] + sorted(QUERIES),
                        default='ALL')
    parser.add_argument("--csv", help="Emit csv output", action="store_true")
    parser.add_argument("project",
                        help="Project to filter results for. This must "
                        "match bitergia's idea of a project name. May "
                        "be a comma separated list.")
    parser.add_argument("start_date",
                        help="Start date for results. "
                        "eg 2025-01-01T00:00:00.000Z")
    parser.add_argument("end_date",
                        help="End date for results. "
                        "eg 2025-01-01T00:00:00.000Z")
    args = parser.parse_args()
    host = args.host
    port = args.port
    url_prefix = args.url_prefix

    passwd = getpass.getpass('Password: ')
    auth = (args.user, passwd)

    # indexes appear to be openstack_git, openstack_gerrit, openstack_mbox
    client = OpenSearch(
        hosts=[{'host': host, 'port': port}],
        url_prefix=url_prefix,
        http_compress=True,  # enables gzip compression for request bodies
        http_auth=auth,
        use_ssl=True,
        verify_certs=True,
    )
    gather_metrics(client, args)


if __name__ == "__main__":
    main()