From bc02118c7f1a40245ae7f2206c65e555fb51d0dd Mon Sep 17 00:00:00 2001
From: Dan Berindei <dan@infinispan.org>
Date: Mon, 17 Feb 2014 19:46:38 +0200
Subject: [PATCH] ISPN-4054 Scripts to analyze test failures and corresponding
 issues

* find_unstable_tests.py - list the tests with the @Test(groups =
  "unstable") annotation
* find_unstable_tests_jira.py - list the tests with an open issue in JIRA
* find_unstable_tests_teamcity.py - list the tests with failures in CI
* diff_test_lists.py - find differences in the three sets of tests, e.g.
  tests with failures in CI and with an issue in JIRA but not in the
  unstable group.
---
 bin/diff_test_lists.py              |  89 ++++++++++++++++++++
 bin/find_unstable_tests.py          |  69 +++++++++-------
 bin/find_unstable_tests_jira.py     |  79 ++++++++++++++++++
 bin/find_unstable_tests_teamcity.py | 121 ++++++++++++++++++++++++++++
 4 files changed, 328 insertions(+), 30 deletions(-)
 create mode 100755 bin/diff_test_lists.py
 create mode 100755 bin/find_unstable_tests_jira.py
 create mode 100755 bin/find_unstable_tests_teamcity.py

diff --git a/bin/diff_test_lists.py b/bin/diff_test_lists.py
new file mode 100755
index 000000000000..20f108506ffb
--- /dev/null
+++ b/bin/diff_test_lists.py
@@ -0,0 +1,89 @@
+#!/usr/bin/python
+
+"""
+  Merge the results of the find_unstable_tests.py, find_unstable_tests_jira.py, and find_unstable_tests_teamcity.py
+"""
+
+import argparse
+import csv
+import os
+from pprint import pprint
+
+
+def parse_tsv(annotations_file, testNameReplacement, verbose):
+  """Group the rows of a tab-separated file by test class name (column 0), tagging each row with testNameReplacement."""
+  tests = dict()
+  with open(annotations_file, 'rb') as csvfile:
+    for row in csv.reader(csvfile, dialect='excel-tab'):
+      # AsyncDistExtendedStatisticTest	extended-statistics/src/test/java/org/infinispan/stats/simple/AsyncDistExtendedStatisticTest.java
+      # AsyncDistExtendedStatisticTest	ISPN-3995	AsyncDistExtendedStatisticTest.testReplaceWithOldVal fails randomly
+      # AsyncDistExtendedStatisticTest	org.infinispan.stats.simple.AsyncDistExtendedStatisticTest.testReplaceWithOldVal	2
+      if verbose: pprint(row)
+      if not row: continue  # a blank input line yields an empty row that has no class name
+      class_name = row[0]
+      row[0] = testNameReplacement  # column 0 now records which list the row came from
+      tests.setdefault(class_name, []).append(row)
+
+  if verbose: pprint(tests)
+  return tests
+
+
+def print_diffs(target_dict, source1_dict, source2_dict, verbose):
+  """Print every test of source1_dict that is absent from target_dict, followed by its rows from both sources."""
+  # NOTE(review): tests present only in source2_dict are never listed - confirm this is intended.
+  missing = [(name, entries)
+             for name, entries in sorted(source1_dict.iteritems())
+             if name not in target_dict]
+  if verbose: pprint(sorted(missing))
+
+  for name, entries in missing:
+    print(name)
+    for entry in entries:
+      print("\t%s" % ("\t".join(entry)))
+    # rows the second source holds for the same test, if any
+    extra_entries = source2_dict.get(name)
+    if extra_entries:
+      for entry in extra_entries:
+        print("\t%s" % ("\t".join(entry)))
+    print('')
+
+def main(args):
+  """Load the three test lists and print the reports selected by args.find_missing."""
+  verbose = args.verbose
+  if verbose: print csv.list_dialects(); print os.getcwd()
+
+  annotations = parse_tsv(args.annotations_file, "annotation", verbose)
+  jiras = parse_tsv(args.jira_file, "jira", verbose)
+  teamcity_failures = parse_tsv(args.teamcity_file, "failure", verbose)
+
+  # (report name, heading, list that lacks the test, first source list, second source list)
+  reports = [
+    ('jira', "Tests annotated as unstable or failing in TeamCity missing an issue in JIRA:",
+     jiras, annotations, teamcity_failures),
+    ('annotation', "Tests with a random failure issue in JIRA or failing in TeamCity missing the unstable annotation:",
+     annotations, jiras, teamcity_failures),
+    ('teamcity', "Tests annotated as unstable or with a random failure issue in JIRA but not failing in TeamCity:",
+     teamcity_failures, annotations, jiras),
+  ]
+  wanted = args.find_missing
+  for name, heading, target, source1, source2 in reports:
+    if wanted in (name, 'all'):
+      print(heading)
+      print_diffs(target, source1, source2, verbose)
+
+
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument("-a", "--annotations-file", help="Unstable test annotations file",
+                      required=True)
+  parser.add_argument("-j", "--jira-file", help="Unstable test JIRAs file", required=True)
+  parser.add_argument("-t", "--teamcity-file", help="TeamCity test failures file",
+                      required=True)
+  parser.add_argument("-v", "--verbose", help="print debugging information",
+                      action="store_true")
+  parser.add_argument("find_missing", nargs='?',  # optional, so that the default can actually apply
+                      choices=['jira', 'teamcity', 'annotation', 'all'], default='all')
+  args = parser.parse_args()
+  if args.verbose: pprint(args)
+
+  main(args)
\ No newline at end of file
diff --git a/bin/find_unstable_tests.py b/bin/find_unstable_tests.py
index 7880bdd12cd6..f0c39507d5dd 100755
--- a/bin/find_unstable_tests.py
+++ b/bin/find_unstable_tests.py
@@ -2,37 +2,46 @@
 import re
 import time
 import sys
-from utils import *
+import csv
+import argparse
+import os.path
+import fnmatch
 
-def main():
-  start_time = time.clock()
-  disabled_test_files = []
-  
-  test_annotation_matcher = re.compile('^\s*@Test')
-  disabled_matcher = re.compile('groups\s*=\s*"unstable"')
-  
-  for test_file in GlobDirectoryWalker(get_search_path(sys.argv[0]), '*Test.java'):
-    tf = open(test_file)
-    try:
-      for line in tf:
-        if test_annotation_matcher.search(line) and disabled_matcher.search(line):
-          disabled_test_files.append(test_file)
-          break
-    finally:
-      tf.close()
-      
-  print "Files containing disabled tests: \n"
-  unique_tests=to_set(disabled_test_files)
-  i = 1
-  for f in unique_tests:
-    zeropad=""
-    if i < 10 and len(unique_tests) > 9:
-      zeropad = " "
-    print "%s%s. %s" % (zeropad, str(i), strip_leading_dots(f))
-    i += 1
-
-  print "\n      (finished in " +  str(time.clock() - start_time) + " seconds)"
+
+def main(args):
+  """Print a TSV row (class name, path relative to args.dir) for each test source file under args.dir marked unstable."""
+  base_dir = args.dir
+  annotated_test_files = []
   
+  # Raw strings keep the regex escapes (\s, \() apart from Python's own string escapes.
+  disabled_test_matcher = re.compile(r'\s*@Test.*groups\s*=\s*("unstable|Array\("unstable"\))|@Category\(UnstableTest\.class\).*')
+  # Escaped dot plus '$' anchor: only real *Test.java / *Test.scala sources, not e.g. FooTest.java.orig.
+  filename_matcher = re.compile(r'.*Test\.(java|scala)$')
+
+  for dirpath, dirnames, filenames in os.walk(base_dir):
+    for filename in filenames:
+      if not filename_matcher.match(filename):
+        continue
+      test_file = os.path.join(dirpath, filename)
+      with open(test_file) as tf:
+        # any() stops at the first matching line, so a file is recorded at most once
+        if any(disabled_test_matcher.search(line) for line in tf):
+          class_name = os.path.splitext(filename)[0]
+          annotated_test_files.append((class_name, os.path.relpath(test_file, base_dir)))
+
+  annotated_test_files.sort()
+  csvwriter = csv.writer(sys.stdout, dialect='excel-tab')
+  for row in annotated_test_files:
+    csvwriter.writerow(row)
+
+def extract_class_name(f):  # e.g. 'core/src/test/FooTest.java' -> 'FooTest'
+  return os.path.splitext(os.path.basename(f))[0]  # os.path-qualified: bare splitext/basename are never imported
+
+
 if __name__ == '__main__':
-  main()
+  # Command line: an optional base directory to scan, defaulting to the current one.
+  cli = argparse.ArgumentParser()
+  cli.add_argument("dir", nargs='?', default='.', help="base directory")
+  args = cli.parse_args()
+  main(args)
   
diff --git a/bin/find_unstable_tests_jira.py b/bin/find_unstable_tests_jira.py
new file mode 100755
index 000000000000..13b7f2c38e6b
--- /dev/null
+++ b/bin/find_unstable_tests_jira.py
@@ -0,0 +1,79 @@
+#!/usr/bin/python
+
+"""
+  Search JIRA using the restkit library (yum install python-restkit).
+
+  JIRA REST API documentation: https://docs.atlassian.com/jira/REST/5.0-m5
+"""
+
+import json
+import re
+from restkit import Resource, BasicAuth, request
+from pprint import pprint
+import argparse
+from getpass import getpass
+import csv
+import sys
+
+default_base_url = 'https://issues.jboss.org'  # JIRA server queried unless --url is given
+jql_search = 'project = ISPN AND (component in ("Test Suite - Core", "Test Suite - Server", "Test Suite - Query") OR labels = testsuite_stability) AND status in (Open, "Coding In Progress", Reopened, "Pull Request Sent") ORDER BY priority DESC'  # ISPN issues in a Test Suite component or labelled testsuite_stability, limited to the listed statuses
+
+
+def main(args):
+  """Print the issues returned by the jql_search query as tab-separated rows.
+
+  Writes one row (test class, issue key, summary) to stdout for every
+  returned issue whose summary contains a word ending in "Test".
+  """
+  verbose = args.verbose
+
+  # Credentials, when supplied, are sent along with the request.
+  filters = [BasicAuth(args.user, args.password)] if args.user else []
+  resource = Resource("%s/rest/api/latest" % (args.url), filters=filters)
+
+  issueList = get_json(resource, "search",
+                       jql=jql_search,
+                       fields="key,issuetype,created,status,summary",
+                       expand="renderedFields",
+                       maxResults=500)
+  if verbose: pprint(issueList)
+
+  tests = []
+  for issue in issueList['issues']:
+    key, summary = issue['key'], issue['fields']['summary']
+    found = re.search(r'\w+Test', summary)
+    if not found:
+      continue  # the summary names no test class
+    tests.append((found.group(0), key, summary))
+
+  tests.sort()
+
+  out = csv.writer(sys.stdout, dialect='excel-tab')
+  for row in tests:
+    out.writerow(row)
+
+
+def get_json(resource, path, **params):
+  """GET path from resource, passing params as params_dict, and return the decoded JSON body.
+
+  Raises Exception when the response status is anything other than 200.
+  """
+  reply = resource.get(path, params_dict=params, headers={'Content-Type': 'application/json'})
+  status = reply.status_int
+  if status != 200:
+    raise Exception("ERROR: status %s" % status)
+  return json.loads(reply.body_string())
+
+
+if __name__ == '__main__':
+  # Command-line entry point; the password is prompted for when it is not passed with -p.
+  cli = argparse.ArgumentParser()
+  cli.add_argument("--url", default=default_base_url, help="site URL")
+  cli.add_argument("-u", "--user", required=True, help="user name")
+  cli.add_argument("-p", "--password", help="password")
+  cli.add_argument("-v", "--verbose", action="store_true", help="print debugging information")
+  args = cli.parse_args()
+
+  if args.user and not args.password:
+    args.password = getpass()
+  main(args)
diff --git a/bin/find_unstable_tests_teamcity.py b/bin/find_unstable_tests_teamcity.py
new file mode 100755
index 000000000000..9439a1d05de6
--- /dev/null
+++ b/bin/find_unstable_tests_teamcity.py
@@ -0,0 +1,121 @@
+#!/usr/bin/python
+
+"""
+  Search TeamCity using the restkit library (yum install python-restkit).
+
+  Teamcity REST API documentation: http://confluence.jetbrains.com/display/TCD8/REST+API
+"""
+
+import json
+import re
+from restkit import Resource, BasicAuth, request
+from pprint import pprint
+import argparse
+import datetime
+from getpass import getpass
+import csv
+import sys
+
+default_base_url = 'http://ci.infinispan.org'  # TeamCity server used unless -b/--base-url is given
+default_build_types = ['Master Hotspot JDK6', 'Master Hotspot JDK7', 'Master Unstable Tests JDK6']  # names of the build types whose failed builds are inspected
+default_days = 15  # how many days back to look for failed builds
+
+
+def main(args):
+  """Write a TSV row (test class, test name, build name#number, start date) to stdout per test failed in a recent build."""
+  verbose = args.verbose
+  server_base_url = args.base_url
+  user = args.user
+  password = args.password
+  # argparse hands --days over as a string unless a type is configured, so convert here
+  days = int(args.days)
+  # tolerate one list per --build occurrence (action='append') as well as a flat list of names
+  build_type_names = []
+  for entry in args.build:
+    build_type_names.extend(entry if isinstance(entry, list) else [entry])
+
+  # Without a user the guestAuth endpoint is used; with one, httpAuth plus the credentials.
+  url = "%s/guestAuth/app/rest/" % (server_base_url)
+  filters = []
+  if user:
+    filters = [BasicAuth(user, password)]
+    url = "%s/httpAuth/app/rest/" % (server_base_url)
+  resource = Resource(url, filters=filters)
+
+  buildTypes = get_json(resource, "buildTypes")
+  # honour the requested build types; this used to filter on default_build_types unconditionally
+  watched_build_type_ids = [bt['id'] for bt in buildTypes['buildType']
+      if bt['name'] in build_type_names]
+  if verbose: print("Found build ids: %s" %watched_build_type_ids)
+
+  # The cut-off date is the same for every build type: compute it once, from the requested days.
+  days_ago = datetime.datetime.utcnow() - datetime.timedelta(days=days)
+  date = days_ago.strftime('%Y%m%dT%H%M%S') + '+0000'
+
+  unstable_tests = []
+  for btid in watched_build_type_ids:
+    builds_path = 'buildTypes/id:%s/builds' % btid
+    builds = get_json(resource, builds_path, locator = build_locator(sinceDate = date, status = 'FAILURE'))
+    # NOTE(review): 'build' is assumed to be absent when nothing failed, like 'testOccurrence' below - confirm
+    build_ids = [build['id'] for build in builds.get('build', [])]
+    if verbose: print("Found build ids for build type %s: %s" % (btid, build_ids))
+
+    for bid in build_ids:
+      build = get_json(resource, "builds/id:%s" % bid)
+      bname = "%s#%s" % (build['buildType']['name'], build['number'])
+      bdate = build['startDate']
+
+      failed_tests = get_json(resource, "testOccurrences", locator = build_locator(build = "(id:%s)" % bid, status = 'FAILURE'))
+      if 'testOccurrence' in failed_tests:
+        failed_test_names = [test['name'] for test in failed_tests['testOccurrence']]
+        if verbose: print("Found failed tests for build %s: %s" % (bid, failed_test_names))
+        for test_name in failed_test_names:
+          clean_test_name = test_name.replace("TestSuite: ", "")
+          unstable_tests.append((extract_class_name(clean_test_name), clean_test_name, bname, bdate))
+
+  unstable_tests = sorted(unstable_tests)
+
+  csvwriter = csv.writer(sys.stdout, dialect='excel-tab')
+  for row in unstable_tests:
+    csvwriter.writerow(row)
+
+
+def extract_class_name(test_name):
+  """Return the first word ending in "Test" in test_name, else its second-to-last dotted component."""
+  match = re.search(r'\w+Test', test_name)
+  if match:
+    return match.group(0)
+  components = test_name.split('.')
+  # a name without any dot has no "class.method" shape; use it whole rather than fail on [-2]
+  return components[-2] if len(components) > 1 else test_name
+
+
+def build_locator(**locators):  # e.g. status='FAILURE' -> "status:FAILURE"; several pairs are comma-joined
+  return ",".join(["%s:%s" % pair for pair in locators.items()])
+
+def get_json(resource, path, **params):
+  """Fetch path from resource, asking for JSON, and return the decoded body; params go along as params_dict.
+
+  Raises Exception for any response status other than 200.
+  """
+  json_only = {'Accept': 'application/json'}
+  response = resource.get(path, headers=json_only, params_dict=params)
+  if response.status_int != 200:
+    raise Exception("ERROR: status %s" % response.status_int)
+  return json.loads(response.body_string())
+
+
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument("-b", "--base-url", help="base URL", default=default_base_url)
+  parser.add_argument("-u", "--user", help="user name")
+  parser.add_argument("-p", "--password", help="password")
+  parser.add_argument("-d", "--days", help="days to search back", type=int, default=default_days)
+  # no action='append': with a list default it appended each --build list to the defaults instead of replacing them
+  parser.add_argument("--build", help="one or more builds to search", nargs='+', default=default_build_types)
+  parser.add_argument("-v", "--verbose", help="print debugging information", action="store_true")
+  args = parser.parse_args()
+
+  if args.user and not args.password:
+    args.password = getpass()
+  main(args)
\ No newline at end of file