filter-summarize.py 3.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. ## This script will extract the results of the line profiler
  2. ## for the CB Azure Client specifically, filters all long functions
  3. ## with given cutoffs, outputing a filtered results file containing
  4. ## only profiling of functions fitting the cut-offs, and a csv summary
  5. ## file containing information about the longest functions and associated
  6. ## lines
  7. from re import search
  8. from os import path, walk
  9. time_cutoff = 5 # seconds
  10. perc_cutoff = 33 # % of total functino time spent on this line
  11. results = '../results/'
  12. metaresults = '../metaresults/'
  13. print("Results directory used: {}".format(results))
  14. files = []
  15. for (dirpath, dirnames, filenames) in walk(results):
  16. for each_file in filenames:
  17. if ".res" in each_file:
  18. files.append(path.join(dirpath, each_file))
  19. print("Collected list: {}".format(files))
  20. summary = "{},{},{},{},{}\n".format("Test File", "CB functon", "Total Time (in s)", "Azure Operation", "Time per hit")
  21. inside = False
  22. capturing = False
  23. purge_line = True
  24. for each_file in files:
  25. print("Processing: {}\n".format(each_file))
  26. with open(metaresults + "filtered-" + each_file.split('/')[-1], 'w+') as \
  27. fil_file:
  28. with open(each_file, 'r') as current:
  29. filtered = ""
  30. for line in current:
  31. match = search(r'^Total time: ([\.e\-0-9]+) s', line)
  32. if match:
  33. inside = False
  34. capturing = False
  35. total_time = float(match.group(1))
  36. if total_time > time_cutoff:
  37. inside = True
  38. filtered += "Test suite: {}\n".format(each_file)
  39. filtered += line
  40. elif inside:
  41. match = search(r'^Function: (.+) at line ([0-9]+)', line)
  42. if match:
  43. func_name = match.group(1)
  44. line_num = int(match.group(2))
  45. capturing = True
  46. filtered += line
  47. elif capturing:
  48. if purge_line:
  49. complete_contents = ""
  50. paran_num = 0
  51. filtered += line
  52. match = search(r'^\s+([0-9]+)\s+[0-9]+\s+([\.0-9]+)\s+([\.0-9]+)\s+([\.0-9]+)\s+([^\n]+)', line)
  53. if match:
  54. percentage = float(match.group(4))
  55. line_contents = match.group(5)
  56. complete_contents += line_contents.replace(" \\", "")
  57. paran_num += line_contents.count("(") - line_contents.count(")")
  58. purge_line = " \\" not in line_contents and paran_num == 0
  59. if percentage > perc_cutoff:
  60. line_num = int(match.group(1))
  61. line_time = float(match.group(2))/1000000
  62. hit_time = float(match.group(3))/1000000
  63. summary += "{},\"{}\",{},\"{}\",{}\n".format(each_file.split('/')[-1].replace(".res", ""), func_name, total_time, complete_contents, hit_time)
  64. else:
  65. match = search(r'^\s+([0-9]+)\s+([^\n]+)', line)
  66. if match:
  67. line_contents = match.group(2)
  68. complete_contents += line_contents.replace(" \\", "")
  69. paran_num += line_contents.count("(") - line_contents.count(")")
  70. purge_line = " \\" not in line_contents and paran_num == 0
  71. fil_file.write(filtered)
  72. with open(metaresults+"summary.csv", 'w+') as sum_file:
  73. sum_file.write(summary)