# filter-summarize.py (4.6 KB)

  1. ## This script will extract the results of the line profiler
  2. ## for the CB Azure Client specifically, filters all long functions
  3. ## with given cutoffs, outputing a filtered results file containing
  4. ## only profiling of functions fitting the cut-offs, and a csv summary
  5. ## file containing information about the longest functions and associated
  6. ## lines
  7. from re import search
  8. from os import path, walk
  9. time_cutoff = 0 # seconds
  10. perc_cutoff = 33 # % of total function time spent on this line
  11. results = '../results/'
  12. metaresults = '../metaresults/'
  13. print("Results directory used: {}".format(results))
  14. files_dict = {}
  15. for (dirpath, dirnames, filenames) in walk(results):
  16. for each_file in filenames:
  17. if ".res" in each_file:
  18. provider = each_file.split('-')[0]
  19. curr_list = files_dict.get(provider, [])
  20. curr_list.append(path.join(dirpath, each_file))
  21. files_dict[provider] = curr_list
  22. print("Collected files:\n{}".format(files_dict))
  23. for key in files_dict.keys():
  24. provider = key
  25. files = files_dict[provider]
  26. summary = "{},{},{},{},{}\n".format("Test File", "CB functon", "Total Time (in s)", "Azure Operation", "Time per hit")
  27. for each_file in files:
  28. inside = True
  29. capturing = True
  30. purge_line = True
  31. print("Processing: {}\n".format(each_file))
  32. with open(metaresults + "filtered-" + each_file.split('/')[-1], 'w+') as \
  33. fil_file:
  34. with open(each_file, 'r') as current:
  35. filtered = ""
  36. last_time = 1000000000
  37. all_text = []
  38. for line in current:
  39. match = search(r'^Total time: ([\.e\-0-9]+) s', line)
  40. if match:
  41. if filtered:
  42. all_text.append((last_time, filtered))
  43. filtered = ""
  44. inside = False
  45. capturing = False
  46. last_time = float(match.group(1))
  47. if last_time > time_cutoff:
  48. inside = True
  49. filtered += line
  50. elif inside:
  51. match = search(r'^Function: (.+) at line ([0-9]+)', line)
  52. if match:
  53. func_name = match.group(1)
  54. line_num = int(match.group(2))
  55. capturing = True
  56. filtered += line
  57. elif capturing:
  58. if purge_line:
  59. complete_contents = ""
  60. paran_num = 0
  61. filtered += line
  62. match = search(r'^\s+([0-9]+)\s+[0-9]+\s+([\.0-9]+)\s+([\.0-9]+)\s+([\.0-9]+)\s+([^\n]+)', line)
  63. if match:
  64. percentage = float(match.group(4))
  65. line_contents = match.group(5)
  66. complete_contents += line_contents.replace(" \\", "")
  67. paran_num += line_contents.count("(") - line_contents.count(")")
  68. purge_line = " \\" not in line_contents and paran_num == 0
  69. if percentage > perc_cutoff:
  70. line_num = int(match.group(1))
  71. line_time = float(match.group(2))/1000000
  72. hit_time = float(match.group(3))/1000000
  73. summary += "{},\"{}\",{},\"{}\"," \
  74. "{}\n".format(
  75. each_file.split('/')[-1].replace(
  76. ".res", ""), func_name, last_time,
  77. complete_contents, hit_time)
  78. else:
  79. match = search(r'^\s+([0-9]+)\s+([^\n]+)', line)
  80. if match:
  81. line_contents = match.group(2)
  82. complete_contents += line_contents.replace(" \\", "")
  83. paran_num += line_contents.count("(") - line_contents.count(")")
  84. purge_line = " \\" not in line_contents and paran_num == 0
  85. all_text.sort(key=lambda x: x[0], reverse=True)
  86. for text in all_text:
  87. fil_file.write(text[1])
  88. with open(metaresults+provider+"-summary.csv", 'w+') as sum_file:
  89. sum_file.write(summary)