Skip to content

Instantly share code, notes, and snippets.

@JoshRosen
Created May 24, 2017 22:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoshRosen/1ae12a979880d9a98988aa87d70ff2a8 to your computer and use it in GitHub Desktop.
Save JoshRosen/1ae12a979880d9a98988aa87d70ff2a8 to your computer and use it in GitHub Desktop.
Scapegoat output to CSV converter for Spark
import xml.etree.ElementTree as ET
import glob
import fnmatch
import os
import csv
# Revision identifiers recorded verbatim in the 'scapegoat_revision' and
# 'spark_revision' columns of every CSV row (presumably git commit hashes
# of the Scapegoat and Spark checkouts that produced the reports).
SCAPEGOAT_VERSION = 'd9392e5072e3e408dd232e6fc799e0ac1640189b'
SPARK_VERSION = '4816c2ef5e04eb2dd70bed8b99882aa0b7fe7fd7'

# Directory that is searched for Scapegoat reports.  The same string is
# stripped from the front of each reported file name, so the trailing slash
# is significant: it makes the remaining path relative.
SPARK_HOME = '/Users/joshrosen/Documents/spark/'

# Fully-qualified Scapegoat inspection names whose warnings are left out of
# the generated CSV.
BLACKLISTED_RULES = [
    # Null handling
    'com.sksamuel.scapegoat.inspections.nulls.NullAssignment',
    'com.sksamuel.scapegoat.inspections.nulls.NullParameter',
    'com.sksamuel.scapegoat.inspections.FinalModifierOnCaseClass',
    'com.sksamuel.scapegoat.inspections.unsafe.AsInstanceOf',
    'com.sksamuel.scapegoat.inspections.option.OptionGet',
    'com.sksamuel.scapegoat.inspections.collections.NegationIsEmpty',
    'com.sksamuel.scapegoat.inspections.matching.PartialFunctionInsteadOfMatch',
    'com.sksamuel.scapegoat.inspections.string.EmptyInterpolatedString',
    'com.sksamuel.scapegoat.inspections.unsafe.IsInstanceOf',
    'com.sksamuel.scapegoat.inspections.unneccesary.UnnecessaryReturnUse',
    'com.sksamuel.scapegoat.inspections.controlflow.WhileTrue',
    # Naming and signature conventions
    'com.sksamuel.scapegoat.inspections.naming.ClassNames',
    'com.sksamuel.scapegoat.inspections.collections.FindDotIsDefined',
    'com.sksamuel.scapegoat.inspections.MaxParameters',
    'com.sksamuel.scapegoat.inspections.naming.ObjectNames',
    'com.sksamuel.scapegoat.inspections.naming.MethodNames',
    'com.sksamuel.scapegoat.inspections.unneccesary.UnusedMethodParameter',
]
# Gather the path of every file named 'scapegoat-scalastyle.xml' found at any
# depth beneath SPARK_HOME, in the order os.walk visits them.
matches = [
    os.path.join(directory, report_name)
    for directory, _subdirs, entries in os.walk(SPARK_HOME)
    for report_name in fnmatch.filter(entries, 'scapegoat-scalastyle.xml')
]
# Convert every collected Scapegoat report into rows of 'scapegoat.csv'.
#
# Each report is an XML document whose root contains one element per source
# file (attribute 'name'), which in turn contains one element per warning
# (attributes 'message', 'source' and 'line').  One CSV row is written per
# warning whose rule ('source') is not listed in BLACKLISTED_RULES.
#
# Raises ValueError if a report does not have the expected shape: a file
# name outside SPARK_HOME, or a warning element with child elements.
#
# The file is opened in text mode with newline='' as the csv module requires
# on Python 3 (binary mode makes csv.writer fail with TypeError because it
# writes str).  UTF-8 is set explicitly so non-ASCII warning messages do not
# depend on the platform's default encoding.
with open('scapegoat.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
    writer.writerow([
        'file',
        'line',
        'message',
        'rule',
        'spark_revision',
        'scapegoat_revision',
    ])
    # Build the lookup set once; membership is tested for every warning.
    blacklisted = frozenset(BLACKLISTED_RULES)
    for m in matches:
        tree = ET.parse(m)
        root = tree.getroot()
        for f in root:
            fname = f.attrib['name']
            # Explicit check instead of `assert`, which is removed under -O.
            if not fname.startswith(SPARK_HOME):
                raise ValueError(
                    'File %r in report %r is not under SPARK_HOME (%r)'
                    % (fname, m, SPARK_HOME))
            # Report paths relative to the Spark checkout.
            fname = fname[len(SPARK_HOME):]
            for w in f:
                # Warning elements are expected to be leaves; anything nested
                # would be silently dropped by the flat CSV layout.
                if len(w) != 0:
                    raise ValueError(
                        'Unexpected child elements under a warning for %r in '
                        'report %r' % (fname, m))
                message = w.attrib['message']
                rule = w.attrib['source']
                line = w.attrib['line']
                if rule not in blacklisted:
                    writer.writerow(
                        (fname, line, message, rule,
                         SPARK_VERSION, SCAPEGOAT_VERSION))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment