# JoshRosen/scapegoat-to-csv-spark.py

## scapegoat-to-csv-spark.py
import csv
import fnmatch
import glob
import os
import sys
import xml.etree.ElementTree as ET

# Revision identifiers copied verbatim into every CSV row
# (the 'scapegoat_revision' and 'spark_revision' columns).
SCAPEGOAT_VERSION = 'd9392e5072e3e408dd232e6fc799e0ac1640189b'
SPARK_VERSION = '4816c2ef5e04eb2dd70bed8b99882aa0b7fe7fd7'

# Directory tree searched for reports. It is also stripped as a prefix from
# reported file names, so the trailing slash is significant.
SPARK_HOME = '/Users/joshrosen/Documents/spark/'

# Every blacklisted rule lives under this package; the full rule name is the
# prefix followed by one of the suffixes listed below.
_INSPECTIONS_PREFIX = 'com.sksamuel.scapegoat.inspections.'

# Fully qualified names of the rules whose warnings are left out of the CSV.
BLACKLISTED_RULES = [
    _INSPECTIONS_PREFIX + suffix
    for suffix in (
        'nulls.NullAssignment',
        'nulls.NullParameter',
        'FinalModifierOnCaseClass',
        'unsafe.AsInstanceOf',
        'option.OptionGet',
        'collections.NegationIsEmpty',
        'matching.PartialFunctionInsteadOfMatch',
        'string.EmptyInterpolatedString',
        'unsafe.IsInstanceOf',
        'unneccesary.UnnecessaryReturnUse',
        'controlflow.WhileTrue',
        'naming.ClassNames',
        'collections.FindDotIsDefined',
        'MaxParameters',
        'naming.ObjectNames',
        'naming.MethodNames',
        'unneccesary.UnusedMethodParameter',
    )
]

# Gather the path of every scapegoat report found anywhere below SPARK_HOME.
matches = []
for root, dirnames, filenames in os.walk(SPARK_HOME):
    report_names = fnmatch.filter(filenames, 'scapegoat-scalastyle.xml')
    matches.extend(os.path.join(root, name) for name in report_names)

# Write one CSV row per reported warning whose rule is not blacklisted.
#
# The csv module needs a different kind of file object on each major Python
# version: a binary file on Python 2, a text file opened with newline='' on
# Python 3 (writing str rows to a 'wb' file raises TypeError there).
if sys.version_info[0] >= 3:
    csvfile = open('scapegoat.csv', 'w', newline='', encoding='utf-8')
else:
    csvfile = open('scapegoat.csv', 'wb')

with csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
    # Header row; the column order must match the data rows written below.
    writer.writerow([
        'file',
        'line',
        'message',
        'rule',
        'spark_revision',
        'scapegoat_revision',
    ])
    for m in matches:
        # Children of the report root carry a 'name' attribute (a file path);
        # their children carry the 'message', 'source' and 'line' attributes.
        root = ET.parse(m).getroot()
        for f in root:
            fname = f.attrib['name']
            # Explicit raises instead of asserts so that the checks survive
            # `python -O`; slicing a path that lacks the prefix would silently
            # produce a wrong file name.
            if not fname.startswith(SPARK_HOME):
                raise ValueError(
                    '%s: file %r is not under %r' % (m, fname, SPARK_HOME))
            # Report paths relative to the Spark checkout.
            fname = fname[len(SPARK_HOME):]
            for w in f:
                # Warning elements are expected to be leaves.
                if len(w) != 0:
                    raise ValueError(
                        '%s: unexpected child elements in entry for %r'
                        % (m, fname))
                message = w.attrib['message']
                rule = w.attrib['source']
                line = w.attrib['line']
                if rule not in BLACKLISTED_RULES:
                    writer.writerow((fname, line, message, rule, SPARK_VERSION, SCAPEGOAT_VERSION))
	import xml.etree.ElementTree as ET
	import glob
	import fnmatch
	import os
	import csv

	# Revision identifiers copied verbatim into every CSV row
	# (the 'scapegoat_revision' and 'spark_revision' columns).
	SCAPEGOAT_VERSION = 'd9392e5072e3e408dd232e6fc799e0ac1640189b'
	SPARK_VERSION = '4816c2ef5e04eb2dd70bed8b99882aa0b7fe7fd7'

	# Directory tree searched for reports. It is also stripped as a prefix from
	# reported file names, so the trailing slash is significant.
	SPARK_HOME = '/Users/joshrosen/Documents/spark/'

	# Every blacklisted rule lives under this package; the full rule name is
	# the prefix followed by one of the suffixes listed below.
	_INSPECTIONS_PREFIX = 'com.sksamuel.scapegoat.inspections.'

	# Fully qualified names of the rules whose warnings are left out of the CSV.
	BLACKLISTED_RULES = [
	    _INSPECTIONS_PREFIX + suffix
	    for suffix in (
	        'nulls.NullAssignment',
	        'nulls.NullParameter',
	        'FinalModifierOnCaseClass',
	        'unsafe.AsInstanceOf',
	        'option.OptionGet',
	        'collections.NegationIsEmpty',
	        'matching.PartialFunctionInsteadOfMatch',
	        'string.EmptyInterpolatedString',
	        'unsafe.IsInstanceOf',
	        'unneccesary.UnnecessaryReturnUse',
	        'controlflow.WhileTrue',
	        'naming.ClassNames',
	        'collections.FindDotIsDefined',
	        'MaxParameters',
	        'naming.ObjectNames',
	        'naming.MethodNames',
	        'unneccesary.UnusedMethodParameter',
	    )
	]

	# Gather the path of every scapegoat report found anywhere below SPARK_HOME.
	# (The loop nesting had been flattened in this copy; it is restored here.)
	matches = []
	for root, dirnames, filenames in os.walk(SPARK_HOME):
	    for filename in fnmatch.filter(filenames, 'scapegoat-scalastyle.xml'):
	        matches.append(os.path.join(root, filename))

	# Write one CSV row per reported warning whose rule is not blacklisted.
	# (The block nesting had been flattened in this copy; it is restored here.)
	#
	# The csv module needs a different kind of file object on each major Python
	# version: a binary file on Python 2, a text file opened with newline='' on
	# Python 3 (writing str rows to a 'wb' file raises TypeError there).
	if sys.version_info[0] >= 3:
	    csvfile = open('scapegoat.csv', 'w', newline='', encoding='utf-8')
	else:
	    csvfile = open('scapegoat.csv', 'wb')

	with csvfile:
	    writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
	    # Header row; the column order must match the data rows written below.
	    writer.writerow([
	        'file',
	        'line',
	        'message',
	        'rule',
	        'spark_revision',
	        'scapegoat_revision',
	    ])
	    for m in matches:
	        # Children of the report root carry a 'name' attribute (a file
	        # path); their children carry the 'message', 'source' and 'line'
	        # attributes.
	        root = ET.parse(m).getroot()
	        for f in root:
	            fname = f.attrib['name']
	            # Explicit raises instead of asserts so that the checks survive
	            # `python -O`; slicing a path that lacks the prefix would
	            # silently produce a wrong file name.
	            if not fname.startswith(SPARK_HOME):
	                raise ValueError(
	                    '%s: file %r is not under %r' % (m, fname, SPARK_HOME))
	            # Report paths relative to the Spark checkout.
	            fname = fname[len(SPARK_HOME):]
	            for w in f:
	                # Warning elements are expected to be leaves.
	                if len(w) != 0:
	                    raise ValueError(
	                        '%s: unexpected child elements in entry for %r'
	                        % (m, fname))
	                message = w.attrib['message']
	                rule = w.attrib['source']
	                line = w.attrib['line']
	                if rule not in BLACKLISTED_RULES:
	                    writer.writerow((fname, line, message, rule, SPARK_VERSION, SCAPEGOAT_VERSION))