# rafaelcorsi/parse.py

## parse.py
import os
import csv
import re
from subprocess import Popen, PIPE

# Directory whose *.pdf files are scanned.
pdir = '.'

# Search strings handed to pdfgrep: one selects the question-code lines,
# the other the answer-classification lines.
qstring = 'Código da Questão'
# NOTE(review): the gap in 'Classi cação' presumably mirrors how the PDF text
# layer renders the word -- confirm before "correcting" the spelling.
rstring = 'Classi cação da resposta'

# Column order of the output CSV: the name taken from the PDF file name,
# followed by one column per question code.
header = [
    'NAME',
    'QA1', 'QA2', 'QA3', 'QA4', 'QA5',
    'QB1', 'QB2', 'QB3',
    'QC1', 'QC2', 'QC3',
    'QD1', 'QD2', 'QD3', 'QD4', 'QD5',
    'QE1', 'QE2', 'QE3', 'QE4',
    'QF1', 'QF2', 'QF3', 'QF4',
    'QG1', 'QG2', 'QG3',
]

def parseQuestions(line):
    """Return the text that follows 'Questão: ' on *line*.

    Raises AttributeError when *line* contains no 'Questão: ' marker,
    because the failed search yields None.
    """
    found = re.search('Questão: (.*)', line)
    return found.group(1)

def parseAnswers(line):
    """Return the text that follows 'resposta: ' on *line*.

    Raises AttributeError when *line* contains no 'resposta: ' marker,
    because the failed search yields None.
    """
    found = re.search('resposta: (.*)', line)
    return found.group(1)


def _pdfgrep(pattern, pdf_path):
    """Run pdfgrep for *pattern* on *pdf_path* and return its stdout as text."""
    process = Popen(["pdfgrep", pattern, pdf_path], encoding='utf-8', stdout=PIPE)
    # communicate() waits for the process to exit, so a separate wait() is
    # redundant. stderr is not piped, so the second item is always None.
    output, _ = process.communicate()
    return output


# The csv module asks for newline='' on files handed to csv.writer; without
# it, platforms that write '\r\n' line endings add an extra '\r' to each row.
with open('resultado.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    for file in os.listdir(pdir):
        if not file.endswith(".pdf"):
            continue

        # Kept distinct from `f` so the open CSV handle is never shadowed.
        pdf_path = os.path.join(pdir, file)

        retQuestions = _pdfgrep(qstring, pdf_path)
        retAnswers = _pdfgrep(rstring, pdf_path)

        # Raises AttributeError for a PDF whose path does not contain
        # 'mutirão_<name>_tentativa'.
        name = re.search('mutirão_(.*)_tentativa', pdf_path).group(1)
        questions = [parseQuestions(line) for line in retQuestions.splitlines()]
        answers = [parseAnswers(line) for line in retAnswers.splitlines()]

        print(name)
        # Every column starts as an empty-list placeholder; columns that get
        # no answer are written by csv.writer as the text '[]'.
        data = {k: [] for k in header}
        data['NAME'] = name

        # Questions and answers are paired by position, which is only
        # meaningful when both greps returned the same number of lines.
        # Otherwise the row carries just the name.
        if len(questions) == len(answers):
            for question, answer in zip(questions, answers):
                # A code missing from header is appended as an extra cell.
                data[question] = answer

        writer.writerow(list(data.values()))
	import os
	import csv
	import re
	from subprocess import Popen, PIPE

	# Directory whose *.pdf files are scanned.
	pdir = '.'

	# Search strings handed to pdfgrep: one selects the question-code lines,
	# the other the answer-classification lines.
	qstring = 'Código da Questão'
	# NOTE(review): the gap in 'Classi cação' presumably mirrors how the PDF
	# text layer renders the word -- confirm before "correcting" the spelling.
	rstring = 'Classi cação da resposta'

	# Column order of the output CSV: the name taken from the PDF file name,
	# followed by one column per question code.
	header = [
	    'NAME',
	    'QA1', 'QA2', 'QA3', 'QA4', 'QA5',
	    'QB1', 'QB2', 'QB3',
	    'QC1', 'QC2', 'QC3',
	    'QD1', 'QD2', 'QD3', 'QD4', 'QD5',
	    'QE1', 'QE2', 'QE3', 'QE4',
	    'QF1', 'QF2', 'QF3', 'QF4',
	    'QG1', 'QG2', 'QG3',
	]

	def parseQuestions(line):
	    """Return the text that follows 'Questão: ' on *line*.

	    Raises AttributeError when *line* contains no 'Questão: ' marker,
	    because the failed search yields None.
	    """
	    # The return statement must be indented under the def to form its body.
	    return re.search('Questão: (.*)', line).group(1)

	def parseAnswers(line):
	    """Return the text that follows 'resposta: ' on *line*.

	    Raises AttributeError when *line* contains no 'resposta: ' marker,
	    because the failed search yields None.
	    """
	    # The return statement must be indented under the def to form its body.
	    return re.search('resposta: (.*)', line).group(1)


	# The csv module asks for newline='' on files handed to csv.writer; without
	# it, platforms that write '\r\n' line endings add an extra '\r' to each row.
	with open('resultado.csv', 'w', encoding='UTF8', newline='') as f:
	    writer = csv.writer(f)
	    writer.writerow(header)

	    for file in os.listdir(pdir):
	        if not file.endswith(".pdf"):
	            continue

	        # Kept distinct from `f` so the open CSV handle is never shadowed.
	        pdf_path = os.path.join(pdir, file)

	        # communicate() waits for pdfgrep to exit and returns
	        # (stdout, stderr); only stdout is piped, so only it is kept.
	        process = Popen(["pdfgrep", qstring, pdf_path], encoding='utf-8', stdout=PIPE)
	        retQuestions = process.communicate()[0]

	        process = Popen(["pdfgrep", rstring, pdf_path], encoding='utf-8', stdout=PIPE)
	        retAnswers = process.communicate()[0]

	        # Raises AttributeError for a PDF whose path does not contain
	        # 'mutirão_<name>_tentativa'.
	        name = re.search('mutirão_(.*)_tentativa', pdf_path).group(1)
	        questions = [parseQuestions(line) for line in retQuestions.splitlines()]
	        answers = [parseAnswers(line) for line in retAnswers.splitlines()]

	        print(name)
	        # Every column starts as an empty-list placeholder; columns that
	        # get no answer are written by csv.writer as the text '[]'.
	        data = {k: [] for k in header}
	        data['NAME'] = name

	        # Questions and answers are paired by position, which is only
	        # meaningful when both greps returned the same number of lines.
	        # Otherwise the row carries just the name.
	        if len(questions) == len(answers):
	            for question, answer in zip(questions, answers):
	                # A code missing from header is appended as an extra cell.
	                data[question] = answer

	        writer.writerow(list(data.values()))