Skip to content

Instantly share code, notes, and snippets.

@EthanDF
Created April 18, 2017 19:07
Show Gist options
  • Save EthanDF/f13596155756c127fd6925b0f30dc95d to your computer and use it in GitHub Desktop.
Save EthanDF/f13596155756c127fd6925b0f30dc95d to your computer and use it in GitHub Desktop.
How to create the comparison file for the quoted notes... just a place to share the script
import csv
import codecs
def readFile(fileName='sus_dups_quotes_by-oclc.csv'):
    """Read a UTF-8 CSV file and return all of its rows.

    Args:
        fileName: Path of the CSV file to read. Defaults to the file name the
            script was originally hard-coded to use.

    Returns:
        A list with one entry per CSV record; each entry is the list of
        string fields that ``csv.reader`` produced for that record.

    Raises:
        OSError: If the file cannot be opened.
    """
    print('reading file...')
    # The csv module requires newline='' so it can do its own record
    # splitting. A codecs stream would break lines on every Unicode line
    # boundary (e.g. U+2028, form feed), tearing fields apart.
    with open(fileName, 'r', encoding='utf-8', newline='') as c:
        checkList = list(csv.reader(c))
    print('done reading file...')
    return checkList
def writeResultsToCSV(oclcNumber, bib, quotedNote, matchResult,
                      outputFile='quotedNotesResults.csv'):
    """Append one result record to a CSV file.

    Every value is converted with ``str()`` and written as a fully quoted
    field (``csv.QUOTE_ALL``). The file is opened in append mode, so repeated
    calls accumulate rows and an existing file is never truncated.

    Args:
        oclcNumber: Value written to the first column.
        bib: Value written to the second column.
        quotedNote: Value written to the third column.
        matchResult: Value written to the fourth column.
        outputFile: Path of the CSV file to append to. Defaults to the file
            name the script was originally hard-coded to use.

    Raises:
        OSError: If the file cannot be opened for appending.
    """
    record = [str(oclcNumber), str(bib), str(quotedNote), str(matchResult)]
    # newline='' lets the csv writer emit its own '\r\n' terminator untouched.
    with open(outputFile, 'a', encoding='utf-8', newline='') as out:
        writer = csv.writer(out, delimiter=',', quoting=csv.QUOTE_ALL)
        writer.writerow(record)
def buildDict(list):
    """Group rows by their first column.

    Args:
        list: Iterable of rows. For each non-empty row, item 0 is used as the
            grouping key, item 1 as the bib value and item 2 as the note.
            (The name shadows the builtin; it is kept so existing callers
            that pass it by keyword keep working.)

    Returns:
        A dict mapping each key to a list of ``[bib, note]`` pairs, in the
        order the rows were encountered.

    Raises:
        IndexError: If a non-empty row has fewer than three items.
    """
    print('making dictionary...')
    oDict = {}
    for row in list:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to group.
            continue
        dID, bib, note = row[0], row[1], row[2]
        # setdefault appends in place instead of recopying the whole group
        # for every additional row that shares the key.
        oDict.setdefault(dID, []).append([bib, note])
    print('done making dictionary...')
    return oDict
def compareNotes(debug = 0):
    """Flag repeated notes within each group and append the results to a CSV.

    Rows are loaded with ``readFile()`` and grouped with ``buildDict()``.
    Within each group, every (bib, note) pair is written out through
    ``writeResultsToCSV()`` with a match flag: 0 the first time a given note
    text appears in the group, 1 for every later occurrence of the same text
    (regardless of which bib it is attached to).

    Args:
        debug: When equal to 1, print each record and its match flag.

    Returns:
        The list of ``[bib, note]`` pairs of the last group processed, or an
        empty list when there were no groups at all.
    """
    oclcList = readFile()
    oDict = buildDict(oclcList)
    print('running comparisons...')
    # Pre-bound so an empty input returns [] instead of raising
    # UnboundLocalError at the final return.
    pairs = []
    for oclcNumber, pairs in oDict.items():
        # Note texts already seen in this group; a set keeps the membership
        # test constant-time however large the group is.
        seenNotes = set()
        for bib, note in pairs:
            if debug == 1:
                print([oclcNumber, bib, note])
            if note in seenNotes:
                matchResult = 1
            else:
                seenNotes.add(note)
                matchResult = 0
            if debug == 1:
                print('Set Result = '+str(matchResult))
            writeResultsToCSV(oclcNumber, bib, note, matchResult)
    print('done!')
    return pairs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment