Skip to content

Instantly share code, notes, and snippets.

@chauncey
Created December 4, 2012 20:03
Show Gist options
  • Save chauncey/4208100 to your computer and use it in GitHub Desktop.
Save chauncey/4208100 to your computer and use it in GitHub Desktop.
Apache Python dupe catcher
#!/usr/bin/env python
"""Searches for KEY in GET request - combined Apache format (I think)"""
# Substring a log line must contain to be considered at all (see findKey).
KEY = "GET /cgi-bin/blah?var=myvar"
# Input log file read by getLogContents().
LOG = "tmp.log"
def findDupes(log):
    """Return the lines of *log* whose quoted part also occurs in another line.

    For each line, everything after its first double quote is taken as the
    comparison text.  A line is reported when that text is found in more
    than one line of *log* (the line itself always counts once).

    Args:
        log: Re-iterable collection of log-line strings (e.g. a list); it
            is scanned once per line.

    Returns:
        List of the matching lines in first-seen order, with identical
        lines reported only once.  Lines that contain no double quote are
        skipped because they have nothing to compare.
    """
    dupes = []
    for line in log:
        _, quote, request = line.partition('"')
        if not quote:
            # No quoted section; indexing the split result used to raise
            # IndexError on such a line.
            continue
        # Deliberately a substring test, not equality, to keep the
        # original matching semantics.
        matches = sum(1 for other in log if request in other)
        if matches > 1 and line not in dupes:
            dupes.append(line)
    return dupes
def findKey(log, key=None):
    """Return the lines of *log* that contain the search key.

    Args:
        log: Iterable of log-line strings.
        key: Substring to look for.  Defaults to the module-level ``KEY``.

    Returns:
        List of the matching lines in their original order.
    """
    if key is None:
        # Resolved at call time so the module constant stays the default.
        key = KEY
    return [line for line in log if key in line]
def getLogContents(path=None):
    """Read the log file and return its lines.

    Args:
        path: File to read.  Defaults to the module-level ``LOG``.

    Returns:
        List of lines as produced by ``readlines`` (line endings kept).
    """
    if path is None:
        path = LOG
    # ``with`` closes the file even if reading raises.
    with open(path, "r") as f:
        return f.readlines()
def writeLog(lines, log="alog"):
    """Write each line to *log*, preceded by its ``MyNo`` value.

    For every entry, the (up to) nine characters that follow the first
    ``MyNo=`` are written on a line of their own, then the entry itself is
    written unchanged.

    Args:
        lines: Iterable of log-line strings.
        log: Path of the output file; it is opened in "w" mode, so any
            existing content is replaced.
    """
    # ``with`` closes the file even if a write raises part-way through.
    with open(log, "w") as f:
        for line in lines:
            # partition yields "" when the marker is missing, so a blank
            # identifier line is written instead of raising IndexError.
            myno = line.partition("MyNo=")[2][:9]
            # Previously wrote the undefined name ``polno`` (NameError).
            f.write(myno + "\n")
            f.write(line)
def main():
    """Read the log, keep the KEY lines, find the dupes and write them out."""
    matching = findKey(getLogContents())
    writeLog(findDupes(matching))
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment