Skip to content

Instantly share code, notes, and snippets.

@jamii
Created June 3, 2011 10:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamii/1006140 to your computer and use it in GitHub Desktop.
Save jamii/1006140 to your computer and use it in GitHub Desktop.
from disco.core import Job
from disco.worker.classic.func import chain_reader
import re
# Matches one download record of the form
#   { _id: ObjectId('...'), d: ..., doi: "...", i: "...", s: ..., ip: "..." }
# and captures, in order: the ObjectId string, d, doi, i, s and ip.
# Written as raw triple-quoted literals so the regex escapes \( and \) reach
# `re` untouched (in a plain string they are invalid escape sequences) and the
# embedded double quotes need no backslashes. The two adjacent literals are
# concatenated by the parser into a single pattern.
download_pattern = re.compile(
    r"""{ _id: ObjectId\('([^']*)'\), d: ([^,]*), doi: "([^"]*)", """
    r"""i: "([^"]*)", s: ([^,]*), ip: "([^"]*)" }"""
)
class ParseDownloads(Job):
    """Disco job whose map phase parses download-record lines into JSON.

    NOTE(review): ``poc``, ``json`` and ``Task`` are not imported in the
    visible file; presumably they are provided by the Disco worker
    environment (``poc`` looks like this module's own name) -- confirm.
    """

    # Input is read with disco's chain_reader instead of the default reader.
    map_reader = staticmethod(chain_reader)

    @staticmethod
    def map(line, params):
        """Yield ``(record_id, json_string)`` for a line that matches.

        A line that does not match ``poc.download_pattern`` yields nothing
        and is handed to ``Task.put`` under the key ``'nomatch'``.
        ``params`` is accepted but not used.
        """
        parsed = poc.download_pattern.match(line)
        if parsed is None:
            Task.put('nomatch', line)
            return

        # Groups 4 and 5 (the "i" and "s" fields) are deliberately dropped.
        record_id, date, doi, ip = parsed.group(1, 2, 3, 6)
        record = {'id': record_id, 'doi': doi, 'date': date, 'ip': ip}
        yield record_id, json.dumps(record)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment