This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse each article. Create dictionary journal->MeSH terms
# Accumulate MeSH terms by journal
# Put every article output in a QUEUE, PICK IT UP with a single process
# This way, we can process multiple PubMed files in parallel
from read_medline import * | |
import multiprocessing | |
import cPickle as pickle | |
import traceback | |
import sys |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import xmltodict | |
import sys | |
import os | |
import logging | |
import hashlib | |
from gzip import GzipFile | |
from pprint import pprint | |
try: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compiled pattern for one free-text medication line shaped like
#     "<name> <dose> <units> [<formulation>]; <instructions>"
# e.g. "Aspirin 81 mg tablet; take one daily".
#
# Named groups: name, dose, units, formulation, instructions.
# Matching is case-insensitive, and the pattern is written in verbose mode,
# so literal whitespace and '#' comments inside it are ignored by the regex
# engine.  A stray '|' inside the pattern would split it into independent
# alternatives (the first of which matches the empty string), so the pattern
# must contain none.
#
# NOTE: the whitespace matcher in front of `instructions` is lazy, so any
# blanks following the ';' end up inside the `instructions` group; strip the
# captured value if that matters to the caller.
medication_parser = re.compile(r"""
    ^\s*(?P<name>.*?)                  # drug name: shortest text before the dose
    \s+(?P<dose>[0-9\.\/]+)            # dose: digits, dots and slashes
    \s*(?P<units>([mck]|mc)g|[md]l)    # units: mg, cg, kg, mcg, ml or dl
    \s*(?P<formulation>.*?)            # optional text up to the first semicolon
    ;
    \s*?(?P<instructions>.*)           # everything after the semicolon
    """,
    re.IGNORECASE | re.VERBOSE)