Skip to content

Instantly share code, notes, and snippets.

View dpenfoldbrown's full-sized avatar

Dunc PB dpenfoldbrown

View GitHub Profile
@dpenfoldbrown
dpenfoldbrown / date_extract.py
Created October 21, 2016 22:58
Date extracting
def date_to_datetime(date_str, fstrs=None):
    """
    Given date string, return datetime object.

    Tries each strptime format in order and returns the first successful parse.

    2015 format: 2/8/13 0:00
    2016 format: 02/08/2013 00:00
                 02/12/2013 20:05

    :param date_str: date/time string to parse
    :param fstrs: optional list of strptime format strings to try in order
    :returns: datetime.datetime from the first matching format
    :raises ValueError: if no format in fstrs matches date_str
    """
    if fstrs is None:
        # Defaults cover both 2-digit (%y) and 4-digit (%Y) year exports.
        fstrs = ["%m/%d/%y %H:%M", "%m/%d/%Y %H:%M"]
    for f in fstrs:
        try:
            return datetime.strptime(date_str, f)
        except ValueError:
            # This format didn't match; try the next one.
            continue
    raise ValueError("No format in {0} matches date string {1!r}".format(fstrs, date_str))
@dpenfoldbrown
dpenfoldbrown / argparse_sample.py
Created September 11, 2013 20:46
Argparse argument example
# Minimal argparse demo: one required integer option parsed from sys.argv.
import argparse

parser = argparse.ArgumentParser(description="Sample usage of argparse")

# Every add_argument knob is shown at once for illustration only; in real code
# use just the ones you need (and for boolean flags prefer action="store_true").
parser.add_argument(
    "-a",
    "--first",
    action="store",
    type=int,
    dest="first_arg",
    required=True,
    default=23,
    help="An example first argument. Generally use only one of required or default. Leave out type for default (str).",
)

# parse_args() consumes sys.argv by default; an explicit list can be passed too.
args = parser.parse_args()
# Do this to update URL documents (assuming you already have url_dic and all other lists and dicts)
# NOTE(review): snippet truncated in this capture — the body of the final
# `if url_string in short_urls:` is missing, and `collection` / `short_urls`
# are defined elsewhere; confirm against the original gist before reuse.
# Walks every user document, then each tweet's url objects, to update URL docs.
# Do this to update URL documents (assuming you already have url_dic and all other lists and dicts)
for user in collection.find():
    for tweet in user['tweets']:
        urls = tweet['urls']
        for url in urls:
            url_string = url['url'] # DO NOT FORGET THIS. url is the url OBJECT, not the url string
            if url_string in short_urls:
@dpenfoldbrown
dpenfoldbrown / url_domain_annotate.py
Created August 28, 2013 18:27
Regex URLs to determine political leaning via labelled sources
# List of urls (pretend like it's populated)
urls = []

# Patterns to match in urls. In some cases the .org or .com suffix is included
# to avoid matching common words or letters (eg for npr or slate or today).
# Bug fix: dots are escaped (\.) because an unescaped "." matches ANY character
# in a regex, so r"npr.org" would also have matched e.g. "nprXorg".
# Add whatever other domains you want to match to the re OR (|) string
left_pattern = r"(?P<domain>nytimes|washingtonpost|npr\.org|abcnews|nbcnews|huffingtonpost|slate\.com|today\.com)"
center_pattern = r"(?P<domain>cnn|bbc\.co\.uk|yahoo)"
right_pattern = r"(?P<domain>foxnews|washingtontimes|usnews|chicagotribune)"
@dpenfoldbrown
dpenfoldbrown / dict_to_file.py
Created August 27, 2013 20:43
Dictionary write to file
# Write each key/value pair of a dict to a file, one tab-separated pair per line.
d = { 'a':1, 'b':2, 'c':3, 'd':4 }

# Bug fix: use a context manager instead of a bare open()/close() pair so the
# file handle is closed even if a write raises.
with open("dictfile.txt", 'w') as outhandle:
    for (key, val) in d.items():
        outhandle.write("{0}\t{1}\n".format(key, val))
@dpenfoldbrown
dpenfoldbrown / tweet_url_byday_count.py
Last active December 21, 2015 20:08
Code to annotate all URLs with liberal, conservative, center, or unknown based on known news sources (eg fox, abc, npr with known affiliations/leanings). Also counts the number of domains found for each in list
# Count tweets (and, per the gist title, URLs) per day across a Mongo collection.
# NOTE(review): snippet truncated in this capture — only the start of the
# aggregation loop is visible, and `collection` is set up elsewhere; confirm
# against the original gist before reuse.
import pymongo
# Set up DB client, db, collection
# Running counters keyed by the tweet's created_at date string.
date_tweet_count = {}
date_url_count = {}
for user in collection.find():
    for tweet in user['tweets']:
        datestr = tweet['created_at']
@dpenfoldbrown
dpenfoldbrown / user_tweet_url_count.py
Created August 27, 2013 20:13
User count aggregate things (# tweets, # urls, etc)
import pymongo

# Set up mongo DB here (get client, get database, get collection)
# Bug fix: only `pymongo` is imported, so a bare `MongoClient` raises
# NameError — qualify it with the module name.
client = pymongo.MongoClient("smapp", 27011)
dbh = client['GunControl']
collection = dbh['GunTweetUsers_zephyr']

# Aggregate counters (per the gist title: # tweets, # urls per user).
# NOTE(review): the keying scheme isn't visible here — confirm downstream use.
url_count = {}
tweet_count = {}
# Simple re examples in python
import re
# Filenames whose leading alphabetic chunk encodes a category (eg "re", "lrr").
files = ("re_pro_p00012.txt", "re_neg_p00014.txt", "lrr_p00014.txt")
# Captures the leading letters before the first underscore as group "category".
category_pattern = r"(?P<category>[a-zA-Z]+)_.*"
# NOTE(review): snippet may be truncated here — `match` is bound but unused in
# the visible lines; the continuation presumably reads match.group("category").
for file in files:
    match = re.match(category_pattern, file)