This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def date_to_datetime(date_str, fstrs=None): | |
""" | |
Given date string, return datetime object | |
2015 format: 2/8/13 0:00 | |
2016 format: 02/08/2013 00:00 | |
02/12/2013 20:05 | |
""" | |
if fstrs is None: | |
fstrs = ["%m/%d/%y %H:%M", "%m/%d/%Y %H:%M"] | |
for f in fstrs: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
# Build the command-line parser for this demonstration script.
parser = argparse.ArgumentParser(description="Sample usage of argparse")

# Register a single option with many keywords spelled out purely for
# illustration; real code should not combine all of them. For boolean flags,
# action="store_true" is the better choice.
parser.add_argument(
    "-a",
    "--first",
    dest="first_arg",
    action="store",
    type=int,
    required=True,
    default=23,
    help=(
        "An example first argument. Generally use only one of required or default. "
        "Leave out type for default (str)."
    ),
)

# With no argument, parse_args() parses the sys.argv list; an explicit list of
# strings can be passed instead.
args = parser.parse_args()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Do this to update URL documents (assuming you already have url_dic and all other lists and dicts) | |
for user in collection.find(): | |
for tweet in user['tweets']: | |
urls = tweet['urls'] | |
for url in urls: | |
url_string = url['url'] # DO NOT FORGET THIS. url is the url OBJECT, not the url string | |
if url_string in short_urls: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# List of urls (pretend it is already populated)
urls = []

# Patterns to match in urls, one alternation per group (left, center, right).
# Some entries include the .org or .com suffix (e.g. npr.org, slate.com,
# today.com) to avoid matching common words or letter sequences.
# Add whatever other domains you want to match to the relevant "|" alternation.
# Dots are escaped so they match a literal "." instead of any character.
left_pattern = r"(?P<domain>nytimes|washingtonpost|npr\.org|abcnews|nbcnews|huffingtonpost|slate\.com|today\.com)"
center_pattern = r"(?P<domain>cnn|bbc\.co\.uk|yahoo)"
right_pattern = r"(?P<domain>foxnews|washingtontimes|usnews|chicagotribune)"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example mapping to write out.
d = {'a': 1, 'b': 2, 'c': 3, 'd': 4}

# Write one "key<TAB>value" line per entry. The with-block closes the file
# even if a write raises, which a trailing close() call does not guarantee.
with open("dictfile.txt", 'w') as outhandle:
    for key, val in d.items():
        outhandle.write("{0}\t{1}\n".format(key, val))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
# Set up DB client, db, collection | |
date_tweet_count = {} | |
date_url_count = {} | |
for user in collection.find(): | |
for tweet in user['tweets']: | |
datestr = tweet['created_at'] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
# Set up the MongoDB handles here (get client, get database, get collection).
# MongoClient is reached through the pymongo module because only
# `import pymongo` is in scope; the bare name would raise NameError.
client = pymongo.MongoClient("smapp", 27011)
dbh = client['GunControl']
collection = dbh['GunTweetUsers_zephyr']

# Empty dicts, presumably tallies of URLs and tweets; the code that fills
# them is not shown here.
url_count = {}
tweet_count = {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simple re examples in python | |
import re | |
# Sample file names to run the pattern against.
files = (
    "re_pro_p00012.txt",
    "re_neg_p00014.txt",
    "lrr_p00014.txt",
)

# The named group "category" captures the leading run of ASCII letters that
# is followed by an underscore.
category_pattern = r"(?P<category>[a-zA-Z]+)_.*"
category_re = re.compile(category_pattern)

# re.match anchors at the start of each name; `match` holds the result for
# the most recently processed file.
for file in files:
    match = category_re.match(file)