Skip to content

Instantly share code, notes, and snippets.

View kmcelwee's full-sized avatar
🌱

Kevin McElwee kmcelwee

🌱
View GitHub Profile
@kmcelwee
kmcelwee / download-tweet-media.py
Created February 20, 2021 00:09
Download images (JPG, PNG) from tweets
import wget
from os.path import join as pjoin
# Name of the image output directory (presumably the download target; it is
# not referenced in the visible part of this script).
OUTPUT_DIR = 'tweet-imgs'

# Keep only tweets whose 'entities' mapping carries a 'media' entry.
# `tweets` must already be defined by the surrounding script.
media_tweets = [tweet for tweet in tweets if 'media' in tweet['entities']]

for tweet in media_tweets:
    # `i` numbers the media items within a single tweet.
    for i, media in enumerate(tweet['entities']['media']):
        url = media['media_url']
        # Everything after the last dot is treated as the file extension.
        extension = url.rpartition('.')[2]
        # Raise explicitly rather than `assert`, which is removed when
        # Python runs with -O and would let unexpected file types through.
        if extension not in ('jpg', 'png'):
            raise ValueError(
                'unsupported media extension %r in URL %r' % (extension, url)
            )
def get_verb(s):
    """Return the distinct lemmatized verbs that govern noun chunks in ``s``.

    The text is parsed with the module-level ``nlp`` callable (expected to be
    a spaCy-style pipeline defined elsewhere). For every noun chunk whose
    root's head is tagged ``VERB``, the head's text is lemmatized as a verb
    with ``WordNetLemmatizer``. The tokens ``d``, ``’re``, ``’m`` and ``’s``
    (presumably contraction fragments) are then discarded.

    Args:
        s: Text to analyse.

    Returns:
        A list of unique lemmas in order of first appearance, or ``None``
        when no verb is left after filtering.
    """
    # Build the lemmatizer once instead of once per token.
    lemmatizer = WordNetLemmatizer()
    remove = {'d', "’re", "’m", "’s"}

    heads = [
        chunk.root.head.text
        for chunk in nlp(s).noun_chunks
        if chunk.root.head.pos_ == 'VERB'
    ]
    lemmas = (lemmatizer.lemmatize(head, 'v') for head in heads)
    filtered = [lemma for lemma in lemmas if lemma not in remove]

    # dict.fromkeys drops duplicates while keeping a deterministic order
    # (a plain set would order the strings differently from run to run).
    return list(dict.fromkeys(filtered)) or None
# Collect top-of-all-time submissions from r/FloridaMan over ten listing
# requests, accumulating one dict per submission in `j`.
reddit = praw.Reddit(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, user_agent=USER_AGENT)
j = []
latest_id = None
# The subreddit handle is the same for every page, so create it once.
sub = reddit.subreddit('FloridaMan')
for page in range(10):
    # Iterate the subreddit's listing; `s` is only bound by this loop, so
    # calling `s.top(...)` here would fail with NameError on the first pass.
    for s in sub.top(params={'after': latest_id, 't': 'all'}):
        j.append({
            # all the data you want
        })
        # Track the last submission seen so the next request resumes after
        # it instead of fetching the same page again. Reddit's `after`
        # parameter takes a fullname (e.g. "t3_abc123").
        latest_id = s.fullname
def data_transform(data, timesteps, var='x'):
m = []
s = data.to_numpy()
for i in range(s.shape[0]-timesteps):
m.append(s[i:i+timesteps].tolist())
if var == 'x':
t = np.zeros((len(m), len(m[0]), len(m[0][0])))
for i, x in enumerate(m):
for j, y in enumerate(x):
# Sequence length fed to the network, and the layer width taken from the
# second dimension of `all_X` (defined elsewhere in the script).
HOURS_AHEAD = 24
s = all_X.shape[1]

# Stack of five ReLU Dense layers, each `s` units wide, followed by Flatten.
model = tf.keras.Sequential()
# Only the first layer declares the input shape: (HOURS_AHEAD, s).
model.add(layers.Dense(s, activation=tf.nn.relu, input_shape=(HOURS_AHEAD, s)))
for _ in range(4):
    model.add(layers.Dense(s, activation=tf.nn.relu))
model.add(layers.Flatten())
def heat(l, alpha, time_steps):
'''apply the heat equation to list l, given constants alpha and time_steps'''
return_l = []
for t in range(time_steps):
if len(return_l) != 0:
l = return_l
return_l = []
for i, x in enumerate(l):
if i == 0:
diff = (0 - l[i]) - (l[i] - l[i+1])
from scipy.stats import norm
def peak_likelihood(hist=None,
tomorrow=None, tomorrow_std=None,
two_day=None, two_day_std=None,
three_day=None, three_day_std=None):
'''
Given the predictions and standard deviation of the three-day forecast, in
addition to the highest load so far this month, what is the likelihood that
a sample from tomorrow's distribution will be higher than the other three.