Kevin McElwee (kmcelwee)
import numpy as np

# Build overlapping windows of length `timesteps` from the rows of a dataframe.
def data_transform(data, timesteps, var='x'):
    m = []
    s = data.to_numpy()
    for i in range(s.shape[0] - timesteps):
        m.append(s[i:i+timesteps].tolist())
    if var == 'x':
        t = np.zeros((len(m), len(m[0]), len(m[0][0])))
        for i, x in enumerate(m):
            for j, y in enumerate(x):
from scipy.stats import norm

def peak_likelihood(hist=None,
                    tomorrow=None, tomorrow_std=None,
                    two_day=None, two_day_std=None,
                    three_day=None, three_day_std=None):
    '''
    Given the predictions and standard deviations of the three-day forecast, in
    addition to the highest load so far this month, what is the likelihood that
    a sample from tomorrow's distribution will be higher than the other three?
    '''
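
A minimal sketch of how that likelihood could be estimated, assuming the forecasts are independent normal distributions and using Monte Carlo sampling; the function name, sampling approach, and example values below are assumptions, not the gist's actual body:

import numpy as np
from scipy.stats import norm

def peak_likelihood_sketch(hist, tomorrow, tomorrow_std,
                           two_day, two_day_std,
                           three_day, three_day_std, n_samples=100_000):
    # Sample each day's forecast distribution (independence assumed).
    t1 = norm.rvs(loc=tomorrow, scale=tomorrow_std, size=n_samples)
    t2 = norm.rvs(loc=two_day, scale=two_day_std, size=n_samples)
    t3 = norm.rvs(loc=three_day, scale=three_day_std, size=n_samples)
    # Fraction of samples where tomorrow beats both later days and this month's peak so far.
    return np.mean((t1 > t2) & (t1 > t3) & (t1 > hist))

# Hypothetical usage:
# peak_likelihood_sketch(hist=620, tomorrow=640, tomorrow_std=15,
#                        two_day=630, two_day_std=20,
#                        three_day=610, three_day_std=25)
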
def heat(l, alpha, time_steps):
    '''apply the heat equation to list l, given constants alpha and time_steps'''
    return_l = []
    for t in range(time_steps):
        if len(return_l) != 0:
            l = return_l
            return_l = []
        for i, x in enumerate(l):
            if i == 0:
                diff = (0 - l[i]) - (l[i] - l[i+1])
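
A minimal sketch of the discrete update the snippet appears to implement, assuming zero-valued cells beyond both boundaries and that each step adds alpha times the difference; the completion and return are assumptions, not the gist's full body:

def heat_sketch(l, alpha, time_steps):
    '''Diffuse list l for time_steps steps using the discrete heat equation,
    with implicit 0-valued cells beyond each end of the list.'''
    for _ in range(time_steps):
        new_l = []
        for i, x in enumerate(l):
            left = l[i-1] if i > 0 else 0
            right = l[i+1] if i < len(l) - 1 else 0
            # (left - x) - (x - right) is the discrete Laplacian at cell i.
            new_l.append(x + alpha * ((left - x) - (x - right)))
        l = new_l
    return l
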
import tensorflow as tf
from tensorflow.keras import layers

HOURS_AHEAD = 24
s = all_X.shape[1]

# Stack of ReLU dense layers applied across the HOURS_AHEAD timesteps, then flattened.
model = tf.keras.Sequential()
model.add(layers.Dense(s, activation=tf.nn.relu, input_shape=(HOURS_AHEAD, all_X.shape[1])))
model.add(layers.Dense(s, activation=tf.nn.relu))
model.add(layers.Dense(s, activation=tf.nn.relu))
model.add(layers.Dense(s, activation=tf.nn.relu))
model.add(layers.Dense(s, activation=tf.nn.relu))
model.add(layers.Flatten())
import praw

reddit = praw.Reddit(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, user_agent=USER_AGENT)
sub = reddit.subreddit('FloridaMan')

j = []
latest_id = None
for page in range(10):
    for submission in sub.top(params={'after': latest_id, 't': 'all'}):
        j.append({
            # all the data you want
        })
        # Remember the last fullname so the next page starts after it.
        latest_id = submission.fullname
import spacy
from nltk.stem import WordNetLemmatizer

nlp = spacy.load('en_core_web_sm')  # any English spaCy model with a parser works here

def get_verb(s):
    # Verbs that govern a noun chunk in the sentence, lemmatized and deduplicated.
    m = [x.root.head.text for x in nlp(s).noun_chunks if x.root.head.pos_ == 'VERB']
    standardized = [WordNetLemmatizer().lemmatize(x, 'v') for x in m]
    remove = set(['d', "’re", "’m", "’s"])
    filtered = [x for x in standardized if x not in remove]
    return None if len(filtered) == 0 else list(set(filtered))
kmcelwee / download-tweet-media.py
Created February 20, 2021 00:09
Download images (JPG, PNG) from tweets
import wget
from os.path import join as pjoin

OUTPUT_DIR = 'tweet-imgs'

# Tweets whose entities include at least one media attachment.
media_tweets = [tweet for tweet in tweets if 'media' in tweet['entities']]
for tweet in media_tweets:
    for i, media in enumerate(tweet['entities']['media']):
        url = media['media_url']
        extension = url.split('.')[-1]
        assert extension in ['jpg', 'png']
        # Save as e.g. tweet-imgs/<tweet id>-<index>.<extension> (filename pattern assumed).
        wget.download(url, pjoin(OUTPUT_DIR, f"{tweet['id']}-{i}.{extension}"))
import pandas as pd

df = pd.read_csv('pgp.csv')
# Rows whose Type field holds more than one value (semicolon-separated).
df_multi_type = df[~pd.isna(df['Type']) & df['Type'].str.contains(';')]
df_multi_type['Type'].count()  # 148 multi-type PGPIDs
df_multi_type[df_multi_type['Library'] == 'CUL']['Type'].count()  # 75 multi-type PGPIDs from CUL

# List of the 148 PGPIDs:
31166
32188

PostgreSQL & other queries in Dataspace

To enter the Postgres command line, you need to be the dspace user (sudo su - dspace). The command is psql. Here is a link to the database diagram for DSpace 5.

It may sometimes be quicker to use the REST API than to write a complicated query. The JRuby DSpace wrapper (documentation) may be simpler as well.

Useful commands:

  • \dt: list all tables
  • \d {TABLE}: describe the given table
  • \copy ({query}) to '{filename}' with CSV HEADER: saves the query results to a CSV file with a header row (example below)
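
For example, to dump part of the item table (table and column names assumed from the DSpace 5 schema; the filename is illustrative):

  \copy (SELECT item_id, last_modified FROM item) to 'items.csv' with CSV HEADER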
kmcelwee / twitter-reply-exception.json
Created March 22, 2021 19:12
A tweet that is a reply but has a null `in_reply_to_status_id` because the original user deleted their tweet. (Twitter Dev conversation: https://twittercommunity.com/t/the-commonly-described-ways-of-determining-whether-a-tweet-is-a-reply-seem-wrong/151579)
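
One heuristic (not from the gist) for flagging such tweets as replies anyway: in extended tweets, leading reply @mentions are excluded from `display_text_range`, so a nonzero start index is a useful signal. A sketch, followed by the raw tweet:

def looks_like_reply(tweet):
    # Normal case: the reply metadata is present.
    if tweet.get('in_reply_to_status_id') is not None:
        return True
    # Fallback: a leading @mention hidden from display_text_range suggests a reply
    # whose parent tweet was deleted (heuristic, assumed rather than documented).
    start = tweet.get('display_text_range', [0, 0])[0]
    return tweet['full_text'].startswith('@') and start > 0
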
{
  "created_at": "Wed Nov 18 19:02:24 +0000 2020",
  "id": 1329137902199005184,
  "id_str": "1329137902199005184",
  "full_text": "@2legit2dunk https://t.co/4lx6Z4wqAp",
  "truncated": false,
  "display_text_range": [
    13,
    36
  ],