Skip to content

Instantly share code, notes, and snippets.

@josht-jpg
josht-jpg / KNN-linear-alg.rs
Created April 3, 2022 21:34
Linear algebra for a Rust implementation of K-Nearest Neighbors
trait LinearAlg<T>
where
T: Add + Sub,
{
fn dot(&self, w: &[T]) -> T;
fn subtract(&self, w: &[T]) -> Vec<T>;
fn sum_of_squares(&self) -> T;
@josht-jpg
josht-jpg / URL-sanitizer
Created June 28, 2021 17:59
Philippe De Ryck's adaptation of AngularJS's URL sanitizer
/**
 * A pattern that matches URLs treated as safe: the string must either start with
 * one of the allowed schemes (http, https, mailto, ftp, tel, file, sms) followed
 * by ":", or contain no ":" or "&" before the first "/", "?", "#" or the end of
 * the string. Matching is case-insensitive.
 */
const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file|sms):|[^&:/?#]*(?:[/?#]|$))/gi;
/** A pattern that matches safe data URLs. It only matches image, video, and audio types. */
const DATA_URL_PATTERN = /^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm)|audio\/(?:mp3|oga|ogg|opus));base64,[a-z0-9+\/]+=*$/i;
function _sanitizeUrl(url: string): string {
url = String(url);
if (url === "null" || url.length === 0 || url === "about:blank") return "about:blank";
if (url.match(SAFE_URL_PATTERN) || url.match(DATA_URL_PATTERN)) return url;
@josht-jpg
josht-jpg / predicting_sentence_sentiment
Last active September 5, 2020 01:34
Predicting sentence sentiment
def predict_sentences(book, stop_words):
#Break up book into sentences
book_sentences = pd.DataFrame(book.split("."), columns = ['sentence'])
#Clean sentences
book_sentences['sentence'] = book_sentences['sentence'].\
apply(lambda x: clean_labelled(x, stop_words))
book_sentences = book_sentences[book_sentences['sentence'].\
str.len() > 0]
@josht-jpg
josht-jpg / cleaning_labelled
Created September 5, 2020 01:08
Cleaning Labelled
def clean_labelled(sentence, stop_words):
    """Lower-case a sentence, split it into word tokens and drop stop words.

    Args:
        sentence: Raw sentence text (a ``str``).
        stop_words: Container of words to discard. Tokens are lower-cased
            before the membership test, so entries should be lower-case.
            Any object supporting ``in`` works; a ``set`` gives the fastest
            lookups.

    Returns:
        A list of the remaining tokens, in their original order.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # Same tokens as nltk's RegexpTokenizer(r'\w+').tokenize(...), which for a
    # gap-less pattern is a thin wrapper over re.findall, but without
    # constructing a new tokenizer object on every call.
    tokens = re.findall(r'\w+', sentence.lower())
    # The original also built a pandas DataFrame of the tokens and then
    # immediately overwrote it; nothing read it, so it is omitted.
    return [w for w in tokens if w not in stop_words]
@josht-jpg
josht-jpg / loading_labelled_sentences
Created September 5, 2020 00:30
Loading labelled sentences
# Load the labelled training and test sentences; both frames are given the
# same three column names.
labelled_train = pd.read_csv("labelled_train.csv")
labelled_train.columns = ['line', 'sentence', 'score']
# The test file gets its own name. Previously it was assigned to
# labelled_train, which discarded the training data and left labelled_test
# undefined when its columns were set on the next line.
labelled_test = pd.read_csv("labelled_test.csv")
labelled_test.columns = ['line', 'sentence', 'score']
@josht-jpg
josht-jpg / sample_afinn
Last active September 4, 2020 20:12
Sample AFINN
# Wrap the raw books in a Series so every stage below runs once per book.
books_raw = pd.Series(books_raw)

# Per-book pipeline: extract contents -> bigrams -> AFINN scoring.
books = books_raw.apply(get_book_contents)
books_bigrams = books.apply(bigram)
books_afinn = books_bigrams.apply(afinn_context)

# Reduce each book's AFINN result to the mean of its 'score' column.
books_means = books_afinn.apply(lambda scored: scored['score'].mean())
@josht-jpg
josht-jpg / books_sample
Created September 4, 2020 20:05
Books Sample
#List the raw contents of each book
#Each element is a string
books_raw = [owl_creek_bridge_raw,
portrait_of_a_lady_raw,
white_company_raw,
ladies_paradise_raw,
private_memoirs_raw,
master_of_ballantrae_raw,
agathas_husband_raw,
@josht-jpg
josht-jpg / Basic_imports
Created September 2, 2020 20:51
Basic imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
@josht-jpg
josht-jpg / plot_NRC
Last active September 7, 2020 19:36
Plot NRC
def plot_nrc(df, title):
i = 0
j = 0
scores = pd.DataFrame(np.zeros((df.shape[0] // 750, 10)), columns = NRC_sentiments)
while i < df.shape[0] - 750:
scores.iloc[j] = df.loc[i:i + 750, 'anger':'trust'].sum()
i += 750
j += 1
@josht-jpg
josht-jpg / plotting_AFINN_scores
Last active September 2, 2020 20:03
Plotting AFINN scores
def plot_afinn(df, title, chunk_size=500):
    """Plot the summed AFINN score of consecutive fixed-size chunks of ``df``.

    The rows of ``df`` are split, in order, into chunks of ``chunk_size``
    rows; the ``'score'`` column of each complete chunk is summed and the
    sums are drawn as one line in a random colour on the current
    matplotlib axes.

    Args:
        df: DataFrame with a ``'score'`` column.
        title: Title to set on the plot.
        chunk_size: Number of rows per chunk. Defaults to 500, the value
            that was previously hard-coded.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    n_rows = df.shape[0]
    score = df['score']
    # Only complete chunks are summed; a trailing partial chunk is skipped so
    # every point covers the same number of rows. The "+ 1" on the stop keeps
    # a chunk that ends exactly on the last row, which the previous bound
    # (`i < n_rows - 500`) dropped.
    scores = [
        score.iloc[start:start + chunk_size].sum()
        for start in range(0, n_rows - chunk_size + 1, chunk_size)
    ]

    plt.plot(scores, c=np.random.rand(3,))
    plt.ylabel("AFINN score")
    plt.title(title)