josht-jpg

## KNN-linear-alg.rs
/// Numeric helper operations, generic over an element type `T` that must
/// implement `Add` and `Sub`.
///
/// NOTE(review): only the method signatures are visible in this snippet; the
/// per-method descriptions below are inferred from the names and should be
/// confirmed against the implementations.
trait LinearAlg<T>
where
    T: Add + Sub,
{
    /// Combines `self` with the slice `w` into a single `T`
    /// (presumably the dot product).
    fn dot(&self, w: &[T]) -> T;

    /// Combines `self` with the slice `w` into a new `Vec<T>`
    /// (presumably the element-wise difference `self - w`).
    fn subtract(&self, w: &[T]) -> Vec<T>;

    /// Reduces `self` to a single `T`
    /// (presumably the sum of the squared elements).
    fn sum_of_squares(&self) -> T;

## URL-sanitizer
/**
 * A pattern that matches URLs treated as safe. It accepts, case-insensitively,
 * either an explicit http, https, mailto, ftp, tel, file or sms scheme, or a
 * leading run of characters containing no `&` or `:` that is followed by the
 * first slash, question mark, hash, or the end of the string.
 */
const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file|sms):|[^&:/?#]*(?:[/?#]|$))/gi;

/** A pattern that matches safe data URLs. It only matches image, video, and audio types. */
const DATA_URL_PATTERN = /^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm)|audio\/(?:mp3|oga|ogg|opus));base64,[a-z0-9+\/]+=*$/i;

/**
 * Coerces `url` to a string and screens it against the two patterns above.
 *
 * Returns "about:blank" when the coerced value is "null", empty, or already
 * "about:blank"; returns the URL unchanged when it matches SAFE_URL_PATTERN
 * or DATA_URL_PATTERN.
 *
 * NOTE(review): the handling of URLs that match neither pattern lies beyond
 * the end of this snippet.
 */
function _sanitizeUrl(url: string): string {
  // String() also covers callers that pass a non-string value at runtime.
  url = String(url);
  if (url === "null" || url.length === 0 || url === "about:blank") return "about:blank";
  if (url.match(SAFE_URL_PATTERN) || url.match(DATA_URL_PATTERN)) return url;

## predicting_sentence_sentiment
def predict_sentences(book, stop_words):
    """Split a book into cleaned sentences, discarding the empty ones.

    Args:
        book: Full text of the book as a single string.
        stop_words: Words to strip from every sentence; forwarded unchanged
            to ``clean_labelled``.

    The visible part of this function only prepares the sentence table; it
    contains no ``return`` statement.
    """
    # One row per "."-delimited fragment of the text.
    fragments = book.split(".")
    book_sentences = pd.DataFrame(fragments, columns=['sentence'])

    # Replace each fragment with its cleaned form.
    cleaned = book_sentences['sentence'].apply(
        lambda fragment: clean_labelled(fragment, stop_words))
    book_sentences['sentence'] = cleaned

    # Keep only the rows whose cleaned sentence is non-empty.
    is_non_empty = book_sentences['sentence'].str.len() > 0
    book_sentences = book_sentences[is_non_empty]

## cleaning_labelled
def clean_labelled(sentence, stop_words):
    r"""Lower-case a sentence, tokenize it into words, and drop stop words.

    Args:
        sentence: Text to clean (str).
        stop_words: Container of words to exclude; membership is tested with
            ``in``. Tokens are lower-cased before the comparison, so entries
            must be lower-case to match.

    Returns:
        list[str]: The ``\w+`` tokens of the lower-cased sentence, in their
        original order, excluding every token found in ``stop_words``.
    """
    tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(sentence.lower())

    # No intermediate DataFrame is needed: the result is a plain token list.
    return [w for w in tokens if w not in stop_words]

## loading_labelled_sentences
# Load the labelled training sentences and name their columns.
labelled_train = pd.read_csv("labelled_train.csv")
labelled_train.columns = ['line', 'sentence', 'score']

# Load the labelled test sentences into their own frame. Binding this to
# labelled_test keeps the training frame intact and defines the name used on
# the next line.
labelled_test = pd.read_csv("labelled_test.csv")
labelled_test.columns = ['line', 'sentence', 'score']

## sample_afinn
# Wrap the raw book entries in a Series so every step below can be applied
# element by element.
books_raw = pd.Series(books_raw)

# get_book_contents, bigram and afinn_context are defined elsewhere; each one
# is applied to every element of the previous step's result.
books = books_raw.apply(get_book_contents)

books_bigrams = books.apply(bigram)

books_afinn = books_bigrams.apply(afinn_context)

# Mean of the 'score' column of each element produced by afinn_context.
books_means = books_afinn.apply(lambda x: x['score'].mean())

## books_sample
#List of the raw contents of each book
#Each element is a string

books_raw = [owl_creek_bridge_raw,
            portrait_of_a_lady_raw,
            white_company_raw,
            ladies_paradise_raw,
            private_memoirs_raw,
            master_of_ballantrae_raw,
            agathas_husband_raw,

## Basic_imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")

## plot_NRC
def plot_nrc(df, title, window=750):
    """Sum the NRC sentiment columns of ``df`` over consecutive row windows.

    Args:
        df: DataFrame whose columns include the contiguous range 'anger'
            through 'trust' (expected to be the ten columns named in
            ``NRC_sentiments`` -- TODO confirm against its definition).
        title: Not used by the visible part of this function.
        window: Number of rows per window; must be a positive int.
            Defaults to 750.

    Only complete windows are summed; trailing rows that do not fill a whole
    window are ignored. The visible part of this function builds the
    per-window table and contains no ``return`` statement.
    """
    n_windows = df.shape[0] // window
    scores = pd.DataFrame(np.zeros((n_windows, 10)), columns = NRC_sentiments)

    # Iterating over exactly n_windows fills every row of `scores`, including
    # the last one when the row count is an exact multiple of the window.
    for j in range(n_windows):
        i = j * window
        # Rows are sliced by position (half-open), so each window holds
        # exactly `window` rows and consecutive windows do not overlap.
        scores.iloc[j] = df.iloc[i:i + window].loc[:, 'anger':'trust'].sum()


## plotting_AFINN_scores
def plot_afinn(df, title, window=500):
    """Plot the AFINN score summed over consecutive row windows of ``df``.

    Args:
        df: DataFrame with a 'score' column.
        title: Title given to the plot.
        window: Number of rows per window; must be a positive int.
            Defaults to 500.

    Only complete windows are plotted; trailing rows that do not fill a whole
    window are ignored. The line is drawn in a random RGB colour.
    """
    n_rows = df.shape[0]

    # The upper bound n_rows - window + 1 keeps the final window when the row
    # count is an exact multiple of the window size.
    scores = [
        df.iloc[start:start + window].loc[:, 'score'].sum()
        for start in range(0, n_rows - window + 1, window)
    ]

    plt.plot(scores, c=np.random.rand(3,))
    plt.ylabel("AFINN score")
    plt.title(title)
	/// Numeric helper operations, generic over an element type `T` that must
	/// implement `Add` and `Sub`.
	///
	/// NOTE(review): only the method signatures are visible in this snippet; the
	/// per-method descriptions below are inferred from the names and should be
	/// confirmed against the implementations.
	trait LinearAlg<T>
	where
	T: Add + Sub,
	{
	/// Combines `self` with the slice `w` into a single `T`
	/// (presumably the dot product).
	fn dot(&self, w: &[T]) -> T;

	/// Combines `self` with the slice `w` into a new `Vec<T>`
	/// (presumably the element-wise difference `self - w`).
	fn subtract(&self, w: &[T]) -> Vec<T>;

	/// Reduces `self` to a single `T`
	/// (presumably the sum of the squared elements).
	fn sum_of_squares(&self) -> T;
	/**
	 * A pattern that matches URLs treated as safe. It accepts, case-insensitively,
	 * either an explicit http, https, mailto, ftp, tel, file or sms scheme, or a
	 * leading run of characters containing no `&` or `:` that is followed by the
	 * first slash, question mark, hash, or the end of the string.
	 */
	const SAFE_URL_PATTERN = /^(?:(?:https?|mailto|ftp|tel|file|sms):|[^&:/?#]*(?:[/?#]|$))/gi;

	/** A pattern that matches safe data URLs. It only matches image, video, and audio types. */
	const DATA_URL_PATTERN = /^data:(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm)|audio\/(?:mp3|oga|ogg|opus));base64,[a-z0-9+\/]+=*$/i;

	/**
	 * Coerces `url` to a string and screens it against SAFE_URL_PATTERN and
	 * DATA_URL_PATTERN.
	 *
	 * Returns "about:blank" when the coerced value is "null", empty, or already
	 * "about:blank"; returns the URL unchanged when it matches either pattern.
	 *
	 * NOTE(review): the handling of URLs that match neither pattern lies beyond
	 * the end of this snippet.
	 */
	function _sanitizeUrl(url: string): string {
	  // String() also covers callers that pass a non-string value at runtime.
	  url = String(url);
	  if (url === "null" || url.length === 0 || url === "about:blank") return "about:blank";
	  if (url.match(SAFE_URL_PATTERN) || url.match(DATA_URL_PATTERN)) return url;
	def predict_sentences(book, stop_words):
	    """Split a book into cleaned sentences, discarding the empty ones.

	    Args:
	        book: Full text of the book as a single string.
	        stop_words: Words to strip from every sentence; forwarded unchanged
	            to ``clean_labelled``.

	    The visible part of this function only prepares the sentence table; it
	    contains no ``return`` statement.
	    """
	    #Break up book into sentences
	    book_sentences = pd.DataFrame(book.split("."), columns = ['sentence'])

	    #Clean sentences
	    book_sentences['sentence'] = book_sentences['sentence'].apply(
	        lambda x: clean_labelled(x, stop_words))

	    # Keep only the rows whose cleaned sentence is non-empty.
	    book_sentences = book_sentences[book_sentences['sentence'].str.len() > 0]
	def clean_labelled(sentence, stop_words):
	    r"""Lower-case a sentence, tokenize it into words, and drop stop words.

	    Args:
	        sentence: Text to clean (str).
	        stop_words: Container of words to exclude; membership is tested with
	            ``in``. Tokens are lower-cased before the comparison, so entries
	            must be lower-case to match.

	    Returns:
	        list[str]: The ``\w+`` tokens of the lower-cased sentence, in their
	        original order, excluding every token found in ``stop_words``.
	    """
	    tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
	    tokens = tokenizer.tokenize(sentence.lower())

	    # No intermediate DataFrame is needed: the result is a plain token list.
	    return [w for w in tokens if w not in stop_words]
	# Load the labelled training sentences and name their columns.
	labelled_train = pd.read_csv("labelled_train.csv")
	labelled_train.columns = ['line', 'sentence', 'score']

	# Load the labelled test sentences into their own frame. Binding this to
	# labelled_test keeps the training frame intact and defines the name used on
	# the next line.
	labelled_test = pd.read_csv("labelled_test.csv")
	labelled_test.columns = ['line', 'sentence', 'score']
	# Wrap the raw book entries in a Series so every step below can be applied
	# element by element.
	books_raw = pd.Series(books_raw)

	# get_book_contents, bigram and afinn_context are defined elsewhere; each one
	# is applied to every element of the previous step's result.
	books = books_raw.apply(get_book_contents)

	books_bigrams = books.apply(bigram)

	books_afinn = books_bigrams.apply(afinn_context)

	# Mean of the 'score' column of each element produced by afinn_context.
	books_means = books_afinn.apply(lambda x: x['score'].mean())
	#List of the raw contents of each book
	#Each element is a string

	books_raw = [owl_creek_bridge_raw,
	portrait_of_a_lady_raw,
	white_company_raw,
	ladies_paradise_raw,
	private_memoirs_raw,
	master_of_ballantrae_raw,
	agathas_husband_raw,
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	sns.set(style="darkgrid")
	def plot_nrc(df, title, window=750):
	    """Sum the NRC sentiment columns of ``df`` over consecutive row windows.

	    Args:
	        df: DataFrame whose columns include the contiguous range 'anger'
	            through 'trust' (expected to be the ten columns named in
	            ``NRC_sentiments`` -- TODO confirm against its definition).
	        title: Not used by the visible part of this function.
	        window: Number of rows per window; must be a positive int.
	            Defaults to 750.

	    Only complete windows are summed; trailing rows that do not fill a whole
	    window are ignored. The visible part of this function builds the
	    per-window table and contains no ``return`` statement.
	    """
	    n_windows = df.shape[0] // window
	    scores = pd.DataFrame(np.zeros((n_windows, 10)), columns = NRC_sentiments)

	    # Iterating over exactly n_windows fills every row of `scores`, including
	    # the last one when the row count is an exact multiple of the window.
	    for j in range(n_windows):
	        i = j * window
	        # Rows are sliced by position (half-open), so each window holds
	        # exactly `window` rows and consecutive windows do not overlap.
	        scores.iloc[j] = df.iloc[i:i + window].loc[:, 'anger':'trust'].sum()
	def plot_afinn(df, title, window=500):
	    """Plot the AFINN score summed over consecutive row windows of ``df``.

	    Args:
	        df: DataFrame with a 'score' column.
	        title: Title given to the plot.
	        window: Number of rows per window; must be a positive int.
	            Defaults to 500.

	    Only complete windows are plotted; trailing rows that do not fill a whole
	    window are ignored. The line is drawn in a random RGB colour.
	    """
	    n_rows = df.shape[0]

	    # The upper bound n_rows - window + 1 keeps the final window when the row
	    # count is an exact multiple of the window size.
	    scores = [
	        df.iloc[start:start + window].loc[:, 'score'].sum()
	        for start in range(0, n_rows - window + 1, window)
	    ]

	    plt.plot(scores, c=np.random.rand(3,))
	    plt.ylabel("AFINN score")
	    plt.title(title)