Skip to content

Instantly share code, notes, and snippets.

# Keep only the entries of `encoded_texts` whose length is below
# `max_pad_length`, and build the matching binary targets
# (1 when the label is "positive", 0 for anything else).
reviews = []
labels = []
for i, encoded in enumerate(tqdm(encoded_texts)):
    if len(encoded) < max_pad_length:
        reviews.append(encoded)
        labels.append(1 if labels_as_list[i] == "positive" else 0)

# Sanity check: both lists are appended to in lockstep above.
assert len(reviews) == len(labels), "The labels and feature lists should have the same length"
# Length of every entry in `encoded_texts` (tqdm only reports progress).
lengths = list(map(len, tqdm(encoded_texts)))
length_as_series = pd.Series(lengths)

# Plot the distribution of those lengths.
plt.title("Probability Density Function for text lengths")
sns.distplot(length_as_series)

# Length cut-off: the 0.9 quantile of the observed lengths.
max_pad_length = length_as_series.quantile(q=0.9)
# Build the encoder from the raw texts, then encode every text with it
# (tqdm only reports progress).
encoder = SpacyEncoder(text_as_list)
encoded_texts = [encoder.encode(text) for text in tqdm(text_as_list)]
# Randomly downsample the 'positive' rows so that they do not outnumber the
# remaining rows.
# NOTE(review): presumably the goal is a balanced dataset -- confirm. The
# previous code dropped as many positives as there were non-positive rows,
# which only yields equal class sizes when positives are exactly twice the rest.
idx_positive = df[df['labels'] == 'positive'].index
nbr_other = len(df) - len(idx_positive)
# Drop only the surplus of positives; clamp at 0 when positives are already
# the minority so np.random.choice is never asked for a negative sample size.
nbr_to_drop = max(len(idx_positive) - nbr_other, 0)
drop_indices = np.random.choice(idx_positive, nbr_to_drop, replace=False)
df = df.drop(drop_indices)
# Convert the 'text' and 'labels' columns to plain Python lists.
text_as_list, labels_as_list = (
    df[column].tolist() for column in ('text', 'labels')
)
# Load the raw reviews.
df = pd.read_csv('data/reviews.csv')

# Drop the metadata columns listed below.
df = df.drop(
    columns=[
        'Id',
        'ProductId',
        'UserId',
        'ProfileName',
        'HelpfulnessNumerator',
        'HelpfulnessDenominator',
        'Time',
        'Summary',
    ]
)

# Remove the ambiguous 3-star reviews (only Score == 3 is filtered here).
df = df[df['Score'].ne(3)]
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchnlp.encoders.text import SpacyEncoder, pad_tensor
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
# Use white ("w") for tick marks, axis labels and axes edges.
params = dict.fromkeys(
    ("ytick.color", "xtick.color", "axes.labelcolor", "axes.edgecolor"),
    "w",
)
plt.rcParams.update(params)

plt.title("IBM Stock prices", color="white")
plt.fill_between(x=idx_pred,
# Set the tick, axis-label and axes-edge colours to white ("w").
white_keys = ("ytick.color", "xtick.color", "axes.labelcolor", "axes.edgecolor")
params = {key: "w" for key in white_keys}
plt.rcParams.update(params)

plt.title("IBM Stock prices", color="white")
plt.plot(df_pred.index,
df_pred.Close,
# Report the fraction of the last 750 closing prices that fall strictly
# between the lower and upper bounds.
# NOTE(review): assumes both bound arrays broadcast against a (750, 1)
# column -- confirm against get_confidence_intervals.
y = np.array(df.Close[-750:]).reshape(-1, 1)
under_upper = upper_bound_unscaled > y
over_lower = lower_bound_unscaled < y
# A point is inside the interval only when BOTH comparisons hold. Comparing
# the masks with `==` would also count points where both are False (crossed
# bounds, or NaN values, for which every comparison is False).
total = under_upper & over_lower
print("{} our predictions are in our confidence interval".format(np.mean(total)))
# Settings passed to the two helper calls below.
future_length, sample_nbr, ci_multiplier = 7, 4, 10

# Run the prediction helper on X_test, then derive the mean and the
# upper/lower bounds from its output.
(idx_pred, preds_test) = pred_stock_future(X_test, future_length, sample_nbr)
(
    pred_mean_unscaled,
    upper_bound_unscaled,
    lower_bound_unscaled,
) = get_confidence_intervals(preds_test, ci_multiplier)