This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: DataFrame.dropna is NOT in-place by default — each call below returns
# a new DataFrame; assign the result (or pass inplace=True) to keep the effect.
# `df` is assumed to be an existing pandas DataFrame — TODO confirm at call site.

# Drop the columns where all elements are missing values:
df.dropna(axis=1, how='all')
# Drop the columns where any of the elements are missing values
df.dropna(axis=1, how='any')
# Keep only the rows which contain at least 2 non-missing values
# (thresh=N requires N non-NA values per row, not "at most N missing")
df.dropna(thresh=2)
# Drop the columns where any of the elements are missing values
# (statement that followed this comment was truncated in the source)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Third-party imports for a Keras classification workflow:
# data handling (numpy/pandas), preprocessing and evaluation (sklearn),
# and model definition (keras Sequential + Dense layers).
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras.layers import Dense
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Prepare Scatter Plot with Best Fit Line
# Set Working Directory using setwd("")
# Read Data — expects "SVM.csv" with a header row in the working directory
data = read.csv("SVM.csv", header = T)
# Quick sanity check on the first rows of the loaded frame
head(data)
# Scatter Plot of all column pairs in the frame
plot(data, main ="Scatter Plot")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hashtags & mentions | |
tag_dict = {} | |
mention_dict = {} | |
for i in tweet_df.index: | |
tweet_text = tweet_df.ix[i]['Tweet text'] | |
tweet = tweet_text.lower() | |
tweet_tokenized = tweet.split() | |
for word in tweet_tokenized: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
import scipy.stats | |
def descriptive_stats(distribution): | |
''' | |
Compute and present simple descriptive stats for a distribution | |
Parameters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the Ecdat package (one-time setup; safe to comment out after first run)
install.packages("Ecdat")
# Load the library and the Garch dataset (daily exchange-rate data shipped with Ecdat)
library(Ecdat)
mydata = Garch
# Look at the dataset structure (column names, types, sample values)
str(mydata)
# Correct the data types of date and day
# Correcting date fixes it to some arbitrary date such that the trend is same
# but the mapping is different
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keras/MNIST training setup: model building blocks, one-hot encoding helper,
# the dataset itself, and notebook-side visualization (SVG model graph +
# live loss plotting).
from keras import models
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from keras.datasets import mnist
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
import livelossplot

# Keras callback that redraws the loss/metric curves after every epoch.
plot_losses = livelossplot.PlotLossesKeras()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import GA | |
""" | |
The y=target is to maximize this equation ASAP: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
from string import punctuation | |
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS as stop_words | |
import spacy | |
def count_words(tokens): | |
word_counts = {} | |
for token in tokens: | |
if token not in stop_words and token not in punctuation and token is not '\n': | |
if token not in word_counts.keys(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Creates a corpus from Wikipedia dump file. | |
Inspired by: | |
https://github.com/panyang/Wikipedia_Word2vec/blob/master/v1/process_wiki.py | |
""" | |
import sys | |
from gensim.corpora import WikiCorpus |
NewerOlder