This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE: DataFrame.dropna is NOT in-place by default — each call below returns
# a new DataFrame; assign the result (or pass inplace=True) to keep the effect.
# `df` is assumed to be an existing pandas DataFrame — TODO confirm at call site.

# Drop the columns where all elements are missing values:
df.dropna(axis=1, how='all')
# Drop the columns where any of the elements are missing values
df.dropna(axis=1, how='any')
# Keep only the rows which contain at least 2 non-missing values
# (thresh=N requires N non-NA values per row, not "at most N missing")
df.dropna(thresh=2)
# Drop the columns where any of the elements are missing values
# (statement that followed this comment was truncated in the source)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Third-party imports for a Keras classification workflow:
# data handling (numpy/pandas), preprocessing and evaluation (sklearn),
# and model definition (keras Sequential + Dense layers).
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras.layers import Dense
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Prepare Scatter Plot with Best Fit Line
# Set Working Directory using setwd("")
# Read Data — expects "SVM.csv" with a header row in the working directory
data = read.csv("SVM.csv", header = T)
# Quick sanity check on the first rows of the loaded frame
head(data)
# Scatter Plot of all column pairs in the frame
plot(data, main ="Scatter Plot")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hashtags & mentions | |
tag_dict = {} | |
mention_dict = {} | |
for i in tweet_df.index: | |
tweet_text = tweet_df.ix[i]['Tweet text'] | |
tweet = tweet_text.lower() | |
tweet_tokenized = tweet.split() | |
for word in tweet_tokenized: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
import scipy.stats | |
def descriptive_stats(distribution): | |
''' | |
Compute and present simple descriptive stats for a distribution | |
Parameters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the Ecdat package (one-time setup; safe to comment out after first run)
install.packages("Ecdat")
# Load the library and the Garch dataset (daily exchange-rate data shipped with Ecdat)
library(Ecdat)
mydata = Garch
# Look at the dataset structure (column names, types, sample values)
str(mydata)
# Correct the data types of date and day
# Correcting date fixes it to some arbitrary date such that the trend is same
# but the mapping is different
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keras/MNIST training setup: model building blocks, one-hot encoding helper,
# the dataset itself, and notebook-side visualization (SVG model graph +
# live loss plotting).
from keras import models
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from keras.datasets import mnist
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
import livelossplot

# Keras callback that redraws the loss/metric curves after every epoch.
plot_losses = livelossplot.PlotLossesKeras()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import GA | |
""" | |
The y=target is to maximize this equation ASAP: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
from string import punctuation | |
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS as stop_words | |
import spacy | |
def count_words(tokens): | |
word_counts = {} | |
for token in tokens: | |
if token not in stop_words and token not in punctuation and token is not '\n': | |
if token not in word_counts.keys(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Creates a corpus from Wikipedia dump file. | |
Inspired by: | |
https://github.com/panyang/Wikipedia_Word2vec/blob/master/v1/process_wiki.py | |
""" | |
import sys | |
from gensim.corpora import WikiCorpus |
NewerOlder