Teng Peng tengpeng

## SQL-Movie-Rating.sql
/* Reset: remove any earlier copies of the movie-rating tables so the
   script can be re-run from scratch. */
drop table if exists Movie;
drop table if exists Reviewer;
drop table if exists Rating;

/* Movie-rating schema. No keys or constraints are declared; Rating
   carries an rID and an mID alongside the stars and ratingDate columns. */
create table Movie (
    mID int,
    title text,
    year int,
    director text
);

create table Reviewer (
    rID int,
    name text
);

create table Rating (
    rID int,
    mID int,
    stars int,
    ratingDate date
);

## SQL-Social-Network.sql
/* Reset: remove any earlier copies of the social-network tables so the
   script can be re-run from scratch. */
drop table if exists Highschooler;
drop table if exists Friend;
drop table if exists Likes;

/* Social-network schema. No keys or constraints are declared; Friend and
   Likes each hold a pair of integer IDs (ID1, ID2). */
create table Highschooler (
    ID int,
    name text,
    grade int
);

create table Friend (
    ID1 int,
    ID2 int
);

create table Likes (
    ID1 int,
    ID2 int
);

## gist:fb6809717361319d8bde
function (data, k = 10, scale = T, meth = "weighAvg", distData = NULL)
{
    n <- nrow(data)
    if (!is.null(distData)) {
        distInit <- n + 1
        data <- rbind(data, distData)
    }
    else distInit <- 1
    N <- nrow(data)
    ncol <- ncol(data)

## a.rb
https://gist.github.com/ac2b8cc202712d12595d

## a.rb
import numpy
import pandas as pd

# Bucket df_all['age'] and df['postTestScore'] into equal-width, labelled bins
# spanning 15..100.
group_names = ['1', '2', '3', '4', '5']

# pd.cut requires exactly one more bin edge than labels. The previous
# linspace(15, 100, 5) produced 5 edges (4 bins) for 5 labels, which makes
# pd.cut raise ValueError. Derive the edge count from the labels instead.
bins = numpy.linspace(15, 100, len(group_names) + 1)

# NOTE(review): with pd.cut's defaults the intervals are right-closed, so a
# value exactly equal to the lowest edge (15) is left unbinned (NaN) - confirm
# that is acceptable for this data.
categories = pd.cut(df_all['age'], bins, labels=group_names)
df['categories'] = pd.cut(df['postTestScore'], bins, labels=group_names)
categories

## a.rb
import seaborn as sns

# Monthly incomes with missing values removed, restricted to values
# below 20000.
income = df_train["MonthlyIncome"].dropna()
income = income.loc[income < 20000]

# Plot styling: white background, longer major y ticks, poster-sized text.
sns.set_style("white", {'ytick.major.size': 10.0})
sns.set_context("poster", font_scale=1.1)

# Draw the distribution, label the x axis and strip the spines.
# `plt` is not imported in this snippet; it must already be in scope.
sns.distplot(income, color='#FD5C64')
plt.xlabel('Income')
sns.despine()

## a.rb
# Stack the train and test user frames row-wise with a fresh 0..n-1 index.
# The result is not assigned here; it is only evaluated (e.g. for display).
pd.concat(
    [train_users, test_users],
    axis=0,
    ignore_index=True,
)

## a.rb
# Remove the 'id' column from `users`, modifying the frame in place.
users.drop(labels='id', axis='columns', inplace=True)

## a.rb
# Treat the '-unknown-' placeholder in the gender column as a missing value.
# The result is assigned back to the column explicitly: calling
# replace(..., inplace=True) on `users.gender` operates on an intermediate
# Series, which under pandas Copy-on-Write does not update `users` (and
# triggers a chained-assignment warning on recent pandas versions).
users['gender'] = users['gender'].replace('-unknown-', np.nan)

## a.rb
# Percentage of missing values in each column of `users`.
users_nan = users.isna().sum().div(users.shape[0]).mul(100)
# Show only the columns that have missing values, leaving out the
# 'country_destination' entry.
users_nan.loc[users_nan > 0].drop(labels='country_destination')
	/* Reset: remove any earlier copies of the movie-rating tables so the
	   script can be re-run from scratch. */
	drop table if exists Movie;
	drop table if exists Reviewer;
	drop table if exists Rating;

	/* Movie-rating schema. No keys or constraints are declared; Rating
	   carries an rID and an mID alongside the stars and ratingDate columns. */
	create table Movie (
	    mID int,
	    title text,
	    year int,
	    director text
	);

	create table Reviewer (
	    rID int,
	    name text
	);

	create table Rating (
	    rID int,
	    mID int,
	    stars int,
	    ratingDate date
	);
	/* Reset: remove any earlier copies of the social-network tables so the
	   script can be re-run from scratch. */
	drop table if exists Highschooler;
	drop table if exists Friend;
	drop table if exists Likes;

	/* Social-network schema. No keys or constraints are declared; Friend and
	   Likes each hold a pair of integer IDs (ID1, ID2). */
	create table Highschooler (
	    ID int,
	    name text,
	    grade int
	);

	create table Friend (
	    ID1 int,
	    ID2 int
	);

	create table Likes (
	    ID1 int,
	    ID2 int
	);
	function (data, k = 10, scale = T, meth = "weighAvg", distData = NULL)
	{
	n <- nrow(data)
	if (!is.null(distData)) {
	distInit <- n + 1
	data <- rbind(data, distData)
	}
	else distInit <- 1
	N <- nrow(data)
	ncol <- ncol(data)
	import numpy
	import pandas as pd

	# Bucket df_all['age'] and df['postTestScore'] into equal-width, labelled
	# bins spanning 15..100.
	group_names = ['1', '2', '3', '4', '5']

	# pd.cut requires exactly one more bin edge than labels. The previous
	# linspace(15, 100, 5) produced 5 edges (4 bins) for 5 labels, which makes
	# pd.cut raise ValueError. Derive the edge count from the labels instead.
	bins = numpy.linspace(15, 100, len(group_names) + 1)

	# NOTE(review): with pd.cut's defaults the intervals are right-closed, so a
	# value exactly equal to the lowest edge (15) is left unbinned (NaN) -
	# confirm that is acceptable for this data.
	categories = pd.cut(df_all['age'], bins, labels=group_names)
	df['categories'] = pd.cut(df['postTestScore'], bins, labels=group_names)
	categories
	import seaborn as sns

	# Monthly incomes with missing values removed, restricted to values
	# below 20000.
	income = df_train["MonthlyIncome"].dropna()
	income = income.loc[income < 20000]

	# Plot styling: white background, longer major y ticks, poster-sized text.
	sns.set_style("white", {'ytick.major.size': 10.0})
	sns.set_context("poster", font_scale=1.1)

	# Draw the distribution, label the x axis and strip the spines.
	# `plt` is not imported in this snippet; it must already be in scope.
	sns.distplot(income, color='#FD5C64')
	plt.xlabel('Income')
	sns.despine()
	# Percentage of missing values in each column of `users`.
	users_nan = users.isna().sum().div(users.shape[0]).mul(100)
	# Show only the columns that have missing values, leaving out the
	# 'country_destination' entry.
	users_nan.loc[users_nan > 0].drop(labels='country_destination')