Skip to content

Instantly share code, notes, and snippets.

View tengpeng's full-sized avatar

Teng Peng tengpeng

View GitHub Profile
@tengpeng
tengpeng / SQL-Movie-Rating.sql
Last active September 14, 2015 03:06
My answers to the SQL exercises for db-class.org (Part 1)
-- Reset: remove any previous copies of the three tables so the script can be re-run.
DROP TABLE IF EXISTS Movie;
DROP TABLE IF EXISTS Reviewer;
DROP TABLE IF EXISTS Rating;

-- Schema for the movie-rating exercises. No keys or constraints are declared.
CREATE TABLE Movie (
    mID      INT,
    title    TEXT,
    year     INT,
    director TEXT
);

CREATE TABLE Reviewer (
    rID  INT,
    name TEXT
);

-- rID / mID presumably refer to Reviewer.rID and Movie.mID; this is not enforced here.
CREATE TABLE Rating (
    rID        INT,
    mID        INT,
    stars      INT,
    ratingDate DATE
);
@tengpeng
tengpeng / SQL-Social-Network.sql
Last active September 14, 2015 21:49
My answers to the SQL exercises for db-class.org (Part 2)
-- Reset: remove any previous copies of the three tables so the script can be re-run.
DROP TABLE IF EXISTS Highschooler;
DROP TABLE IF EXISTS Friend;
DROP TABLE IF EXISTS Likes;

-- Schema for the social-network exercises. No keys or constraints are declared.
CREATE TABLE Highschooler (
    ID    INT,
    name  TEXT,
    grade INT
);

-- Friend and Likes each hold a pair of IDs (presumably Highschooler.ID values; not enforced here).
CREATE TABLE Friend (
    ID1 INT,
    ID2 INT
);

CREATE TABLE Likes (
    ID1 INT,
    ID2 INT
);
function (data, k = 10, scale = T, meth = "weighAvg", distData = NULL)
{
n <- nrow(data)
if (!is.null(distData)) {
distInit <- n + 1
data <- rbind(data, distData)
}
else distInit <- 1
N <- nrow(data)
ncol <- ncol(data)
https://gist.github.com/ac2b8cc202712d12595d
@tengpeng
tengpeng / a.rb
Created January 31, 2016 03:17
test
import numpy
import pandas as pd

# Labels for the groups; pd.cut needs exactly one label per bin.
group_names = ['1', '2', '3', '4', '5']

# N bins need N + 1 edges. The earlier linspace(15, 100, 5) produced only four
# bins for five labels, which pd.cut rejects with a ValueError.
bins = numpy.linspace(15, 100, len(group_names) + 1)

# Assign each age to its labelled bin. With pd.cut's defaults the intervals are
# right-closed, so a value equal to the lowest edge (15) is left as NaN.
categories = pd.cut(df_all['age'], bins, labels=group_names)

# NOTE(review): `df` and 'postTestScore' are not defined in this snippet and the
# same age-based bins are reused for them -- confirm this line is intended.
df['categories'] = pd.cut(df['postTestScore'], bins, labels=group_names)
categories

# Alternative kept for reference: NumPy-only binning with per-bin means.
#data = numpy.random.random(100)
#digitized = numpy.digitize(df_all['age'], bins)
#bin_means = [data[digitized == i].mean() for i in range(1, len(bins))]
#df_all['age']
@tengpeng
tengpeng / a.rb
Created January 31, 2016 22:46
hist python
import seaborn as sns

# Global plot appearance. The two calls are kept in their original order.
sns.set_style("white", {'ytick.major.size': 10.0})
sns.set_context("poster", font_scale=1.1)

# Non-missing monthly incomes strictly below 20000.
income = df_train['MonthlyIncome'].dropna().loc[lambda s: s < 20000]

# Distribution plot of the filtered incomes.
sns.distplot(income, color='#FD5C64')
#df[(df.T != 0).any()]
plt.xlabel('Income')
sns.despine()
@tengpeng
tengpeng / a.rb
Created January 31, 2016 22:47
concat py
# Stack the train and test frames row-wise and renumber the index from 0.
# The result is not assigned here (it is only displayed in an interactive session).
pd.concat([train_users, test_users], axis=0, ignore_index=True)
@tengpeng
tengpeng / a.rb
Created January 31, 2016 22:47
drop py
# Remove the 'id' column from `users` in place.
users.drop(labels='id', axis='columns', inplace=True)
@tengpeng
tengpeng / a.rb
Created January 31, 2016 22:58
replace NA py
# Treat the '-unknown-' placeholder in the gender column as a missing value.
# Assign the result back to the column instead of calling replace(inplace=True)
# on `users.gender`: that form is chained assignment, which warns in recent
# pandas and does not update `users` when Copy-on-Write is enabled.
users['gender'] = users['gender'].replace('-unknown-', np.nan)
@tengpeng
tengpeng / a.rb
Created January 31, 2016 22:59
drop NA py
# Percentage of missing values in each column of `users`.
users_nan = (users.isnull().sum() / users.shape[0]) * 100

# Show only columns that have missing values, leaving out 'country_destination'.
# errors='ignore' avoids a KeyError when that column has no missing values and
# was therefore already removed by the `> 0` filter.
users_nan[users_nan > 0].drop('country_destination', errors='ignore')