Chelsy Xie chelsyx

## agg_prob_vs_percentile.R
library(sn)
library(tidyverse)
# Percentile rank of each element of x: its rank (ties averaged, then
# truncated to a whole number) divided by the number of elements in x.
perc.rank <- function(x) {
  whole_ranks <- trunc(rank(x))
  whole_ranks / length(x)
}

# Evaluate a skew-normal density with dsn() on a fine grid from -1 to 2
X <- seq(from = -1, to = 2, by = 0.001)
dist <- dsn(X, xi = 0.1, omega = 0.3, alpha = 5)
# Keep only the density values that lie within [0, 1].
# NOTE(review): the filter is applied to the density values, not to X -
# confirm this is the intended selection.
dist <- dist[dist >= 0 & dist <= 1]

# Create dataset with scaled propensity (score) and group

## opening-and-closing-an-ssh-tunnel-in-a-shell-script-the-smart-way.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                chelsyx
                / opening-and-closing-an-ssh-tunnel-in-a-shell-script-the-smart-way.md
            
            
              Created
              July 26, 2017 00:52
                — forked from scy/opening-and-closing-an-ssh-tunnel-in-a-shell-script-the-smart-way.md
            
              
                Opening and closing an SSH tunnel in a shell script the smart way
              
          
    Opening and closing an SSH tunnel in a shell script the smart way

I recently had the following problem:

From an unattended shell script (called by Jenkins), run a command-line tool that accesses the MySQL database on another host.
That tool doesn't know that the database is on another host, plus the MySQL port on that host is firewalled and not accessible from other machines.

We didn't want to open the MySQL port to the network, but it's possible to SSH from the Jenkins machine to the MySQL machine. So, basically you would do something like
ssh -L 3306:localhost:3306 remotehost

  
## upgrade_packages.R
# WMF only: when the cluster marker file is present, announce the host and
# point both proxy environment variables at the web proxy.
if (file.exists("/etc/wikimedia-cluster")) {
  message('Detected that this script is being run on a WMF machine ("', Sys.info()[["nodename"]], '"). Setting proxies...')
  Sys.setenv(
    "http_proxy" = "http://webproxy.eqiad.wmnet:8080",
    "https_proxy" = "http://webproxy.eqiad.wmnet:8080"
  )
}

# General use:
message("Checking for a personal library...")
if (!dir.exists(Sys.getenv("R_LIBS_USER"))) {

## income.R
# Read Data
library(data.table)
# Read both data files; "?" is treated as a missing value and character
# columns are converted to factors. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
train0 <- fread("adult.data.txt", na.strings = "?", stringsAsFactors = TRUE)
test0 <- fread("adult.test.txt", na.strings = "?", stringsAsFactors = TRUE)
# The same column names are applied to both tables
var_name <- c("age","workclass","fnlwgt","education","education-num","marital-status","occupation","relationship",
              "race","sex","capital-gain","capital-loss","hours-per-week","native-country","income")
colnames(train0) <- var_name
colnames(test0) <- var_name

# Remove duplicates

## git-up-equivalent.txt
git pull --rebase --autostash

## hero_driver.R
#####
# Goal:
# 1) Suggest cutoff (no. of trips) and bonus ($)
# 2) Expected no. of additional trips
# 3) Total expenditure of the promotion
# 4) Other metrics to pay attention to
####

library(dplyr)
library(ggplot2)

## linkedin_resp_rate.R
library(data.table)
library(bit64)
library(plyr)
library(dplyr)
library(Hmisc)
library(mice)
library(vcd)
library(ggplot2)

# Read the InMail message dataset CSV (path is relative to the working directory)
data0 <- fread("INMAIL_MESSAGE_DATASET.csv")

## PandasTour.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                chelsyx
                / PandasTour.ipynb
            
            
              Created
              April 20, 2016 21:54
                — forked from wesm/PandasTour.ipynb
            
              
                Pandas Tour
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## plot.stacked.2.R
#plot.stacked makes a stacked plot where each y series is plotted on top
#of each other using filled polygons
#
#Arguments include:
#'x' - a vector of values
#'y' - a matrix of data series (columns) corresponding to x
#'order.method' = c("as.is", "max", "first")
#  "as.is" - plot in order of y column
#  "max" - plot in order of when each y series reaches maximum value
#  "first" - plot in order of when each y series first value > 0

## bnlearn.R
library(bnlearn)
require(Rgraphviz)

# Network structure as a model string: each [node|parents] entry names a
# node and, after "|", its colon-separated parents.
spec <- "[req_date][category_id|req_date:location_id][location_id|req_date][quo_dum|req_date:category_id:location_id]"
net <- model2network(spec)
class(net)
# graphviz.plot(net, shape = "ellipse")


# Keep only the four columns that appear as nodes in the model string
train <- allData[, c("req_date", "category_id", "location_id", "quo_dum")]
	library(sn)
	library(tidyverse)
	# Percentile rank of each element of x: its rank (ties averaged, then
	# truncated to a whole number) divided by the number of elements in x.
	perc.rank <- function(x) {
	  whole_ranks <- trunc(rank(x))
	  whole_ranks / length(x)
	}

	# Evaluate a skew-normal density with dsn() on a fine grid from -1 to 2
	X <- seq(from = -1, to = 2, by = 0.001)
	dist <- dsn(X, xi = 0.1, omega = 0.3, alpha = 5)
	# Keep only the density values that lie within [0, 1].
	# NOTE(review): the filter is applied to the density values, not to X -
	# confirm this is the intended selection.
	dist <- dist[dist >= 0 & dist <= 1]

	# Create dataset with scaled propensity (score) and group
	# WMF only: when the cluster marker file is present, announce the host and
	# point both proxy environment variables at the web proxy.
	if (file.exists("/etc/wikimedia-cluster")) {
	  message('Detected that this script is being run on a WMF machine ("', Sys.info()[["nodename"]], '"). Setting proxies...')
	  Sys.setenv(
	    "http_proxy" = "http://webproxy.eqiad.wmnet:8080",
	    "https_proxy" = "http://webproxy.eqiad.wmnet:8080"
	  )
	}

	# General use:
	message("Checking for a personal library...")
	if (!dir.exists(Sys.getenv("R_LIBS_USER"))) {
	# Read Data
	library(data.table)
	# Read both data files; "?" is treated as a missing value and character
	# columns are converted to factors. TRUE is spelled out because T is an
	# ordinary variable that can be reassigned.
	train0 <- fread("adult.data.txt", na.strings = "?", stringsAsFactors = TRUE)
	test0 <- fread("adult.test.txt", na.strings = "?", stringsAsFactors = TRUE)
	# The same column names are applied to both tables
	var_name <- c("age","workclass","fnlwgt","education","education-num","marital-status","occupation","relationship",
	"race","sex","capital-gain","capital-loss","hours-per-week","native-country","income")
	colnames(train0) <- var_name
	colnames(test0) <- var_name

	# Remove duplicates
	#####
	# Goal:
	# 1) Suggest cutoff (no. of trips) and bonus ($)
	# 2) Expected no. of additional trips
	# 3) Total expenditure of the promotion
	# 4) Other metrics to pay attention to
	####

	library(dplyr)
	library(ggplot2)
	library(data.table)
	library(bit64)
	library(plyr)
	library(dplyr)
	library(Hmisc)
	library(mice)
	library(vcd)
	library(ggplot2)

	# Read the InMail message dataset CSV (path is relative to the working directory)
	data0 <- fread("INMAIL_MESSAGE_DATASET.csv")
	#plot.stacked makes a stacked plot where each y series is plotted on top
	#of each other using filled polygons
	#
	#Arguments include:
	#'x' - a vector of values
	#'y' - a matrix of data series (columns) corresponding to x
	#'order.method' = c("as.is", "max", "first")
	# "as.is" - plot in order of y column
	# "max" - plot in order of when each y series reaches maximum value
	# "first" - plot in order of when each y series first value > 0
	library(bnlearn)
	require(Rgraphviz)

	# Network structure as a model string: each [node|parents] entry names a
	# node and, after "|", its colon-separated parents. The separator must be
	# a plain "|": a backslash before it is an unrecognized escape in an R
	# string literal and stops the script from parsing.
	spec <- "[req_date][category_id|req_date:location_id][location_id|req_date][quo_dum|req_date:category_id:location_id]"
	net <- model2network(spec)
	class(net)
	# graphviz.plot(net, shape = "ellipse")


	# Keep only the four columns that appear as nodes in the model string
	train <- allData[, c("req_date", "category_id", "location_id", "quo_dum")]