Manos Parzakonis IronistM

## ip2Int.R
#' Convert dotted-quad IPv4 address strings to their numeric value.
#'
#' Each address is split on "." and its first four octets are combined as
#' o1 * 256^3 + o2 * 256^2 + o3 * 256 + o4. The function is vectorised over
#' `ip`; a length-one input gives the same value as before.
#'
#' @param ip Character vector of addresses such as "192.168.0.1".
#' @return Numeric (double) vector with one value per element of `ip`; `NA`
#'   where an address has fewer than four octets or an octet that is not a
#'   number. Doubles are used because values can exceed the R integer range.
ip2Int <- function(ip){
  # Place values of the four octets, most significant first
  weights <- 256^(3:0)
  octets <- strsplit(ip, ".", fixed = TRUE)
  vapply(
    octets,
    # Indexing 1:4 pads short addresses with NA and ignores extra parts
    function(parts) sum(as.numeric(parts[1:4]) * weights),
    numeric(1),
    USE.NAMES = FALSE
  )
}


# int2Ip: presumably the inverse of ip2Int (number -> dotted IP), judging by
# the name -- TODO confirm.
# NOTE(review): this snippet is truncated; the function body is never closed
# and only the first element of `split` is computed.
int2Ip <- function(int){
  # `split` starts as NULL and is filled element by element
  split <- NULL
  # Number of whole 256^3 blocks in `int`, truncated to an integer
  split[1] <- as.integer(int/256^3)

## rsitecatalyst-anomaly-detection-plot.R
# Prepare pageviews_w_forecast for plotting with ggplot2
library(ggplot2)

# Build a single date-time column from the separate year / month / day columns
pageviews_w_forecast$date <- ISOdate(
  year = pageviews_w_forecast$year,
  month = pageviews_w_forecast$month,
  day = pageviews_w_forecast$day
)

# Coerce the pageviews column and its upper / lower columns to numeric
pageviews_w_forecast$pageviews <-
  as.numeric(pageviews_w_forecast$pageviews)
pageviews_w_forecast$pageviews_upper <-
  as.numeric(pageviews_w_forecast$pageviews_upper)
pageviews_w_forecast$pageviews_lower <-
  as.numeric(pageviews_w_forecast$pageviews_lower)

## QlikView_Calculate_Age_Group.qvs
// Derive an age_group label from the field `age`:
//   age < 16  -> 'Under 16'
//   age > 65  -> 'Over 65'
//   otherwise -> a ten-year band starting at 16: '16 - 25', '26 - 35', ..., '56 - 65'
// Div(age-16,10) is the zero-based band index; multiplying it by 10 and adding
// 16 / 25 gives the lower / upper bound of the band.
// NOTE(review): the snippet ends after the field expression -- the source
// clause and terminating ';' of this LOAD are not part of it.
LOAD
if(age<16, 'Under 16',
     if(age>65, 'Over 65',
          text(16+(Div(age-16,10)*10)) & ' - ' & text(25+(Div(age-16,10)*10)))) as age_group

## QlikView_Data_Sampling.qvs
// Load a random sample of roughly 10% of the rows: a row is kept when
// ceil(rand() * 100) is at most 10. Replace the <...> placeholders with
// your own field list and QVD file.
data:
LOAD ….<your fields>…
FROM <your QVD file>
WHERE ceil(rand() * 100) <= 10;  // or: rand() <= 0.1

// SAMPLE n LOAD is much faster, but it allocates memory for the full data set during the LOAD.
// WHERE rand() <= n allocates memory only for the smaller result set, which can be crucial with big data.

## 0_reuse_code.js
// Use Gists to store code you would like to remember later on.
// Example snippet: print the global `window` object to the console.
// NOTE(review): presumably meant to run in a browser, where `window` is defined -- confirm.
console.log(window); // log the "window" object to the console

## anova.r
# ANOVA: starts assembling an ANOVA-style table for `fit1` with a single
# "Regression" row, using `fit2` as the comparison model.
# NOTE(review): the snippet is truncated -- the function body is never closed
# and nothing is returned yet. Presumably `fit2` is the reduced model nested
# in `fit1` (TODO confirm).
ANOVA<-function(fit1,fit2){
  # Model-comparison table for fit2 vs fit1
  temp <- anova(fit2,fit1 )
  # ANOVA table of fit1 on its own
  fin.aov <- anova(fit1)
  # Columns 2-6 of the second comparison row seed the "Regression" row
  reg <- temp[2,2:6]
  rownames(reg) <- "Regression"
  # Copy the values in columns 2-3 one place left, into columns 1-2
  reg[1,1:2] <- reg[1,2:3]
  # Column 3 becomes column 2 divided by column 1
  # (presumably mean square = sum of squares / df -- TODO confirm)
  reg[1,3] <- reg[1,2]/reg[1,1]
  # Reuse the column names of fit1's table
  colnames(reg) <- colnames(fin.aov)
  # Last row of fit1's table (tail(nrow(x), 1) is just nrow(x));
  # presumably the residuals row -- TODO confirm
  res <- fin.aov[tail(nrow(fin.aov),1),]
  # Total row: first two columns are the sum of `reg` and `res`, the rest NA
  tot <- cbind(reg[1,1:2]+res[1,1:2],NA,NA,NA)

## anomaly_detection.r
# Attach the forecast package without printing its startup messages
suppressMessages(library(forecast))

# Read the input as CSV from standard input
data<-read.csv( file('stdin') )

# anomaly_detection: takes `data`, from which the `count` column is read.
# NOTE(review): the snippet is truncated -- the function body is never closed.
anomaly_detection<-function(data){

  # Number of observations per seasonal cycle, used as the ts() frequency
  seasonality<-48
  # Wrap the `count` column in a time series with that frequency
  data_series<-ts(data$count,frequency=seasonality)

  train_start<-1 ## train on 1 month of data

## leg_violence_predict.R
####################
# Create relogit predicted probabilities using Zelig and ggplot2
# Two Sword Lengths: Losers' Consent and Violence in National Legislatures (Working Paper 2012)
# Christopher Gandrud
# Updated 26 April 2012
###################

## Load required packages
library(RCurl)
library(Zelig)

## regression_with_dates.R
# Toy example: regress a noisy, downward-trending series on a yearly Date
# variable and print the model summary.
N <- 10
# Derive the ids from N so the sample size is controlled in one place
id <- seq_len(N)
# Standard-normal noise around a line with slope -1 in id
x <- 1 + rnorm(N) - id
# One date per observation, one year apart; `along.with` is spelled out
# instead of relying on partial matching of `along`
date <- seq(as.Date("2013-07-01"), by = "year", along.with = x)
df <- data.frame(x = x, date = date)
plot(df$date, df$x)
summary(lm(x ~ date, data = df))


## DNS-Query-NX.r
# Attach the PostgreSQL driver package and the car package
library(RPostgreSQL)
library(car)

# Open a connection handle to the "dnsmonitor" database on localhost
# NOTE(review): the password is hard-coded in source -- consider loading it
# from the environment or a config file kept out of version control.
pgDrv <- dbDriver("PostgreSQL")
dbh <- dbConnect(
  pgDrv,
  host = "localhost",
  dbname = "dnsmonitor",
  user = "dnsmon",
  password = "tooEasy"
)

# Retrieve Statistics from DB
stats <- dbGetQuery(dbh, "select client.id, client.ip, sum(queries) as queries, sum(nx) as nx, sum(answers) as answers, sum(errors) as errors, count(distinct day) as days_active
	#' Convert dotted-quad IPv4 address strings to their numeric value.
	#'
	#' Each address is split on "." and its first four octets are combined as
	#' o1 * 256^3 + o2 * 256^2 + o3 * 256 + o4. The function is vectorised over
	#' `ip`; a length-one input gives the same value as before.
	#'
	#' @param ip Character vector of addresses such as "192.168.0.1".
	#' @return Numeric (double) vector with one value per element of `ip`; `NA`
	#'   where an address has fewer than four octets or an octet that is not a
	#'   number. Doubles are used because values can exceed the R integer range.
	ip2Int <- function(ip){
	  # Place values of the four octets, most significant first
	  weights <- 256^(3:0)
	  octets <- strsplit(ip, ".", fixed = TRUE)
	  vapply(
	    octets,
	    # Indexing 1:4 pads short addresses with NA and ignores extra parts
	    function(parts) sum(as.numeric(parts[1:4]) * weights),
	    numeric(1),
	    USE.NAMES = FALSE
	  )
	}


	# int2Ip: presumably the inverse of ip2Int (number -> dotted IP), judging by
	# the name -- TODO confirm.
	# NOTE(review): this snippet is truncated; the function body is never closed
	# and only the first element of `split` is computed.
	int2Ip <- function(int){
	# `split` starts as NULL and is filled element by element
	split <- NULL
	# Number of whole 256^3 blocks in `int`, truncated to an integer
	split[1] <- as.integer(int/256^3)
	# Prepare pageviews_w_forecast for plotting with ggplot2
	library(ggplot2)

	# Build a single date-time column from the separate year / month / day columns
	pageviews_w_forecast$date <- ISOdate(
	  year = pageviews_w_forecast$year,
	  month = pageviews_w_forecast$month,
	  day = pageviews_w_forecast$day
	)

	# Coerce the pageviews column and its upper / lower columns to numeric
	pageviews_w_forecast$pageviews <-
	  as.numeric(pageviews_w_forecast$pageviews)
	pageviews_w_forecast$pageviews_upper <-
	  as.numeric(pageviews_w_forecast$pageviews_upper)
	pageviews_w_forecast$pageviews_lower <-
	  as.numeric(pageviews_w_forecast$pageviews_lower)
	// Derive an age_group label from the field `age`:
	//   age < 16  -> 'Under 16'
	//   age > 65  -> 'Over 65'
	//   otherwise -> a ten-year band starting at 16: '16 - 25', '26 - 35', ..., '56 - 65'
	// Div(age-16,10) is the zero-based band index; it must be multiplied by 10
	// (the '*' operators were missing, leaving "Div(age-16,10)10", which is not
	// a valid expression) before adding 16 / 25 for the band bounds.
	// NOTE(review): the snippet ends after the field expression -- the source
	// clause and terminating ';' of this LOAD are not part of it.
	LOAD
	if(age<16, 'Under 16',
	     if(age>65, 'Over 65',
	          text(16+(Div(age-16,10)*10)) & ' - ' & text(25+(Div(age-16,10)*10)))) as age_group
	// Load a random sample of roughly 10% of the rows: a row is kept when
	// ceil(rand() * 100) is at most 10. Replace the <...> placeholders with
	// your own field list and QVD file.
	data:
	LOAD ….<your fields>…
	FROM <your QVD file>
	WHERE ceil(rand() * 100) <= 10; // or: rand() <= 0.1

	// SAMPLE n LOAD is much faster, but it allocates memory for the full data set during the LOAD.
	// WHERE rand() <= n allocates memory only for the smaller result set, which can be crucial with big data.
	// Use Gists to store code you would like to remember later on.
	// Example snippet: print the global `window` object to the console.
	// NOTE(review): presumably meant to run in a browser, where `window` is defined -- confirm.
	console.log(window); // log the "window" object to the console
	# ANOVA: starts assembling an ANOVA-style table for `fit1` with a single
	# "Regression" row, using `fit2` as the comparison model.
	# NOTE(review): the snippet is truncated -- the function body is never closed
	# and nothing is returned yet. Presumably `fit2` is the reduced model nested
	# in `fit1` (TODO confirm).
	ANOVA<-function(fit1,fit2){
	# Model-comparison table for fit2 vs fit1
	temp <- anova(fit2,fit1 )
	# ANOVA table of fit1 on its own
	fin.aov <- anova(fit1)
	# Columns 2-6 of the second comparison row seed the "Regression" row
	reg <- temp[2,2:6]
	rownames(reg) <- "Regression"
	# Copy the values in columns 2-3 one place left, into columns 1-2
	reg[1,1:2] <- reg[1,2:3]
	# Column 3 becomes column 2 divided by column 1
	# (presumably mean square = sum of squares / df -- TODO confirm)
	reg[1,3] <- reg[1,2]/reg[1,1]
	# Reuse the column names of fit1's table
	colnames(reg) <- colnames(fin.aov)
	# Last row of fit1's table (tail(nrow(x), 1) is just nrow(x));
	# presumably the residuals row -- TODO confirm
	res <- fin.aov[tail(nrow(fin.aov),1),]
	# Total row: first two columns are the sum of `reg` and `res`, the rest NA
	tot <- cbind(reg[1,1:2]+res[1,1:2],NA,NA,NA)
	# Attach the forecast package without printing its startup messages
	suppressMessages(library(forecast))

	# Read the input as CSV from standard input
	data<-read.csv( file('stdin') )

	# anomaly_detection: takes `data`, from which the `count` column is read.
	# NOTE(review): the snippet is truncated -- the function body is never closed.
	anomaly_detection<-function(data){

	# Number of observations per seasonal cycle, used as the ts() frequency
	seasonality<-48
	# Wrap the `count` column in a time series with that frequency
	data_series<-ts(data$count,frequency=seasonality)

	train_start<-1 ## train on 1 month of data
	####################
	# Create relogit predicted probabilities using Zelig and ggplot2
	# Two Sword Lengths: Losers' Consent and Violence in National Legislatures (Working Paper 2012)
	# Christopher Gandrud
	# Updated 26 April 2012
	###################

	## Load required packages
	library(RCurl)
	library(Zelig)
	# Toy example: regress a noisy, downward-trending series on a yearly Date
	# variable and print the model summary.
	N <- 10
	# Derive the ids from N so the sample size is controlled in one place
	id <- seq_len(N)
	# Standard-normal noise around a line with slope -1 in id
	x <- 1 + rnorm(N) - id
	# One date per observation, one year apart; `along.with` is spelled out
	# instead of relying on partial matching of `along`
	date <- seq(as.Date("2013-07-01"), by = "year", along.with = x)
	df <- data.frame(x = x, date = date)
	plot(df$date, df$x)
	summary(lm(x ~ date, data = df))
	# Attach the PostgreSQL driver package and the car package
	library(RPostgreSQL)
	library(car)

	# Open a connection handle to the "dnsmonitor" database on localhost
	# NOTE(review): the password is hard-coded in source -- consider loading it
	# from the environment or a config file kept out of version control.
	pgDrv <- dbDriver("PostgreSQL")
	dbh <- dbConnect(
	  pgDrv,
	  host = "localhost",
	  dbname = "dnsmonitor",
	  user = "dnsmon",
	  password = "tooEasy"
	)

	# Retrieve Statistics from DB
	stats <- dbGetQuery(dbh, "select client.id, client.ip, sum(queries) as queries, sum(nx) as nx, sum(answers) as answers, sum(errors) as errors, count(distinct day) as days_active