Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Query vs NX Relationships
# Library Loading
library("RPostgreSQL");
library("car");
# Connect to Database
pgDrv <- dbDriver("PostgreSQL")
# NOTE(review): credentials are hard-coded in source. The password can be
# overridden with the DNSMON_DB_PASSWORD environment variable; the literal is
# kept only as a backward-compatible fallback and should eventually be removed.
dbh <- dbConnect(
  pgDrv,
  host = "localhost",
  dbname = "dnsmonitor",
  user = "dnsmon",
  password = Sys.getenv("DNSMON_DB_PASSWORD", unset = "tooEasy")
)
# Retrieve Statistics from DB
# Per client (id, ip): total queries, nx, answers and errors summed over
# client_stats, plus the number of distinct days seen. Only clients whose ip
# is between '10.1.0.0' and '10.1.0.255' and that have more than 20 distinct
# days are returned.
# NOTE(review): the BETWEEN bounds are string literals; this presumably relies
# on client.ip being an inet/cidr column -- with a text column the comparison
# would be lexical. Confirm against the schema.
stats_query <- "select client.id, client.ip, sum(queries) as queries, sum(nx) as nx, sum(answers) as answers, sum(errors) as errors, count(distinct day) as days_active
from client
inner join client_stats on client.id = client_stats.client_id
where ip BETWEEN '10.1.0.0' and '10.1.0.255'
group by client.id, client.ip having count(distinct day) > 20"
# Close the Database Connection even when the query fails, so a bad query
# does not leave an open connection behind.
stats <- tryCatch(
  dbGetQuery(dbh, stats_query),
  finally = dbDisconnect(dbh)
)
# Free the variables that are no longer needed
rm(dbh, pgDrv, stats_query)
# Fail early on an empty or single-row result: with fewer than two clients
# sd() is NA, every z-score below becomes NA and the later plot/regression
# steps fail with a much less helpful message.
if (nrow(stats) < 2) {
  stop(
    "Need statistics for at least two clients, got ", nrow(stats),
    call. = FALSE
  )
}
# Queries will be "x": deviations from the mean and their squares
x <- stats$queries
x.minusmean <- x - mean(x)
x.minusmeansq <- x.minusmean^2
# NX Records will be "y": deviations from the mean and their squares
y <- stats$nx
y.minusmean <- y - mean(y)
y.minusmeansq <- y.minusmean^2
# Standard Deviations
sd.x <- sd(x)
sd.y <- sd(y)
# Build the table: one row per client, named by its IP address
dns <- data.frame(x, y, row.names = stats$ip)
dns$x_mean <- x.minusmean
dns$x_meansq <- x.minusmeansq
dns$y_mean <- y.minusmean
dns$y_meansq <- y.minusmeansq
# Calculate Cor. Coef. Numerator (per-client cross product of deviations).
dns$product <- x.minusmean * y.minusmean
# Z-Scores: absolute deviation from the mean in units of standard deviation
dns$x_zscore <- abs(x.minusmean) / sd.x
dns$y_zscore <- abs(y.minusmean) / sd.y
# Removing Outliers: keep the clients whose NX count is within 3 standard
# deviations of the mean. Only y_zscore is tested; x_zscore is not used here.
# which() discards rows whose z-score is NA, matching what subset() did.
normal <- dns[which(dns$y_zscore <= 3), c("x", "y")]
# Scatter Plot of every client, outliers included
plot(
  dns$x, dns$y,
  xlab = "Queries", ylab = "NX Responses",
  main = "DNS Queries and NX Responses"
)
# Regression Line over all clients.
# The model is fitted with `data =` instead of free-standing vectors so the
# coefficient is named "x" and the fit can be reused with predict(newdata =).
regression <- lm(y ~ x, data = dns)
regLine(regression, col = "red")
# Regression line without outliers:
regression_normal <- lm(y ~ x, data = normal)
regLine(regression_normal, col = "green")
@reyjrar

This comment has been minimized.

Copy link
Owner Author

commented Jul 8, 2011

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.