Skip to content

Instantly share code, notes, and snippets.

@infectious
Created August 16, 2012 13:10
Show Gist options
  • Save infectious/3370005 to your computer and use it in GitHub Desktop.
Save infectious/3370005 to your computer and use it in GitHub Desktop.
require 'ipaddr'
parameter :today do
2.day.ago.to_date
end
helper :logs_union do
((today - 30.days)..today).map do |date|
"""
SELECT ip
FROM logs.#{date.ymd}
"""
end.join(" UNION ")
end
extract :RDW do
with_sql """
SELECT DISTINCT logs.ip
FROM (#{logs_union}) logs
LEFT OUTER JOIN apn.ips ON ips.ip = logs.ip
WHERE ips.ip IS NULL
limit 1
"""
end
transform do |row|
row=row.values # make row an array of single value - ip address
ip=row[0]
response = Nop.io(:GDS).get(ip.to_s) rescue nil
next unless response
ipinfo = response['ipinfo']
network = ipinfo['Network'] || {}
ip_data = [
ip,
ipinfo['Location'].try(:[], 'CityData').try(:[], 'postal_code'),
network['connection_type'],
network['line_speed'],
network['OrganizationData'].try(:[], 'organization_type'),
network['OrganizationData'].try(:[], 'home'),
network['carrier'],
today
]
# TODO remove this after fix in Nop loader
ip_data.map! {|v| v.respond_to?(:gsub) ? v.gsub('"', "'") : v }
# replace the IP data to the +row+ array
row.replace(ip_data)
p row
end
# reject the rows for which IP data retrieval failed and row wasn't appended
transform :reject do |row|
p row
row.size == 1
end
load :ARC do
into "appnexus/#{today.ym}/ips_#{today.ymd}.txt.gz"
separator "\t"
force true
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment