Skip to content

Instantly share code, notes, and snippets.

Created August 2, 2012 02:12
Show Gist options
  • Save tphummel/3232509 to your computer and use it in GitHub Desktop.
Save tphummel/3232509 to your computer and use it in GitHub Desktop.
_ = require "underscore"
mysql = require "mysql"
mongo = require "mongodb"
humanize = require "./lib/onpage_humanize"
# Benchmark script: streams rows from one MySQL `onpage_*` table, humanizes
# each row into zero or more events, inserts every event into the MongoDB
# `harvest.onpage_events` collection, then prints a timing summary and exits.
server = new mongo.Server 'localhost', 27017, {auto_reconnect: true}
db = new mongo.Db 'harvest', server

db.open (err, mongo_connection) ->
  if err?
    console.log "err: ", err
    # Without a connection nothing below can work; stop instead of crashing
    # on an undefined handle.
    process.exit 1

  mongo_connection.collection 'onpage_events', (err, collection) ->
    if err?
      console.log "err: ", err
      process.exit 1

    connection = mysql.createConnection
      database: 'thankyou'
      user: 'tipin'
      # FIXME(security): credential is committed to source. Prefer supplying it
      # via MYSQL_PASSWORD; the literal is kept only as a fallback so existing
      # runs keep working, and should be rotated and removed.
      password: process.env.MYSQL_PASSWORD ? 't1p1n_#H0wAr3Y0u_aa!'
      host: '' # use for local dev
      # host: '' # use for prod/stage
      port: 3306

    str_query = "SELECT * FROM `onpage_2012_08_01` WHERE id > 174030 AND id <= 592079 AND uuid_code_run IN ('hh005af2-3416-45b1-82ab-678e9168c8e0','cm000000-3416-45b1-82ab-678e9168c8e0') LIMIT 1000;"

    ts_start = new Date
    row_count = 0      # MySQL rows received
    doc_count = 0      # events handed to MongoDB
    max_onpage_id = 0  # highest row id seen, reported in the summary

    query = connection.query str_query

    # node-mysql emits 'error' (not 'err') on a streaming query.
    query.on 'error', (err) ->
      console.log "err: ", err

    query.on 'result', (row) ->
      row_count += 1
      max_onpage_id = row.id if row.id > max_onpage_id
      # process.stdout.write "."

      # this could be a single value, an array of single values, or an array containing arrays
      humanized = humanize row

      # humanize returns null for blacklisted pixels; nothing to store.
      return unless humanized?

      # Normalize every shape to a flat list of events.
      final_events = if _.isArray humanized then _.flatten humanized else [humanized]

      for event in final_events when event?
        doc_count += 1
        collection.insert event
      # Explicit return so the loop is not compiled into a result-collecting
      # comprehension.
      return

    query.on 'end', ->
      ts_end = new Date
      elapsed = ts_end - ts_start
      summary =
        elapsed: elapsed / 1000  # Date difference is in ms; report seconds
        row_count: row_count
        doc_count: doc_count
        max_onpage_id: max_onpage_id
      console.log summary
      # NOTE(review): inserts above are fire-and-forget; exiting here may cut
      # off writes still in flight — confirm against the driver's write mode.
      process.exit 0
_ = require "underscore"
MappingFactory = require "./onpage_mappings/mapping_factory"
# Convert one raw onpage row into its "humanized" form, using the mapping that
# MappingFactory creates for the row's script version (`row.trigger6`).
#
# row - raw record; this function reads `trigger6`, `pixel`, and every key
#       named in the mapping's `fields` tables.
#
# Returns null when `row.pixel` is listed in the mapping's `pixel_blacklist`.
# Otherwise returns `human`: the renamed fields, passed through each transform
# in turn (every transform receives `(row, human)` and its result replaces
# `human`, so the final shape is whatever the last transform returned).
onpage_humanize = (row) ->
  script_version = row.trigger6
  map = MappingFactory.create script_version

  return null if map.pixel_blacklist? and row.pixel in map.pixel_blacklist

  human = {}

  # all pixels
  for old_key, new_key of map.all_pixels.fields
    human[new_key] = row[old_key]

  for fn_name, fn of map.all_pixels.transforms
    human = fn row, human

  # pixel specific
  if _.has map.pixel_specific, row.pixel
    pixel_map = map.pixel_specific[row.pixel]

    if _.has pixel_map, "fields"
      for old_key, new_key of pixel_map.fields
        human[new_key] = row[old_key]

    if _.has pixel_map, "transforms"
      for fn_name, fn of pixel_map.transforms
        human = fn row, human

  # Explicit result: without this line CoffeeScript would implicitly return the
  # value of the `if` above (undefined, or an array of intermediate values).
  human
module.exports = onpage_humanize
Copy link

tphummel commented Aug 2, 2012

It looks like it takes between 5.8 and 6.2 seconds to process 1000 onpage rows (13873 documents). This includes the query, transfer, humanize, and insert steps. A slight improvement over what we saw before, I think. Next, I'm going to get a breakdown of the total time spent on each task (e.g., humanize, insert).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment