Skip to content

Instantly share code, notes, and snippets.

@tphummel
Created August 2, 2012 02:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tphummel/3232509 to your computer and use it in GitHub Desktop.
Save tphummel/3232509 to your computer and use it in GitHub Desktop.
_ = require "underscore"
mysql = require "mysql"
mongo = require "mongodb"
humanize = require "./lib/onpage_humanize"
# Streams a slice of the `onpage_2012_08_01` MySQL table, runs every row
# through `humanize`, inserts the resulting events into the `onpage_events`
# MongoDB collection and prints a timing/count summary when the query ends.
server = new mongo.Server 'localhost', 27017, {auto_reconnect: true}
db = new mongo.Db 'harvest', server

db.open (err, mongo_connection) ->
  console.log "err: ", err if err?

  mongo_connection.collection 'onpage_events', (err, collection) ->
    console.log "err: ", err if err?

    # FIXME(security): database credentials are hard-coded in source; they
    # should be loaded from configuration or the environment and rotated.
    connection = mysql.createConnection
      database: 'thankyou'
      user: 'tipin'
      password: 't1p1n_#H0wAr3Y0u_aa!'
      host: '23.21.69.107' # use for local dev
      # host: '10.58.166.230' # use for prod/stage
      port: 3306

    connection.connect()

    str_query = "SELECT * FROM `onpage_2012_08_01` WHERE id > 174030 AND id <= 592079 AND uuid_code_run IN ('hh005af2-3416-45b1-82ab-678e9168c8e0','cm000000-3416-45b1-82ab-678e9168c8e0') LIMIT 1000;"

    ts_start = new Date
    row_count = 0      # rows received from MySQL
    doc_count = 0      # documents handed to MongoDB
    max_onpage_id = 0  # highest row id seen, reported in the summary

    query = connection.query str_query

    # The streaming query object emits 'error' (not 'err'); listening on the
    # wrong name meant query failures were never reported.
    query.on 'error', (err) ->
      console.log "err: ", err

    query.on 'result', (row) ->
      row_count += 1
      max_onpage_id = row.id if row.id > max_onpage_id

      # process.stdout.write "."

      # this could be a single value, an array of single values, or an array containing arrays
      humanized = humanize row

      # humanize returns null for blacklisted pixels; nothing to store then.
      return unless humanized?

      final_events =
        if _.isArray humanized
          _.flatten humanized
        else
          [humanized]

      # Store each humanized event (previously the raw row was inserted once
      # per event, and the single-value case pushed an undefined variable).
      for event in final_events
        doc_count += 1
        collection.insert event

    query.on 'end', ->
      ts_end = new Date
      elapsed = ts_end - ts_start

      summary =
        elapsed: elapsed / 1000 # milliseconds -> seconds
        row_count: row_count
        doc_count: doc_count
        max_onpage_id: max_onpage_id

      console.log summary

      connection.end()
      # NOTE(review): inserts are issued without callbacks, so exiting here may
      # race with writes still in flight — confirm against the driver in use.
      process.exit 0
_ = require "underscore"
MappingFactory = require "./onpage_mappings/mapping_factory"
# Convert a raw onpage row into its humanized form.
#
# The mapping is chosen by the row's script version (`row.trigger6`) via
# MappingFactory. Fields are copied under their new names first, then each
# transform is called with (row, current result) and its return value becomes
# the new result. The same two steps are repeated for the mapping specific to
# `row.pixel`, when one exists.
#
# row - raw record; `trigger6`, `pixel` and the keys named by the mapping's
#       field tables are read from it.
#
# Returns null when the mapping's pixel_blacklist contains row.pixel,
# otherwise whatever the last applied transform (or the field copy) produced.
onpage_humanize = (row) ->
  mapping = MappingFactory.create row.trigger6

  blacklist = mapping.pixel_blacklist
  return null if blacklist? and row.pixel in blacklist

  # Rules that apply to every pixel.
  shared = mapping.all_pixels
  result = {}
  for source_key, target_key of shared.fields
    result[target_key] = row[source_key]
  for transform_name, transform of shared.transforms
    result = transform row, result

  # Rules for this particular pixel, if the mapping declares any.
  return result unless _.has mapping.pixel_specific, row.pixel
  specific = mapping.pixel_specific[row.pixel]

  if _.has specific, "fields"
    for source_key, target_key of specific.fields
      result[target_key] = row[source_key]

  if _.has specific, "transforms"
    for transform_name, transform of specific.transforms
      result = transform row, result

  result
module.exports = onpage_humanize
@tphummel
Copy link
Author

tphummel commented Aug 2, 2012

Looks like it takes between 5.8 and 6.2 seconds to process 1000 onpage rows (13873 documents). This includes the query, transfer, humanize, and insert. A slight improvement over what we saw before, I think. Next I'm going to get a breakdown of the total time spent on each task (e.g. humanize, insert).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment