@reconbot
Created October 19, 2018 14:11
This script dumps every table in every MySQL database to a file in NDJSON format for loading into BigQuery.
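For context, NDJSON (newline-delimited JSON) is simply one JSON object per line, so each MySQL row serializes to a single line. The rows below are hypothetical, only to illustrate the shape BigQuery expects:

{"id":1,"name":"alice","created_at":"2018-10-19 14:11:00"}
{"id":2,"name":"bob","created_at":"2018-10-19 14:12:00"}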
const { pipe, transform } = require('bluestream')
const mysql = require('mysql')
const { createWriteStream } = require('fs')

const connection = mysql.createConnection({
  socketPath: '/var/run/mysqld/mysqld.sock',
  user: 'root',
  password: 'root',
  dateStrings: true, // return DATE/DATETIME/TIMESTAMP values as strings so they serialize cleanly
})

// Promisify connection.query() so it can be awaited
const asyncQuery = (...args) => {
  return new Promise((resolve, reject) => {
    connection.query(...args, (error, data) => (error ? reject(error) : resolve(data)))
  })
}

// List every table as `schema.table`, skipping schemas we don't want to dump
async function getTableIds() {
  const rows = await asyncQuery('SELECT table_schema, table_name FROM information_schema.tables')
  return rows
    .filter(({ table_schema: db }) => db !== 'test' && db !== 'vividcortex' && db !== 'information_schema')
    .map(({ table_schema: db, table_name: tableName }) => `${db}.${tableName}`)
}

// Stream a table's rows through a JSON serializer into `./schema.table.ndjson`
async function writeTableToFile(tableID) {
  console.log(`Loading ${tableID}`)
  // Stream the query results, buffering at most 100 rows at a time
  const query = connection.query(`SELECT * FROM ${tableID}`).stream({ highWaterMark: 100 })
  const serialize = transform(row => JSON.stringify(row) + '\n')
  const file = createWriteStream(`./${tableID}.ndjson`)
  await pipe(query, serialize, file)
  console.log(`Done loading ${tableID}`)
}

async function run() {
  connection.connect()
  const tableIds = await getTableIds()
  // Dump tables one at a time; a single mysql connection can't run queries in parallel
  for (const tableId of tableIds) {
    await writeTableToFile(tableId)
  }
  connection.end()
}

run().catch(error => {
  console.error(error)
  process.exit(1)
})
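The script stops at writing the files; the BigQuery load step mentioned in the description is left to the reader. Below is a minimal sketch of that step using the official @google-cloud/bigquery Node client. The dataset name (mysql_dump), the db_table naming scheme, and schema autodetection are all assumptions for illustration, not part of the original gist:

// Minimal sketch, assuming the @google-cloud/bigquery client and an
// already-created dataset; `mysql_dump` and the naming scheme are made up here.
const { BigQuery } = require('@google-cloud/bigquery')

async function loadTableToBigQuery(tableID) {
  const bigquery = new BigQuery()
  const [job] = await bigquery
    .dataset('mysql_dump') // hypothetical dataset name
    .table(tableID.replace('.', '_')) // e.g. `mydb.users` becomes `mydb_users`
    .load(`./${tableID}.ndjson`, {
      sourceFormat: 'NEWLINE_DELIMITED_JSON',
      autodetect: true, // let BigQuery infer the schema from the JSON rows
    })
  console.log(`Loaded ${tableID} (job ${job.id})`)
}

load() resolves once the load job completes, so a call like this could be added after writeTableToFile() in the same loop to push each file as soon as it is written.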