Skip to content

Instantly share code, notes, and snippets.

@justsml
Last active January 15, 2020 18:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save justsml/75c0e2fe54646a6cd5a85a0d1049b382 to your computer and use it in GitHub Desktop.
Save justsml/75c0e2fe54646a6cd5a85a0d1049b382 to your computer and use it in GitHub Desktop.
For SailsJS.
// Shared mutable holder: the connected database handle is stored on `ctx.db`
// once the connection succeeds and is read back when closing the connection.
var ctx = {};
var _ = require('lodash');
var assert = require('assert');
// Bluebird shadows the built-in Promise for the whole file.
var Promise = require('bluebird');
// promisifyAll is presumably what supplies the `*Async` method variants
// (connectAsync, collectionAsync) that the script calls — confirm against Bluebird docs.
var mongodb = Promise.promisifyAll(require('mongodb'));
var MongoClient = Promise.promisifyAll(mongodb.MongoClient);
var Collection = Promise.promisifyAll(mongodb.Collection);
/*
 * Field-type descriptors assigned to document keys by the schema detector.
 * Each descriptor is one shared object instance; the detector ranks them by
 * looking up that instance in an ordered list, so identity matters.
 */

// Scalar descriptors.
const MONGO_BOOLEAN_TYPE = { type: 'Boolean', default: null };
const MONGO_NUMBER_TYPE = { type: 'Number', default: null };
const MONGO_STRING_TYPE = { type: 'String', default: null };

// A string restricted to a fixed list of values; `enum` starts out empty.
const MONGO_ENUM_TYPE = { type: 'String', default: null, enum: [] };

// Container and date descriptors.
const MONGO_ARRAY_TYPE = { type: 'Array', default: null };
const MONGO_OBJECT_TYPE = { type: 'Object', default: null };
const MONGO_DATE_TYPE = { type: 'Date', default: null };

// Descriptor used when a value carries no type information (null / empty string).
const MONGO_DEFAULT_TYPE = MONGO_BOOLEAN_TYPE;
// Connection URL
var url = 'mongodb://localhost:27017/dbname';

// Connect, infer a schema for the `products` collection, then close the
// connection whether or not the earlier steps succeeded.
// `results` settles with whatever buildSchema resolved to, or with undefined
// when a step failed (the failure is logged here, not rethrown).
var results = Promise
  .resolve(url)
  .then(connectionUrl => MongoClient.connectAsync(connectionUrl))
  .then(db => {
    // Keep the handle reachable for the cleanup step below.
    ctx.db = db;
    return db.collectionAsync('products');
  })
  .then(col => buildSchema('products', col))
  .catch(err => {
    // `err && err.stack`: a rejection value is not guaranteed to be an Error,
    // and throwing from inside this handler would skip the cleanup.
    console.error('MongoErrr!!!!!!', err, err && err.stack);
  })
  .finally(() => {
    // Nothing to close if the connection was never established.
    if (ctx.db) {
      // Returning the result lets the chain wait for the close when it is a promise.
      return ctx.db.close();
    }
  });
/**
 * Samples up to 500 documents from a collection and infers a schema from them.
 *
 * Works only through the `collection` argument, so it can be used with any
 * collection handle rather than a hard-coded one.
 *
 * @param {string} name - Label for the collection; used only in the diagnostic
 *   message emitted when listing indexes fails.
 * @param {object} collection - Collection handle exposing `find()` and `listIndexes()`.
 * @returns {Promise<Array<object>>} Resolves to a one-element array holding the
 *   condensed schema.
 */
function buildSchema(name, collection) {
  // Debug aid: list the methods available on the collection handle.
  console.warn('funcs', _.functionsIn(collection).sort().join(', '));

  // Debug aid: print the indexes. This runs alongside the sampling query and is
  // not awaited, so it gets its own rejection handler instead of floating.
  collection.listIndexes().toArray()
    .then(console.log.bind(console, 'collection.indexes'))
    .catch(err => console.warn('collection.indexes failed', name, err));

  var detectedSchema = {'_uniques': {}, '_totalRecords': 0};
  return collection
    .find({})
    .limit(500)
    .toArray()
    .then(docs => docs.reduce((schema, doc) => evaluateSchemaLevel(schema, doc), detectedSchema))
    .then(condenseSchemaLevel)
    .then(genSchema => {
      console.log('genSchema', genSchema);
      return [genSchema];
    });
}
/**
 * Folds one document (or nested sub-document) into the running schema for its level.
 *
 * For every key of `obj` the value is added to that key's set of seen values
 * and the key's type is re-evaluated through checkUpgradeType.
 *
 * @param {object} [schema] - Accumulated schema for this level; a new one is
 *   started when omitted. It is updated in place.
 * @param {object} obj - The document to merge in.
 * @returns {object} The updated schema (the same object that was passed in, if any).
 */
function evaluateSchemaLevel(schema, obj) {
  const level = schema || {};
  level._uniques = level._uniques || {};
  // Count the document once. Counting per key would multiply the total by the
  // number of fields and distort anything derived from it.
  level._totalRecords = (level._totalRecords === undefined ? 0 : level._totalRecords) + 1;

  Object.keys(obj).forEach(key => {
    level._uniques[key] = level._uniques[key] || new Set();
    level._uniques[key].add(obj[key]);
    level[key] = checkUpgradeType({
      schema: level,
      currentType: level[key],
      currentValue: obj[key],
      key: key
    });
  });
  return level;
}
/**
 * Post-processes a detected schema: number/string fields with few distinct
 * values are turned into enum descriptors; for every other field the collected
 * set of values is discarded.
 *
 * @param {object} schema - Schema carrying `_uniques` (key -> Set of seen values)
 *   and `_totalRecords`. It is updated in place.
 * @returns {object} The same schema object.
 */
function condenseSchemaLevel(schema) {
  // A field becomes an enum when its distinct-value count is at most this limit.
  // NOTE(review): the original comment read "5% default" while the factor is 0.5 (50%) — confirm which was intended.
  const setToEnumLimit = schema._totalRecords * 0.5;

  Object.keys(schema._uniques).forEach(k => {
    //TODO: Add null counter to prevent false-positive enum detections
    const field = schema[k];
    const uniques = schema._uniques[k];
    // Descriptor type names are capitalised ('Number', 'String'), so compare
    // case-insensitively. Fields without a string `type` never qualify.
    const typeName = field && typeof field.type === 'string' ? field.type.toLowerCase() : '';
    const isEnumCandidate = ['number', 'string'].indexOf(typeName) > -1
      && Boolean(uniques)
      && uniques.size <= setToEnumLimit;

    if (isEnumCandidate) {
      // Build a fresh descriptor per field: assigning the shared MONGO_ENUM_TYPE
      // object would make every enum field share one `enum` array.
      // NOTE(review): the enum descriptor's type stays 'String' even for Number fields, as in MONGO_ENUM_TYPE — confirm.
      schema[k] = Object.assign({}, MONGO_ENUM_TYPE, { enum: Array.from(uniques).sort() });
      console.log(`Enumified ${k}=${schema[k].enum.join(', ')}`);
    } else {
      schema._uniques[k] = null;
    }
  });
  return schema;
}
// Type descriptors ordered from lowest to highest precedence. checkUpgradeType
// looks both candidates up in this list (by identity) and keeps the one with
// the larger index; on a tie the newly guessed type is kept.
const priority = [MONGO_BOOLEAN_TYPE, MONGO_NUMBER_TYPE, MONGO_STRING_TYPE, MONGO_ARRAY_TYPE, MONGO_OBJECT_TYPE, MONGO_DATE_TYPE, MONGO_ENUM_TYPE];
/**
 * Guesses the type descriptor for a single value.
 *
 * @param {object} args
 * @param {object} [args.currentType] - Descriptor already recorded for the key, if any.
 * @param {*} args.currentValue - The value to classify.
 * @returns {object} One of the MONGO_*_TYPE descriptors. Values that carry no
 *   type information (null, undefined, empty string) or have no matching
 *   descriptor yield `currentType`, falling back to MONGO_DEFAULT_TYPE.
 */
function guessTypeSimple({currentType, currentValue}) {
  if (currentValue === null || currentValue === undefined || currentValue === '') {
    return currentType || MONGO_DEFAULT_TYPE;
  }
  if (typeof currentValue === 'boolean') {
    return MONGO_BOOLEAN_TYPE;
  }
  if (typeof currentValue === 'number') {
    return MONGO_NUMBER_TYPE;
  }
  // Dates must be tested before the generic object check below.
  if (_.isDate(currentValue)) {
    return MONGO_DATE_TYPE;
  }
  if (typeof currentValue === 'string') {
    // Number-ish only when the string round-trips exactly through Number:
    // "42" and "1.5" qualify; "007", " 42", "1.50", "NaN" and "Infinity" stay strings.
    const asNumber = Number(currentValue);
    if (Number.isFinite(asNumber) && String(asNumber) === currentValue) {
      return MONGO_NUMBER_TYPE;
    }
    return MONGO_STRING_TYPE;
  }
  if (Array.isArray(currentValue)) {
    return MONGO_ARRAY_TYPE;
  }
  if (typeof currentValue === 'object') {
    return MONGO_OBJECT_TYPE;
  }
  // Functions, symbols, bigints: no dedicated descriptor, so keep what was known.
  return currentType || MONGO_DEFAULT_TYPE;
}
/**
 * Decides which type descriptor a key should carry after seeing one more value.
 *
 * Plain objects with at least two keys are treated as nested documents and get
 * their own nested schema. Otherwise the guessed descriptor replaces the
 * current one when it ranks at least as high in `priority`.
 *
 * @param {object} args
 * @param {object} [args.currentType] - Descriptor (or nested schema) recorded so far.
 * @param {*} args.currentValue - The newly seen value.
 * @param {string} args.key - Key of the value within its document.
 * @param {object} args.schema - Schema of the level that owns `key`.
 * @returns {object} The descriptor or nested schema to store for `key`.
 */
function checkUpgradeType({currentType, currentValue, key, schema}) {
  var typeGuess = guessTypeSimple({currentType, currentValue});
  // console.log(`Guessed type for ${key}=${typeGuess.type}`);

  // `typeof null` is 'object', so null has to be excluded before calling toString().
  // The value's own toString() is used on purpose: only objects that stringify
  // as '[object Object]' are treated as nested documents.
  const isNestedDocument = currentValue !== null
    && typeof currentValue === 'object'
    && typeof currentValue.toString === 'function'
    && currentValue.toString() === '[object Object]'
    && Object.keys(currentValue).length >= 2;

  if (isNestedDocument) {
    // Continue an existing nested schema, but never hand over a shared type
    // descriptor: evaluateSchemaLevel writes bookkeeping fields onto its argument.
    const existing = schema[key];
    const isNestedSchema = Boolean(existing)
      && typeof existing === 'object'
      && typeof existing._uniques === 'object';
    return evaluateSchemaLevel(isNestedSchema ? existing : undefined, currentValue);
  }

  // Without a usable guess there is nothing to upgrade to.
  if (!typeGuess) {
    return currentType;
  }
  if (priority.indexOf(typeGuess) >= priority.indexOf(currentType)) {
    return typeGuess;
  }
  return currentType;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment