Skip to content

Instantly share code, notes, and snippets.

@PandaWhisperer
Created January 20, 2015 06:48
Show Gist options
  • Save PandaWhisperer/3576290fef258f8f0f5a to your computer and use it in GitHub Desktop.
Save PandaWhisperer/3576290fef258f8f0f5a to your computer and use it in GitHub Desktop.
var async= require('async'),
cv= require('opencv'),
daoUtil= require('../dao/util'),
skybio= require('../lib/skybio'),
kairos= require('../lib/kairos'),
fre= require('../lib/fre'),
_= require('underscore'),
stats= require('stats-lite'),
fs = require('fs'),
request = require('request'),
uuid= require('node-uuid').v4,
done= console.log,
util= require('../lib/util');
var s3= require('../lib/s3'),
db= require('../lib/dynamo').db,
pkBucket= s3('photokharma-files'),
bucket= s3('photokharma-users');
// Id of the user whose faces are processed by this run.
// Alternative id kept for reference: '4267ad25-48cc-4ad3-ad1f-2496171d4ed6'
var user_id= 'af0a8a88-f560-45db-b8a2-a47c29b49624';

// Per-user working directory (always ends with '/'). It stays a `var` because
// a later waterfall step re-points it at the 'skybio/' sub-directory.
// Alternative location kept for reference: '/home/ec2-user/skybio_exp/' + user_id + '/'
var dir_out= ['/Users/agr/Documents/sm14/photokharma/tmp/skybio', user_id, ''].join('/');

// Namespace handed to skybio.facesRecognize.
var namespace= 'clustering_5';

// Scratch object used to pass intermediate results between waterfall steps.
var context= {};
async.waterfall([
function(done)
{
if(!fs.existsSync(dir_out))
fs.mkdirSync(dir_out);
done();
},
function(done)
{
return done();
db.face.queryIndex(['user_id','face_id'], user_id,
function (err, faces)
{
if (err) return done(err);
context.pkFaces= faces;
console.log('faces.length=' + faces.length);
done();
}, ['photo_id','face_id','hash','drop_reason','facebook','facebook_tag_date','facebook_tag_id','facebook_tag_name']);
},
function(done)
{
return done();
async.eachLimit(_.range(context.pkFaces.length), 1,
function(idx, done)
{
var face= context.pkFaces[idx];
if(!face.drop_reason)
{
pkBucket.getBuffer(['face',face.hash+'.jpg'],
function(err, buff)
{
if(err) return done(err);
fs.writeFileSync(dir_out + face.hash + '.jpg', buff);
fs.writeFileSync(dir_out + face.hash + '.json', JSON.stringify(face));
done();
});
}
else done();
}, done);
},
function(done)
{
var fileNames= fs.readdirSync(dir_out);
fileNames= _.filter(fileNames, function(file_name) { return file_name.indexOf('.json') != -1; });
context.fileNames= fileNames;
dir_out = dir_out + 'skybio/';
if(!fs.existsSync(dir_out))
fs.mkdirSync(dir_out);
if(!fs.existsSync(dir_out+'responses/'))
fs.mkdirSync(dir_out+'responses/');
if(!fs.existsSync(dir_out+'low_confidence/'))
fs.mkdirSync(dir_out+'low_confidence/');
if(!fs.existsSync(dir_out+'too_small/'))
fs.mkdirSync(dir_out+'too_small/');
done();
},
function(done)
{
return done();
// detect and add faces
async.eachLimit(_.range(context.fileNames.length), 1,
function(idx, done)
{
var dbface= JSON.parse(fs.readFileSync(dir_out + '../' + context.fileNames[idx]).toString());
url= util.faceUrl(dbface);
console.log(url);
if(fs.existsSync(dir_out + 'responses/' + dbface.hash + '.json'))
{
console.log('Skipping.');
return done();
}
var buff, im;
skybio.facesRecognize(namespace, ['all'], [url], 10000, true,
function(err,res)
{
if(err)
{
//return done();
res= { photos: [{ tags: [] }] };
}
fs.writeFileSync(dir_out + 'responses/' + dbface.hash + '.json', JSON.stringify(res));
var detections= res.photos[0].tags;
console.log('detections.length=' + detections.length);
async.eachLimit(detections, 1,
function(detection, done)
{
detection.url= res.photos[0].url;
async.waterfall([
function(done)
{
if(!buff)
{
request.get({ url: url, encoding: null },
function (err, res, body)
{
if (err) return done(err);
buff= body;
cv.readImage(buff,
function(err, _im)
{
if (err) return done(err);
im= _im;
done();
});
});
}
else done();
},
function(done)
{
var detection_box= { x: detection.center.x - detection.width/2,
y: detection.center.y - detection.height/2,
w: detection.width,
h: detection.height };
var cd= fre.cropDetection(im, detection_box, 0.6),
crop= cd.crop;
crop.buff= crop.toBuffer();
fs.writeFileSync(dir_out + detection.tid + '.jpg', crop.buff);
fs.writeFileSync(dir_out + detection.tid + '.json', JSON.stringify(detection));
var matches= detection.uids ? detection.uids : [];
console.log('matches.length=' + matches.length);
matches= _.map(matches,
function(_match)
{
var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')),
match= { face_uid: face_uid, confidence: _match.confidence };
return match;
});
fs.writeFileSync(dir_out + detection.tid + '_matches.json', JSON.stringify(matches));
done();
}], done);
}, done);
});
}, done);
},
function(done)
{
console.log('Generating similarity matrix...');
var detection_uids= fs.readdirSync(dir_out);
detection_uids= _.filter(detection_uids, function(file_name) { return file_name.indexOf('.jpg') != -1; });
var face_uids= [],
face_attributes= [],
uidToIndexMap= {};
async.eachLimit(_.range(detection_uids.length), 1,
function(i, done)
{
var detection_uid= detection_uids[i];
detection_uid= detection_uid.substring(0, detection_uid.indexOf('.jpg'));
var detection= JSON.parse(fs.readFileSync(dir_out + detection_uid + '.json').toString());
if(detection.attributes.face.confidence > 50)
{
var buff= fs.readFileSync(dir_out + detection_uid + '.jpg');
cv.readImage(buff,
function(err, _im)
{
if (err) return done(err);
var size= _im.size(), h= size[0], w= size[1];
if(Math.min(w,h) > 125)
{
face_uids.push(detection_uid);
face_attributes.push(detection.attributes);
uidToIndexMap[detection_uid]= face_uids.length - 1;
}
else
{
fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'too_small/' + detection_uid + '.jpg'));
}
done();
});
}
else
{
fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'low_confidence/' + detection_uid + '.jpg'));
done();
}
},
function(err)
{
if(err) return done(err);
context.face_uids= face_uids;
context.face_attributes= face_attributes;
context.uidToIndexMap= uidToIndexMap;
done();
});
},
function(done)
{
var face_uids= context.face_uids,
face_attributes= context.face_attributes,
uidToIndexMap= context.uidToIndexMap;
console.log('D');
var npoints = face_uids.length,
D = [];
_.range(npoints-1).forEach(
function(i)
{
D[i]= _.map(_.range(npoints-i-1), function(j) { return 0; });
});
console.log('D matches');
async.eachLimit(_.range(face_uids.length), 1,
function(i, done)
{
var face_uid= face_uids[i],
index1= uidToIndexMap[face_uid];
//var matches= JSON.parse(fs.readFileSync(dir_out + 'matches/' + face_uid + '.json').toString());
var detection= JSON.parse(fs.readFileSync(dir_out + face_uid + '.json').toString()),
matches= detection.uids ? detection.uids : [];
matches= _.map(matches,
function(_match)
{
var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')),
match= { face_uid: face_uid, confidence: _match.confidence };
return match;
});
matches.forEach(
function(match)
{
var index2= uidToIndexMap[match.face_uid];
if(index2 && index1 != index2)
{
var minIndex= Math.min(index1, index2),
maxIndex= Math.max(index1, index2);
//if(D[minIndex][maxIndex - minIndex - 1] != 0)
// console.log(D[minIndex][maxIndex - minIndex - 1] + ' <= ' + match.confidence);
D[minIndex][maxIndex - minIndex - 1]= match.confidence;
}
});
done();
},
function(err)
{
if(err) return done(err);
console.log('D done.');
context.D= D;
done();
});
},
function(done)
{
var face_uid= '001d0021_0060d7e5a738e';
fre.printMatchScores(context.D, context.face_uids, context.uidToIndexMap[face_uid]);
done();
},
function(done)
{
var params = { minMergeSimilarity1: 60, minMergeSimilarity2: 100, N: 10 };
params.face_uids= context.face_uids; // for debugging only
params.uidToIndexMap= context.uidToIndexMap; // for debugging only
var clustering_0_fileName= dir_out + 'clustering_' + 68 + '_' + 100 + '_sz/clustering.json';
//if(fs.existsSync(clustering_0_fileName)) params.clustering_0 = JSON.parse(fs.readFileSync(clustering_0_fileName).toString());
context.params= params;
//context.D.forEach(function(row) { console.log(row.join(', ')); });
console.time('Clustering.');
var clustering = fre.clusterLibrary(context.D, params);
console.timeEnd('Clustering.');
var clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz';
if(!fs.existsSync(clustering_dir))
fs.mkdirSync(clustering_dir);
fs.writeFileSync(clustering_dir + '/clustering.json', JSON.stringify(clustering));
done();
},
function(done)
{
var params = context.params,
clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz';
var clustering= JSON.parse(fs.readFileSync(clustering_dir + '/clustering.json').toString());
context.clustering_dir= clustering_dir;
console.log('_.unique(clustering.clusters).length=' + _.unique(clustering.clusters).length);
console.log('clustering.medoids.length=' + clustering.medoids.length);
console.log(clustering.clusters);
console.log(clustering.medoids);
// get cluster sizes
var clusterSizes= {};
_.range(clustering.clusters.length).forEach(
function(i)
{
var cluster_id= clustering.clusters[i];
if(clusterSizes[cluster_id])
clusterSizes[cluster_id]= clusterSizes[cluster_id] + 1;
else
clusterSizes[cluster_id]= 1;
});
console.log('copying files...');
//_.range(clustering.clusters.length).forEach(
//function(i)
async.eachLimit(_.range(clustering.clusters.length), 10,
function(i, done)
{
var face_uid= context.face_uids[i],
cluster_id= clustering.clusters[i],
cluster_dir= clustering_dir + '/cluster_' + cluster_id,
unclustered_dir= clustering_dir + '/unclustered';
if(clusterSizes[cluster_id] >= 3)
{
if(!fs.existsSync(cluster_dir))
fs.mkdirSync(cluster_dir);
//fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '.jpg'));
var buff= fs.readFileSync(dir_out + face_uid + '.jpg');
fs.writeFileSync(cluster_dir + '/' + face_uid + '.jpg', buff);
}
else
{
if(!fs.existsSync(unclustered_dir))
fs.mkdirSync(unclustered_dir);
//fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(unclustered_dir + '/' + face_uid + '.jpg'));
var buff= fs.readFileSync(dir_out + face_uid + '.jpg');
fs.writeFileSync(unclustered_dir + '/' + face_uid + '.jpg', buff);
}
done();
}, done);
/*_.range(clustering.medoids.length).forEach(
function(i)
{
var face_uid= context.face_uids[clustering.medoids[i]],
cluster_id= i,
cluster_dir= clustering_dir + '/cluster_' + cluster_id;
if(!fs.existsSync(cluster_dir))
fs.mkdirSync(cluster_dir);
fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '_medoid.jpg'));
});*/
//done();
},
function(done)
{
// compute age and gender for each cluster
var cluster_dirs= fs.readdirSync(context.clustering_dir);
cluster_dirs= _.filter(cluster_dirs, function(file_name) { return file_name.indexOf('cluster_') == 0; });
async.eachLimit(_.range(cluster_dirs.length), 1,
function(i, done)
{
var cluster_dir= context.clustering_dir + '/' + cluster_dirs[i],
face_files= fs.readdirSync(cluster_dir);
face_files= _.filter(face_files, function(file_name) { return file_name.indexOf('.jpg') != -1; });
var ages= [],
maleAcc= 0, femaleAcc= 0;
face_files.forEach(
function(file_name)
{
var face_uid= file_name.substring(0, file_name.length-4),
detection= JSON.parse(fs.readFileSync(context.clustering_dir + '/../' + face_uid + '.json').toString());
ages.push(+detection.attributes.age_est.value);
if(detection.attributes.gender.value == 'male') maleAcc+= detection.attributes.gender.confidence;
else femaleAcc+= detection.attributes.gender.confidence;
});
var clusterAttributes= {};
clusterAttributes.age= _.reduce(ages, function(memo, num) { return memo + num; }, 0) / ages.length;
clusterAttributes.male= maleAcc / ages.length;
clusterAttributes.female= femaleAcc / ages.length;
clusterAttributes.gender= maleAcc > femaleAcc ? 'male' : 'female';
fs.writeFileSync(cluster_dir + '/clusterAttributes.json', JSON.stringify(clusterAttributes));
done();
}, done);
}],
function(err)
{
console.log(err);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment