Skip to content

Instantly share code, notes, and snippets.

@hemache

hemache/index.js Secret

Last active June 15, 2017 17:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hemache/4fd2a155672e4cd1fc9571a8cd2693a6 to your computer and use it in GitHub Desktop.
Save hemache/4fd2a155672e4cd1fc9571a8cd2693a6 to your computer and use it in GitHub Desktop.
const _ = require('lodash')
const fs = require('fs')
const readline = require('readline')
const repl = require('repl')
// Loads a newline-delimited JSON dump into memory, then opens a REPL with the
// parsed records exposed as `items` and lodash exposed as `_`.

// Path of the dump to load. An optional first CLI argument overrides the
// default file name, so `node index.js` keeps working unchanged.
const datasetPath = process.argv[2] || '2017-06-15T13-32-21.json'
const input = fs.createReadStream(datasetPath)
const lineReader = readline.createInterface({ input })
const items = []

// Report a read failure (e.g. a missing file) once and exit non-zero instead
// of crashing on an unhandled 'error' event.
// NOTE(review): the handler is attached to both the stream and the interface
// because readline may re-emit input errors on the interface in some Node
// versions — confirm against the Node version in use.
let readFailed = false
const reportReadError = (err) => {
  if (readFailed) return
  readFailed = true
  console.error(`error reading ${datasetPath}: ${err.message}`)
  process.exitCode = 1
}
input.on('error', reportReadError)
lineReader.on('error', reportReadError)

// Each line is parsed as its own JSON document; lines that fail to parse are
// reported on stderr and skipped so one bad record does not abort the load.
lineReader.on('line', (line) => {
  try {
    items.push(JSON.parse(line))
  } catch (e) {
    console.error(`error parsing line ${line}`)
  }
})

// Once the input is exhausted, start the interactive session.
lineReader.on('close', () => {
  if (readFailed) return
  const r = repl.start({ prompt: '> ' })
  r.context.items = items
  r.context._ = _
})
{
"name": "avitodataset",
"version": "0.0.1",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "Adil Hemache <adilhemache+avitodataset@gmail.com>",
"license": "ISC",
"dependencies": {
"lodash": "^4.17.4"
}
}
// Mean of `price` over the items whose price is a number.
_(items)
  .map(item => item.price)
  .filter(price => typeof price === 'number')
  .mean()
// price average/mean => 336957.5419090337
// The three items with the highest numeric price, as [subject, price] pairs.
_(items)
  .filter(({ price }) => typeof price === 'number')
  .orderBy(['price'], ['desc'])
  .take(3)
  .map(({ subject, price }) => [subject, price])
  .value()
/* 3 most expensive items =>
[[ 'Appartement de 185 m2 à Tétouan Wilaya', 1850000000 ],
[ 'Congfgjdeelateur utuligtserdtf 5 mgfauwois', 1100884645 ],
[ 'villa avendre', 1001000000 ]]
*/
// Number of items per `category` value. `countBy` tallies directly instead of
// building every group only to read its length.
_(items).countBy('category').value()
/* number of items per category =>
{ 'maisons et villas': 21655,
appartements: 76861,
'téléphones': 88029,
voitures: 87100,
animaux: 11355,
'ordinateurs portables': 26905,
'ordinateurs de bureau': 57209,
services: 18440,
'cours et formations': 11046,
'terrains et fermes': 37752,
'magasins, commerces et locaux industriels': 10285,
'offres d\'emploi': 99078,
'vêtements': 8478,
'locations de vacances': 16779,
'accessoires informatique et gadgets': 34907,
'stocks et vente en gros': 6136,
'meubles et décoration': 39078,
'matériels professionnels': 20528,
'electroménager et vaisselles': 13294,
'pièces et accessoires pour véhicules': 13293,
'equipements pour enfant et bébé': 5728,
chaussures: 6302,
autres: 4604,
'télévisions': 3740,
'appareils photo et caméras': 6310,
'montres et bijoux': 6719,
'sports et loisirs': 8995,
tablettes: 4822,
'business et affaires commerciales': 2995,
colocations: 2112,
'autre immobilier': 1051,
motos: 23502,
bateaux: 436,
'produits de beauté': 3970,
'jeux vidéo et consoles': 11157,
'bureaux et plateaux': 5241,
'vélos': 7349,
'image & son': 4933,
stages: 223,
'films, livres, magazines': 740,
'sacs et accessoires': 3109,
'instruments de musique': 3733,
null: 6709,
'vêtements pour enfant et bébé': 809,
'jardin et outils de bricolage': 1770,
'art et collections': 2337,
'voyages et billetterie': 4716 }
*/
$ cd /path/to/avitodataset
$ npm install
$ node index.js
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment