Skip to content

Instantly share code, notes, and snippets.

@erickedji
Created March 15, 2019 20:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erickedji/e56b205b446282ad93d60279d4c4674e to your computer and use it in GitHub Desktop.
Save erickedji/e56b205b446282ad93d60279d4c4674e to your computer and use it in GitHub Desktop.
Script de traitement du dataset gsmarena de kaggle pour brain.js
const fs = require("fs")
const parse = require("csv-parse")
// Input CSV, read fully into memory before parsing.
const fileName = "gsmarena-phone-dataset.csv";
const fileContent = fs.readFileSync(fileName).toString();

// Parse the CSV into one object per row (keyed by column name), convert every
// field to a number, split off the price as the training output, normalize
// inputs and outputs, and write the result as JSON.
parse(fileContent, {
  columns: true,
  skip_lines_with_error: true,
  skip_empty_lines: true
}, function (err, records) {
  // Surface parser failures directly instead of going on to iterate `records`.
  if (err) {
    throw err;
  }

  const data = [];
  for (const item of records) {
    // Every column, including textual ones, is reduced to a number in place.
    for (const key of Object.keys(item)) {
      item[key] = toNumber(item[key]);
    }
    // The price is the value to predict, so it is moved out of the input.
    const price = item.approx_price_EUR;
    delete item.approx_price_EUR;
    data.push({
      input: item,
      output: { price }
    });
  }
  console.log("Got", data.length, "exemples");

  // normalize() mutates the objects it receives; the mapped arrays hold the
  // same object references as `data`, so `data` itself ends up normalized.
  normalize(data.map(r => r.input));
  normalize(data.map(r => r.output));

  fs.writeFileSync(
    "normalized-gsmarena-training-data.json",
    JSON.stringify(data)
  );
  console.log(data[0]);
});
/**
 * Scales the fields of every record in place by dividing each value by the
 * largest value found for that field across all records.
 *
 * The set of fields for which a maximum is computed is taken from the first
 * record. Any value that ends up as NaN after the division (for example
 * because the field's maximum is NaN, or 0 divided by 0) is removed from its
 * record. The maximum of each field is logged to the console.
 *
 * @param {Array<Object<string, number>>} data - Records to normalize; mutated.
 * @returns {void}
 */
function normalize(data) {
  const max = {};
  for (const key in data[0]) {
    // Fold with Math.max instead of spreading the whole column into a single
    // call, so the number of records is not bounded by the engine's argument
    // limit. NaN/undefined values still propagate to a NaN maximum.
    let largest = -Infinity;
    for (const row of data) {
      largest = Math.max(largest, row[key]);
    }
    max[key] = largest;
    console.log("max", key, max[key]);
  }

  for (const item of data) {
    for (const key in item) {
      item[key] /= max[key];
      // Drop values that could not be normalized rather than keeping NaN.
      if (Number.isNaN(item[key])) {
        delete item[key];
      }
    }
  }
}
/**
 * Reduces a string to a single number by summing every run of decimal digits
 * it contains, starting the sum at 1.
 *
 * Any non-digit character acts as a separator, so "5.5" contributes 5 + 5.
 * A string without digits yields 1.
 * NOTE(review): the starting value of 1 offsets every result by one; this
 * looks intentional (keeps values non-zero) but should be confirmed.
 *
 * @param {string} str - Raw field text.
 * @returns {number} 1 plus the sum of all digit runs found in `str`.
 */
function toNumber(str) {
  const digitRuns = str.split(/[^0-9]+/);
  let total = 1;
  for (const run of digitRuns) {
    const value = parseFloat(run);
    // Leading/trailing separators produce empty strings, which parse to NaN.
    if (!Number.isNaN(value)) {
      total += value;
    }
  }
  return total;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment