Skip to content

Instantly share code, notes, and snippets.

@iamarchisha
Last active March 11, 2021 07:29
Show Gist options
  • Save iamarchisha/60268853dfab478d1c440e16cc72a175 to your computer and use it in GitHub Desktop.
Save iamarchisha/60268853dfab478d1c440e16cc72a175 to your computer and use it in GitHub Desktop.
JS script to prepare json files for performing object detection on AWS Sage Maker. It needs a csv input containing: image_name with extensions (str), img-height (int) , img-width (int), depth (int), left (int), top (int), width (int), height (int), label (str), label_id (int). This csv can have duplicate image_path value to represent different o…
const fs = require("fs");
const path = require("path");
const csv = require("fast-csv");
// Accumulates one entry per image, keyed by the image name without its extension.
const megaJSON = {};
let aws_s3_URL = "<s3-data-url>"; //s3 url -- NOTE(review): not referenced anywhere in this script

// Expected csv header:
//   image_path,img-height,img-width,depth,left,top,width,height,label,label_id
// The csv can repeat an image_path to describe several bounding boxes
// (left,top,width,height) in one image, e.g.:
// <"image-name-1"> | 200 | 300 | 3 | 45 | 100 | 56 | 200 | <"label-1"> | 0
// <"image-name-1"> | 200 | 300 | 3 | 50 | 200 | 70 | 300 | <"label-2"> | 1

/**
 * Returns the value of the first column in `names` that is present on `row`.
 * Lets the script accept both the documented header names and the legacy
 * names earlier versions of this script looked up.
 * @param {Object} row - parsed csv row (header name -> cell text)
 * @param {string[]} names - candidate column names, in priority order
 * @returns {*} the first defined cell, or undefined when none is present
 */
function firstDefined(row, names) {
  for (const name of names) {
    if (row[name] !== undefined) {
      return row[name];
    }
  }
  return undefined;
}

/**
 * Converts a csv cell to a number. The csv parser hands every cell over as
 * text, while these columns are documented as ints. Cells that are missing,
 * blank or not numeric are returned unchanged instead of becoming NaN/0.
 * @param {*} value - raw csv cell
 * @returns {*} the numeric value, or `value` itself when it is not numeric
 */
function toNumber(value) {
  if (value === undefined || value === null || String(value).trim() === "") {
    return value;
  }
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : value;
}

/**
 * Removes the trailing extension (".jpg", ".png", ...) from a file name.
 * A name without an extension is returned as-is rather than collapsing to "".
 * @param {string} fileName - image file name, possibly with directories
 * @returns {string} the name without its final extension
 */
function stripExtension(fileName) {
  const extension = path.extname(fileName);
  return extension ? fileName.slice(0, -extension.length) : fileName;
}

/**
 * Folds one csv row into `index`: creates the image entry on first sight,
 * appends the bounding box, and records the category once per image.
 * @param {Object} index - accumulator keyed by extension-less image name
 * @param {Object} row - parsed csv row
 */
function addRow(index, row) {
  const classId = toNumber(row.label_id);
  const annotation = {
    class_id: classId,
    left: toNumber(row.left),
    top: toNumber(row.top),
    width: toNumber(row.width),
    height: toNumber(row.height),
  };
  const category = {
    class_id: classId,
    name: row.label,
  };
  const fileName = String(row.image_path); // image file name with extension
  const key = stripExtension(fileName); // final.json is keyed by the bare image name

  // Own-property check so names like "constructor" are not mistaken for existing entries.
  if (!Object.prototype.hasOwnProperty.call(index, key)) {
    index[key] = {
      file: fileName,
      annotations: [],
      categories: [],
      image_size: [
        {
          // Documented header uses "img-width"/"img-height"/"depth"; the
          // underscore / "channel" spellings are kept as fallbacks.
          width: toNumber(firstDefined(row, ["img-width", "img_width"])),
          height: toNumber(firstDefined(row, ["img-height", "img_height"])),
          depth: toNumber(firstDefined(row, ["depth", "channel"])),
        },
      ],
    };
  }

  const entry = index[key];
  entry.annotations.push(annotation);
  // A category is listed once per image even if several boxes share it.
  const alreadyListed = entry.categories.some((known) => known.class_id === classId);
  if (!alreadyListed) {
    entry.categories.push(category);
  }
}

// Stream annotations.csv (next to this script), build the per-image index,
// then write final.json and one json file per image.
fs.createReadStream(path.resolve(__dirname, "annotations.csv"))
  .pipe(
    csv.parse({
      headers: true, // first csv line supplies the property names of each row
    })
  )
  .on("error", (error) => console.error(error))
  .on("data", (row) => addRow(megaJSON, row))
  .on("end", (rowCount) => {
    console.log("ALL DONE GG ");
    fs.writeFileSync(
      path.resolve(__dirname, "final.json"), //json containing image jsons
      JSON.stringify(megaJSON)
    );
    jsonWalk(megaJSON);
  });
// breaking final json into separate image.json files and storing them in JSONS_ALL/
/**
 * Writes each own property of `obj` to its own file,
 * `<script dir>/JSONS_ALL/<key>.json`, containing the JSON-serialised value.
 * Missing output directories (including sub-directories implied by a key
 * such as "train/img1") are created first, so a fresh checkout does not
 * fail with ENOENT. Existing files are overwritten.
 * @param {Object} obj - map of output name (without ".json") to JSON-serialisable value
 */
function jsonWalk(obj) {
  for (const key in obj) {
    // Safe own-property check: works even if obj shadows or lacks hasOwnProperty.
    if (!Object.prototype.hasOwnProperty.call(obj, key)) {
      continue;
    }
    const target = path.resolve(__dirname, "./JSONS_ALL/" + key + ".json");
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.writeFileSync(target, JSON.stringify(obj[key]), { flag: "w" });
  }
  console.log("All JSON Files Ceated --");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment