Skip to content

Instantly share code, notes, and snippets.

Last active August 12, 2020 16:17
Show Gist options
  • Save bala-codes/f3a4f52b11cf01db974788cca7a117bb to your computer and use it in GitHub Desktop.
Save bala-codes/f3a4f52b11cf01db974788cca7a117bb to your computer and use it in GitHub Desktop.
# Dataset Extraction from github
!git clone ''
import os, sys, random, shutil
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from shutil import copyfile
import pandas as pd
from sklearn import preprocessing, model_selection
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import patches
import numpy as np
# Flatten the XML annotation files into one table row per <object> element
# and save the result as a CSV file.
annotations = sorted(glob('/content/BCCD_Dataset/BCCD/Annotations/*.xml'))

columns = ['prev_filename', 'filename', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax']

df = []   # accumulated rows, one list per annotated object
cnt = 0   # running index of the annotation file; used as the new image name
for file in annotations:
    # Original image name: the annotation's base name with a .jpg extension.
    prev_filename = file.split('/')[-1].split('.')[0] + '.jpg'
    # New, sequential image name ("0.jpg", "1.jpg", ...), shared by every
    # object found in this annotation file.
    filename = str(cnt) + '.jpg'
    parsedXML = ET.parse(file)
    for node in parsedXML.getroot().iter('object'):
        blood_cells = node.find('name').text
        xmin = int(node.find('bndbox/xmin').text)
        xmax = int(node.find('bndbox/xmax').text)
        ymin = int(node.find('bndbox/ymin').text)
        ymax = int(node.find('bndbox/ymax').text)
        row = [prev_filename, filename, blood_cells, xmin, xmax, ymin, ymax]
        # Each object must be stored as it is parsed; without this append the
        # row is overwritten on the next iteration and `df` stays empty.
        df.append(row)
    cnt += 1

data = pd.DataFrame(df, columns=columns)
# The frame already has exactly these columns in this order, so it can be
# written directly.
data.to_csv('/content/blood_cell_detection.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment