Skip to content

Instantly share code, notes, and snippets.

@bala-codes
Last active August 12, 2020 16:17
Show Gist options
  • Save bala-codes/f3a4f52b11cf01db974788cca7a117bb to your computer and use it in GitHub Desktop.
Save bala-codes/f3a4f52b11cf01db974788cca7a117bb to your computer and use it in GitHub Desktop.
BCC-Datapreprocess
# Clone the BCCD dataset repository from GitHub (notebook shell command)
!git clone 'https://github.com/Shenggan/BCCD_Dataset.git'
import os, sys, random, shutil
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from shutil import copyfile
import pandas as pd
from sklearn import preprocessing, model_selection
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import patches
import numpy as np
# Collect every annotated cell from the BCCD XML annotation files into one
# table (one row per <object> element) and save it as a CSV.
annotations = sorted(glob('/content/BCCD_Dataset/BCCD/Annotations/*.xml'))

df = []  # one list per bounding box; these become the DataFrame rows
for file_index, xml_path in enumerate(annotations):
    # Original image name: the annotation's base name with '.jpg' replacing
    # the extension. splitext strips only the final suffix, so a name that
    # contains extra dots (e.g. 'img.v2.xml') is not truncated.
    stem = os.path.splitext(os.path.basename(xml_path))[0]
    prev_filename = stem + '.jpg'
    # New image name: position of the annotation file in sorted order.
    filename = str(file_index) + '.jpg'

    # Every <object> element contributes one row: its label plus the four
    # integer corners read from its <bndbox> child.
    for node in ET.parse(xml_path).getroot().iter('object'):
        blood_cells = node.find('name').text
        xmin = int(node.find('bndbox/xmin').text)
        xmax = int(node.find('bndbox/xmax').text)
        ymin = int(node.find('bndbox/ymin').text)
        ymax = int(node.find('bndbox/ymax').text)
        df.append([prev_filename, filename, blood_cells, xmin, xmax, ymin, ymax])

# Number of annotation files processed; kept under its original name with the
# same final value the manual counter produced.
cnt = len(annotations)

data = pd.DataFrame(
    df,
    columns=['prev_filename', 'filename', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax'],
)
# The frame already holds exactly these columns in this order, so it can be
# written directly.
data.to_csv('/content/blood_cell_detection.csv', index=False)
data.head(10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment