# bala-codes/preprocess1.py

## preprocess1.py
# Dataset Extraction from github
!git clone 'https://github.com/Shenggan/BCCD_Dataset.git'

import os, sys, random, shutil
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from shutil import copyfile
import pandas as pd
from sklearn import preprocessing, model_selection
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import patches
import numpy as np

# Gather every XML annotation file from the cloned BCCD repository. Sorting
# fixes the order, so the numeric image names assigned below are reproducible.
annotations = sorted(glob('/content/BCCD_Dataset/BCCD/Annotations/*.xml'))

# Column layout shared by the in-memory DataFrame and the exported CSV.
csv_columns = ['prev_filename', 'filename', 'cell_type',
               'xmin', 'xmax', 'ymin', 'ymax']

# One row per annotated <object> element, accumulated across all files.
df = []
for index, file in enumerate(annotations):
    # Original image name: the annotation's base name with a .jpg extension.
    # basename/splitext keep dots inside the stem (e.g. "img.v2.xml" maps to
    # "img.v2.jpg"), which splitting on the first '.' would truncate.
    prev_filename = os.path.splitext(os.path.basename(file))[0] + '.jpg'
    # New image name: position of the annotation file in the sorted list. The
    # index advances for every file, including files without any <object>.
    filename = str(index) + '.jpg'

    parsedXML = ET.parse(file)
    for node in parsedXML.getroot().iter('object'):
        blood_cells = node.find('name').text
        bndbox = node.find('bndbox')
        # Box corners are stored as text; convert each one to an integer.
        xmin, xmax, ymin, ymax = (
            int(bndbox.find(tag).text)
            for tag in ('xmin', 'xmax', 'ymin', 'ymax')
        )
        df.append([prev_filename, filename, blood_cells,
                   xmin, xmax, ymin, ymax])

# Number of annotation files processed (the value the manual counter ended on).
cnt = len(annotations)

data = pd.DataFrame(df, columns=csv_columns)

# The frame already holds exactly csv_columns in order, so it is written as is.
data.to_csv('/content/blood_cell_detection.csv', index=False)
data.head(10)
	# Dataset Extraction from github
	!git clone 'https://github.com/Shenggan/BCCD_Dataset.git'

	import os, sys, random, shutil
	import xml.etree.ElementTree as ET
	from glob import glob
	import pandas as pd
	from shutil import copyfile
	import pandas as pd
	from sklearn import preprocessing, model_selection
	import matplotlib.pyplot as plt
	%matplotlib inline
	from matplotlib import patches
	import numpy as np

	# Gather every XML annotation file from the cloned BCCD repository. Sorting
	# fixes the order, so the numeric image names assigned below are reproducible.
	annotations = sorted(glob('/content/BCCD_Dataset/BCCD/Annotations/*.xml'))

	# Column layout shared by the in-memory DataFrame and the exported CSV.
	csv_columns = ['prev_filename', 'filename', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax']

	# One row per annotated <object> element, accumulated across all files.
	df = []
	for index, file in enumerate(annotations):
		# Original image name: the annotation's base name with a .jpg extension.
		# basename/splitext keep dots inside the stem (e.g. "img.v2.xml" maps to
		# "img.v2.jpg"), which splitting on the first '.' would truncate.
		prev_filename = os.path.splitext(os.path.basename(file))[0] + '.jpg'
		# New image name: position of the annotation file in the sorted list. The
		# index advances for every file, including files without any <object>.
		filename = str(index) + '.jpg'

		parsedXML = ET.parse(file)
		for node in parsedXML.getroot().iter('object'):
			blood_cells = node.find('name').text
			bndbox = node.find('bndbox')
			# Box corners are stored as text; convert each one to an integer.
			xmin, xmax, ymin, ymax = (
				int(bndbox.find(tag).text)
				for tag in ('xmin', 'xmax', 'ymin', 'ymax')
			)
			df.append([prev_filename, filename, blood_cells, xmin, xmax, ymin, ymax])

	# Number of annotation files processed (the value the manual counter ended on).
	cnt = len(annotations)

	data = pd.DataFrame(df, columns=csv_columns)

	# The frame already holds exactly csv_columns in order, so it is written as is.
	data.to_csv('/content/blood_cell_detection.csv', index=False)
	data.head(10)