Skip to content

Instantly share code, notes, and snippets.

@CognitiveDave
Created January 3, 2021 17:25
Show Gist options
  • Save CognitiveDave/476d4c8ba693f528f68e3e77557bc26e to your computer and use it in GitHub Desktop.
Save CognitiveDave/476d4c8ba693f528f68e3e77557bc26e to your computer and use it in GitHub Desktop.
# Use Tesseract to OCR the image into a single plain-text string.
text = pytesseract.image_to_string(image)
results = []
# Clean the raw OCR text before splitting it into lines: commas, '‘' and ')'
# are removed and '@' is replaced by '0'.
tdData = text.replace(',','').replace('@','0').replace('‘','').replace(')','').split('\n')
# The first three lines are skipped (presumably the table heading -- confirm
# against the source image).
for data in tdData[3:]:
    # Split on any run of whitespace so that doubled spaces do not create
    # empty fields that shift the columns.
    elems = data.split()
    # Blank lines and stray fragments have fewer than four fields; indexing
    # them would raise IndexError, so they are skipped.
    if len(elems) < 4:
        continue
    rec = {}
    rec['county'] = elems[0]
    rec['todays'] = toNumber(elems[1])
    rec['forteen'] = toNumber(elems[2])
    rec['newforteen'] = toNumber(elems[3])
    results.append(rec)
df = pd.DataFrame(results)
df.info()
# Manual sanity checks: list the parsed county names and every row in which
# one of the numeric columns came out as zero.
print(df['county'].unique())
print(df[(df['todays'] == 0) | (df['forteen'] == 0) | (df['newforteen'] == 0)])
# Row 0 is used as the reference row (presumably the national total).
day = df.at[0,'todays']
f14 = df.at[0,'forteen']
newf14 = df.at[0,'newforteen']
print(day,f14, newf14)
# Column sums with the reference row removed, i.e. the sum of all other rows.
dayTot = df['todays'].sum()-day
f14Tot = df['forteen'].sum()-f14
newf14Tot = df['newforteen'].sum()-newf14
# Difference between the summed rows and the reference row for each column.
print(dayTot-day,f14Tot-f14,newf14Tot-newf14)
df.to_csv('results.csv')
# Load the table stored in 'resultsE.csv' and repeat the checks on it.
fd = pd.read_csv('resultsE.csv')

# Show every row where at least one of the three numeric columns is zero.
print(fd[fd[['todays', 'forteen', 'newforteen']].eq(0).any(axis=1)])

# Values of row 0, used as the reference row (presumably the national total).
day, f14, newf14 = (
    fd.at[0, column] for column in ('todays', 'forteen', 'newforteen')
)
print(day, f14, newf14)

# Column sums without the reference row; the last one is divided by 26
# (presumably the number of counties -- confirm).
dayTot = fd['todays'].sum() - day
f14Tot = fd['forteen'].sum() - f14
newf14Tot = (fd['newforteen'].sum() - newf14) / 26
# NOTE(review): this prints the row-0 values a second time; the totals
# computed just above are not printed anywhere in this section -- confirm
# whether they were meant to be shown here.
print(day, f14, newf14)

# Summary statistics of 'forteen' over every row whose county is not 'Ireland'.
ff = fd.loc[fd['county'].ne('Ireland')].copy()
for stat in ('median', 'min', 'max'):
    print(getattr(ff['forteen'], stat)())
@CognitiveDave
Copy link
Author

based on pyimagesearch.com base code
a script that converts an OCR text string to a 2d matrix

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment