Skip to content

Instantly share code, notes, and snippets.

@rcsmit
Created April 27, 2022 04:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rcsmit/b98b4a3907578e694058739f051bda75 to your computer and use it in GitHub Desktop.
Save rcsmit/b98b4a3907578e694058739f051bda75 to your computer and use it in GitHub Desktop.
import sqlite3 as sl
import pandas as pd
import streamlit as st
import os
from PIL import Image
def delete_records_from_db(dir):
    """Delete every row of ``txt_from_images`` that belongs to one directory.

    The database file ``my_test.db`` is looked up inside ``dir`` itself, and
    the rows removed are those whose ``directory`` column equals ``dir``.
    Progress is reported with ``st.write`` before and after the delete.

    Args:
        dir: Directory path. It is used both to locate the database file and
            as the value matched against the ``directory`` column.
    """
    # This text is only shown to the user. The statement that actually runs
    # binds ``dir`` as a parameter, so a quote character inside the path can
    # neither break the SQL nor change its meaning.
    sql_statement = f"DELETE FROM txt_from_images WHERE directory = '{dir}'"
    db_name = dir + os.sep + "my_test.db"
    con = sl.connect(db_name)
    try:
        st.write(f" PROCESSING {sql_statement}")
        cur = con.cursor()
        cur.execute("DELETE FROM txt_from_images WHERE directory = ?", (dir,))
        con.commit()
        st.write(f" DONE {sql_statement}")
    finally:
        # Release the database file even when the delete fails.
        con.close()
def _build_search_query(filter_file_name, filter_keyword):
    """Return the SELECT statement and the LIKE patterns to bind to it."""
    sql_statement = (
        " SELECT directory || '\\' || filename AS complete_path, text_in_image"
        " FROM txt_from_images "
    )
    conditions = []
    params = []
    if filter_keyword is not None:
        conditions.append("text_in_image LIKE ?")
        params.append(f"%{filter_keyword}%")
    if filter_file_name is not None:
        conditions.append("complete_path LIKE ?")
        params.append(f"%{filter_file_name}%")
    if conditions:
        sql_statement += "WHERE (" + " AND ".join(conditions) + " )"
    return sql_statement, params


def _show_result(url, text):
    """Render one search hit: the image (if it can be opened) and its text."""
    try:
        # The context manager closes the image file once it has been rendered.
        with Image.open(url) as image:
            st.image(image, width=300, caption=url)
    except Exception:
        # Best effort: a missing or unreadable image must not stop the listing.
        # ``Exception`` (not a bare ``except``) lets BaseException pass through.
        st.write (f"{url} - not found")
    if len(text) > 400:
        # Long texts are collapsed behind an expander labelled with a preview.
        with st.expander(text[0:100] + " ..."):
            st.write(text)
    else:
        st.write(text)
    st.markdown("<hr>", unsafe_allow_html=True)


def read_db(filter_file_name, filter_keyword):
    """Search the database for a certain keyword and/or directory.

    Runs a ``LIKE`` search on ``txt_from_images`` and writes the outcome to
    the Streamlit page: a count, a dataframe of the hits and, when there are
    at most 100 hits, each image together with its text.

    Args:
        filter_file_name: Substring to look for in the combined
            ``directory\\filename`` path, or ``None`` for no path filter.
        filter_keyword: Substring to look for in ``text_in_image``, or
            ``None`` for no text filter.
    """
    #TODO implement filter for directory
    # The filters are bound as parameters instead of being pasted into the
    # SQL text, so quotes typed by the user cannot break or alter the query.
    sql_statement, params = _build_search_query(filter_file_name, filter_keyword)
    dir_name = r"C:\Users\rcxsm\Pictures\ocr_test"
    db_name = dir_name + os.sep + "my_test.db"
    con = sl.connect(db_name)
    try:
        df = pd.read_sql(sql_statement, con, params=params or None)
    except Exception:
        st.write("Error reading the data. Did you use forbidden characters ?")
        # Without a dataframe there is nothing left to display.
        return
    finally:
        con.close()

    if len(df) == 0:
        st.write("No items found")
    else:
        st.write(f"{len(df)} items found")
        st.dataframe(data=df, width=900, height=None)

    if len(df) >= 101:
        st.write("Too much results to show the images")
        return

    # iterrows is very slow when having too much records
    for _, row in df.iterrows():
        _show_result(f"{row['complete_path']}", row['text_in_image'])
def main():
    """Dispatch on the hard-coded ``action``: show search results or delete rows."""
    action = "show"
    if action == "delete":
        # delete records from a specific directory
        target_dir = r"C:\Users\rcxsm\Pictures\ocr_test"
        delete_records_from_db(target_dir)
    elif action == "show":
        # Three columns are laid out; only the first two hold an input box.
        columns = st.columns(3)
        with columns[0]:
            keyword = st.text_input("Filter text")
        with columns[1]:
            name_filter = st.text_input("Filter directory/file_name")
        read_db(name_filter, keyword)
    else:
        st.write("Give a valid action")
# Script entry point: select the wide page layout, then build the page.
if __name__ == "__main__":
    # The layout is configured before main() issues any other Streamlit call.
    st.set_page_config(layout="wide")
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment