Skip to content

Instantly share code, notes, and snippets.

@akash-ch2812
akash-ch2812 / PDF_to_Image.py
Created July 1, 2020 06:44
Code for converting PDF file to Image in Python
from pdf2image import convert_from_path
# Path to the input PDF; replace the placeholder with a real file path.
pdfs = r"provide path to pdf file"

# Convert the PDF into a sequence of page images at 350 DPI.
pages = convert_from_path(pdfs, dpi=350)

# Save each page as Page_<n>.jpg (relative to the current working directory),
# numbering pages from 1.
for page_number, page in enumerate(pages, start=1):
    image_name = f"Page_{page_number}.jpg"
    page.save(image_name, "JPEG")
@akash-ch2812
akash-ch2812 / Marking_ROI.py
Last active January 17, 2024 06:11
Python code for marking regions of interest in an image for OCR
# Install OpenCV (imported as cv2) with:
# pip install opencv-python
# Install Pillow (imported as PIL) with:
# pip install Pillow
import cv2
from PIL import Image
def mark_region(imagE_path):
@akash-ch2812
akash-ch2812 / Crop_and_OCR.py
Last active May 4, 2023 10:27
Crop an image first and then apply OCR
import pytesseract
# Tell pytesseract where the Tesseract executable lives.
# NOTE(review): this is a hard-coded, user-specific Windows path; it has to be
# changed to match the Tesseract installation on the machine running the code.
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Akash.Chauhan1\AppData\Local\Tesseract-OCR\tesseract.exe'
# load the original image (file name is relative to the working directory)
image = cv2.imread('Original_Image.jpg')
# get co-ordinates to crop the image: takes the entry at index 1
# NOTE(review): line_items_coordinates is not defined in the visible code;
# presumably it is produced by the region-marking step -- confirm.
c = line_items_coordinates[1]
# cropping image: img = image[y0:y1, x0:x1]
# (the cropping statement itself is not part of the visible snippet)
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
# Build the sequential CNN in one step by passing the ordered list of layers.
cnn = tf.keras.models.Sequential([
    # convolution layer; it also declares 64x64 inputs with 3 channels
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3]),
    # NOTE(review): the snippet carries an "adding pooling layer" comment at
    # this point, but no pooling layer is added in the visible code.
    # flatten the convolution output before the fully connected layers
    tf.keras.layers.Flatten(),
    # fully connected layers
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    # output layer: 6 neurons, one per class. Softmax is used for multiclass
    # classification; for binary classification use sigmoid instead.
    tf.keras.layers.Dense(6, activation='softmax'),
])
# import statements
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
# loading training data
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
import tensorflow as tf
from keras.preprocessing import image
import numpy as np
# function to extract features from image
def extract_image_features():
model = tf.keras.models.Sequential()
import tensorflow as tf
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.models import Model
# Load ResNet50 with ImageNet weights, leaving out its top (classification) layers.
feature_extractor = ResNet50(include_top=False, weights='imagenet')
# Wrap the network in a new model that shares the same input but stops at the
# second-to-last layer.
# NOTE(review): with include_top=False the classifier head is already omitted,
# so layers[-2] is presumably a late convolutional-block layer rather than a
# pooled feature vector -- confirm this is the intended output.
feature_extractor_new = Model(
    inputs=feature_extractor.input,
    outputs=feature_extractor.layers[-2].output,
)
# Print the layer-by-layer summary of the truncated model.
feature_extractor_new.summary()
# loading captions from captions file
import pandas as pd

# Read captions.txt as a comma-separated table.
captions = pd.read_csv('/kaggle/input/flickr8k/captions.txt', sep=",")
# Normalise the header names: strip surrounding whitespace and lowercase them.
captions = captions.rename(columns=lambda x: x.strip().lower())
# Drop only the trailing file extension from each image name. Splitting on the
# last dot keeps ids that themselves contain dots intact.
captions['image'] = captions['image'].str.rsplit(".", n=1).str[0]
# Keep just the two columns used downstream; .copy() makes the result an
# independent frame so the assignment below never writes into a slice.
captions = captions[['image', 'caption']].copy()
# adding <start> and <end> to every caption
captions['caption'] = "<start> " + captions['caption'] + " <end>"
import spacy
# Load the English pipeline by its full package name: the bare 'en' shortcut
# is not accepted by spaCy v3+. The en_core_web_sm package must be installed
# (python -m spacy download en_core_web_sm).
# The tagger, parser and NER components are disabled when loading.
nlp = spacy.load('en_core_web_sm', disable=['tagger', 'parser', 'ner'])
# tokenize every caption, remove punctuation, lowercase everything
for key, value in train_image_captions.items():
ls = []
for v in value:
doc = nlp(v)
new_v = " "
for token in doc: