Skip to content

Instantly share code, notes, and snippets.

View ttchengab's full-sized avatar

Tim Cheng ttchengab

View GitHub Profile
# Character vocabulary: uppercase letters, digits, punctuation, then
# space, tab and newline. A character's position in this string is the
# integer index used to encode it.
VOCAB = "".join((ascii_uppercase, digits, punctuation, " \t\n"))

# Device that tensors are moved to; change to 'cuda' to run on a GPU.
device = 'cpu'
def get_test_data(etfo):
    """Encode a text sequence as a column tensor of VOCAB indices.

    Each character of ``etfo`` is mapped to its position in ``VOCAB``
    with ``str.find``, so a character that does not occur in ``VOCAB``
    is encoded as -1.

    Args:
        etfo: The text to encode.

    Returns:
        A ``torch.long`` tensor of shape ``(len(etfo), 1)``, moved to
        ``device``.
    """
    indices = [VOCAB.find(ch) for ch in etfo]
    column = torch.tensor(indices, dtype=torch.long).unsqueeze(1)
    return column.to(device)
"""
The following LSTM is the model applied after OCR extraction: it predicts the key-value pairs from the text
extracted by the get_info() method described earlier.
"""
class ExtractLSTM(nn.Module):
    """Embedding, two-layer bidirectional LSTM and linear output layers.

    Args:
        vocab_size: Number of entries in the embedding table.
        embed_size: Size of each embedding vector and of the LSTM input.
        hidden_size: Hidden size of each LSTM direction.
        num_classes: Number of output features of the final linear layer.
            Defaults to 5, the value that was previously hard-coded.
    """

    def __init__(self, vocab_size, embed_size, hidden_size, num_classes=5):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(
            embed_size,
            hidden_size,
            num_layers=2,
            bidirectional=True,
        )
        # A bidirectional LSTM emits the forward and backward hidden states
        # concatenated, so the projection takes 2 * hidden_size features.
        self.linear = nn.Linear(hidden_size * 2, num_classes)
"""
The get_info() function reads the image using OpenCV and performs thresholding, dilation, noise removal, and
contouring to finally retrieve bounding boxes from the contours.
Below are some of the additional functions available in OpenCV for preprocessing:
Median filter: a median filter blurs out noise by taking the median of a set of pixels
cv2.medianBlur()
# Import PyTorch Library
import torch
from torch import nn
# Import external libraries
import argparse
import numpy as np
import opencv_wrapper as cvw
from skimage.filters import threshold_local
import json
pip install pytesseract
pip install opencv-python
# Creating a simple network
class LeNet5(nn.Module):
    """Layers of a small convolutional network.

    Builds two 5x5 convolutions (1 -> 6 channels with padding 2, then
    6 -> 16 channels) followed by three fully connected layers
    (400 -> 120 -> 84 -> 10).

    NOTE(review): fc1's 16 * 5 * 5 input size presumably corresponds to
    conv2's 16 output channels at a 5x5 spatial size; the pooling steps and
    input resolution that would produce it are not defined in this block -
    confirm against the forward pass.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
#include <stdio.h>
#include <iostream>
#include <algorithm>
#include <cmath>
#include <cassert>
#include <cstring>
#include <cstdint>
#include <libbase/k60/mcg.h>
#include <libsc/system.h>
#include <libsc/k60/ov7725.h>
#include <cmath>
#include <cassert>
#include <cstring>
#include <cstdint>
#include <libbase/k60/mcg.h>
#include <libsc/system.h>
#include <libsc/k60/ov7725.h>
#include <libsc/led.h>
#include <libsc/st7735r.h>
#include <libutil/misc.h>
#include <cmath>
#include <cassert>
#include <cstring>
#include <cstdint>
#include <libbase/k60/mcg.h>
#include <libsc/system.h>
#include <libsc/k60/ov7725.h>
#include <libsc/led.h>
#include <libsc/st7735r.h>
#include <libutil/misc.h>
#include "main.h"
// Two counters, both starting at 0. NOTE(review): the names suggest tick and
// second counts for image handling - confirm where they are incremented.
u32 ticks_img = 0;
u32 ticks_sec_img = 0;
// Initial servo position value (units are not visible here - presumably a
// PWM setting; verify against the servo code).
u16 servo_pos = 750;
// Initial speed value; its units and valid range are not shown in this excerpt.
u8 speed = 20;
// Three 16-bit colors produced by RGB888TO565 from 24-bit RGB values:
// 0xC72929 (red), 0xFFC72C (amber), 0x40CA77 (green).
// NOTE(review): presumably indexed by speed level - confirm.
u16 speed_indic[3] = {RGB888TO565(0xC72929), RGB888TO565(0xFFC72C), RGB888TO565(0x40CA77)};
//Button listeners
/*