TK microcoder-py

## layoutlm_text_labelling.py
from transformers import LayoutLMTokenizer, LayoutLMForTokenClassification
import torch

# Load the pretrained tokenizer and the token-classification model from the
# same "microsoft/layoutlm-base-uncased" checkpoint.
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

# Sample input words.
words = ["Hello", "world"]
# Two 4-integer boxes. The right-hand side has no outer brackets, so this name
# is bound to a tuple of two lists.
# NOTE(review): presumably one [x0, y0, x1, y1] box per entry in `words`, on a
# 0-1000 scale -- confirm against the code that consumes it.
normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

# Starts empty; this excerpt ends before anything is appended to it.
token_boxes = []

## layoutlm_doc_classification_example.py
from transformers import LayoutLMTokenizer, LayoutLMForSequenceClassification
import torch

# Load the pretrained tokenizer and the sequence-classification model from the
# same "microsoft/layoutlm-base-uncased" checkpoint.
tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

# Sample input words.
words = ["Hello", "world"]
# Two 4-integer boxes. The right-hand side has no outer brackets, so this name
# is bound to a tuple of two lists.
# NOTE(review): presumably one [x0, y0, x1, y1] box per entry in `words`, on a
# 0-1000 scale -- confirm against the code that consumes it.
normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

# Starts empty; this excerpt ends before anything is appended to it.
token_boxes = []

## layoutlm_normalise_bbox.py
def normalize_bbox(bbox, width, height):
    """Rescale a four-value box so the full width/height maps to 1000.

    Args:
        bbox: Indexable holding at least positions 0..3. Positions 0 and 2
            are divided by ``width``; positions 1 and 3 by ``height``.
        width: Divisor for positions 0 and 2.
        height: Divisor for positions 1 and 3.

    Returns:
        A new list of four ints, each ``int(1000 * (value / divisor))``;
        ``int()`` drops the fractional part.
    """
    # Divisor to use for each of the four positions, in order.
    divisors = (width, height, width, height)
    scaled = []
    for position, divisor in enumerate(divisors):
        scaled.append(int(1000 * (bbox[position] / divisor)))
    return scaled

## layoutlm_install.py
>>pip install torch
>>pip install transformers

## sentence_similarity.py
# Make sure you have torch and sentence_transformers libraries installed
# 1 - Import Libraries & Install Files
import torch
from sentence_transformers import SentenceTransformer
from sentence_transformers import util

# Input pair for the similarity comparison; the comparison itself is not part
# of this excerpt.
sentence1 = "This is a sentence"
sentence2 = "This is also a sentence"

## vvdn.js
var request = require('request')
var fs = require('fs')

// Form fields for the upload: 'file' is a read stream over the local file
// ./TestInvoice1.png. This object is passed as `formData` in the request
// options that follow.
const form_data = {'file':  fs.createReadStream("./TestInvoice1.png") }
const options = {
    url: 'https://app.nanonets.com/api/v2/OCR/Model/a2072fdb-6950-4c20-ab86-614358c035d6/LabelFile/',
    formData: form_data,
    headers: {
        'Authorization': 'Basic ' + Buffer.from('{ADD_API_KEY}' + ':').toString('base64')
    }

## nanonets_coordinate_system.md

      
        
          
            
              
              1 file
            
          
          
            
              
              0 forks
            
          
            
              
                
                0 comments
              
            
          
            
              
              0 stars
            
          
        
        
          
              
          
          
            
                microcoder-py
                / nanonets_coordinate_system.md
            
            
              Created
              January 5, 2022 11:57
            
          
        
      
        
  
      
    Coordinate Systems with Nanonets OCR

After processing any given file, the Nanonets model ascertains bounding boxes for each value to be detected.

BOUNDING BOX

This is the box that defines the boundaries of the text that was detected.
As an example, we can see in this particular processed invoice that for each value detected, there is a rectangular box drawn around it. Each box is a bounding box.
	from transformers import LayoutLMTokenizer, LayoutLMForTokenClassification
	import torch

	tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
	model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

	words = ["Hello", "world"]
	normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

	token_boxes = []
	def normalize_bbox(bbox, width, height):
	    return [
	        int(1000 * (bbox[0] / width)),
	        int(1000 * (bbox[1] / height)),
	        int(1000 * (bbox[2] / width)),
	        int(1000 * (bbox[3] / height)),
	    ]
	# Make sure you have torch and sentence_transformers libraries installed
	# 1 - Import Libraries & Install Files
	import torch
	from sentence_transformers import SentenceTransformer
	from sentence_transformers import util

	#We will find similarity for these two sentences
	sentence1 = "This is a sentence"
	sentence2 = "This is also a sentence"
	var request = require('request')
	var fs = require('fs')

	const form_data = {'file': fs.createReadStream("./TestInvoice1.png") }
	const options = {
	url: 'https://app.nanonets.com/api/v2/OCR/Model/a2072fdb-6950-4c20-ab86-614358c035d6/LabelFile/',
	formData: form_data,
	headers: {
	'Authorization': 'Basic ' + Buffer.from('{ADD_API_KEY}' + ':').toString('base64')
	}