Raghvendra Jain raghavendrajain

## okta.py
from flask import Flask, render_template, g, redirect, url_for
from flask_oidc import OpenIDConnect
from okta import UsersClient, UserGroupsClient
import requests
from oauth2client.client import OAuth2Credentials
import json

app = Flask(__name__)

app.config.update({

## dataextractor.py
import pandas as pd
import os
# Load the exported annotations and lower-case every label.
df = pd.read_csv('Flight-1-export.csv')
df["label"] = df["label"].str.lower()

# Map each distinct (image, xmin, ymin, xmax, ymax) combination to the index
# labels of the rows that carry it.
groups = df.groupby(['image', "xmin", "ymin", "xmax", "ymax"]).groups
group_values = list(groups.values())
grouped_indices = [row_ids.to_list() for row_ids in group_values]

# For every group, store the comma-joined labels of all its rows in the
# "labels" column of the group's first row; no other row is assigned.
for index in grouped_indices:
    member_labels = [df.loc[idx, "label"] for idx in index]
    df.loc[index[0], "labels"] = ",".join(member_labels)


## datatable-callback.py
import jupyterlab_dash
import dash
import dash_html_components as html
import dash_core_components as dcc
import dash_table as dt
import pandas as pd
import pickle

from dash.dependencies import Input, Output, State

## webcame_recorder.py
# Code snippet posted on OpenCV website.
# Jain tested on Ubuntu 18.04.
# For windows, follow this blog https://www.codepool.biz/web-camera-recorder-oepncv-flask.html
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'XVID')

## multi-face.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                raghavendrajain
                / multi-face.ipynb
            
            
              Created
              September 25, 2019 01:35
                — forked from yang-zhang/multi-face.ipynb
            
              
                Multi-task Deep Learning Experiment using fastai Pytorch
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## simulation.py
Check if the result_ext folder exists inside the folder. If not, create it.
Read the original file
Unlock the file in all cases, even if it is already unlocked. It is a fairly inexpensive operation.
Split the files. Each page is separated into a temporary file.
Read each of the split files into a dataframe and then delete that file.
If the page has a table, read it from the dataframe and extract the data. Camelot is used to do that.
Read all the text using Tika.
Split the text on the newline character. This will give many lists. Use a regular expression to get the lists that begin with 4 digits.
Concatenate all the dataframes.
Save all the concatenated dataframes into a final dataframe if the number of columns is 10.

## extract-tables.py
import tika
tika.initVM()
from tika import parser
import camelot
import pandas as pd
import re

#filename = "Sample3_unconsolidated.pdf"
filename = "Sample3_consolidated.pdf"

## extract_pages_pdf.py
# Requires PyPDF2 (e.g. `pip install PyPDF2`); installing camelot may also provide it.

from PyPDF2 import PdfFileWriter, PdfFileReader

inputpdf = PdfFileReader(open("1_extractable.pdf", "rb"))

for i in range(inputpdf.numPages):
    output = PdfFileWriter()
    output.addPage(inputpdf.getPage(i))
    with open("document-page%s.pdf" % i, "wb") as outputStream:

## unlock.py
# install pikepdf using `pip install pikepdf`

import pikepdf

# Open the source PDF and save a copy under a new name. The context manager
# closes the opened PDF once the copy has been written, instead of leaving it
# open until the interpreter exits.
with pikepdf.open('unextractable.pdf') as pdf:
    pdf.save('extractable.pdf')

## download.js
// Pull jQuery 2.2.0 into the page by appending a <script> tag to <head>, so
// that `$` can be used from the JavaScript console.
// NOTE(review): the injected script presumably loads asynchronously, so the
// statements below should only be run once it has finished loading — confirm.
var script = document.createElement('script');
script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js";
document.getElementsByTagName('head')[0].appendChild(script);
// Grab the URLs: parse the text of every `.rg_di .rg_meta` element as JSON
// and keep its `ou` field.
var urls = $('.rg_di .rg_meta').map(function() { return JSON.parse($(this).text()).ou; });
// Write the URLs to a file (one per line): join them with newlines and place
// the URI-encoded text in the href of a new <a> element as a data URI.
var textToSave = urls.toArray().join('\n');
var hiddenElement = document.createElement('a');
hiddenElement.href = 'data:attachment/text,' + encodeURI(textToSave);
	from flask import Flask, render_template, g, redirect, url_for
	from flask_oidc import OpenIDConnect
	from okta import UsersClient, UserGroupsClient
	import requests
	from oauth2client.client import OAuth2Credentials
	import json

	app = Flask(__name__)

	app.config.update({
	import pandas as pd
	import os
	df = pd.read_csv('Flight-1-export.csv')
	df["label"] = df["label"].str.lower()
	groups = df.groupby(['image', "xmin", "ymin", "xmax", "ymax"]).groups
	group_values = list(groups.values())
	grouped_indices = list(map(lambda x: x.to_list(), group_values))
	for index in grouped_indices:
	df.loc[index[0], "labels"] = ",".join([df.loc[idx, "label"] for idx in index])
	import jupyterlab_dash
	import dash
	import dash_html_components as html
	import dash_core_components as dcc
	import dash_table as dt
	import pandas as pd
	import pickle

	from dash.dependencies import Input, Output, State
	# Code snippet posted on OpenCV website.
	# Jain tested on Ubuntu 18.04.
	# For windows, follow this blog https://www.codepool.biz/web-camera-recorder-oepncv-flask.html
	import numpy as np
	import cv2

	cap = cv2.VideoCapture(0)

	# Define the codec and create VideoWriter object
	fourcc = cv2.VideoWriter_fourcc(*'XVID')
	Check if result_ext folder exists inside the folder. If not make the folder
	Read the original file
	Unlock the file in all cases, even if it is unlocked. It is fairly inexpensive operation.
	Split the files. Each page is separated into a temporary file.
	Read from each of the split file into a dataframe and then delete that file.
	If the page has table then read it from the dataframe and extract the data. Camelot is used to do that.
	Read all the text using Tika.
	Split the text based on newline character. This will give many lists. Use regular expression to get lists that begin with 4 digits.
	Concatenate all the dataframes .
	Save all the concatenated dataframes into a final dataframe if the number of columns is 10.
	import tika
	tika.initVM()
	from tika import parser
	import camelot
	import pandas as pd
	import re

	#filename = "Sample3_unconsolidated.pdf"
	filename = "Sample3_consolidated.pdf"
	#install camelot or something like that for using PyPDF2

	from PyPDF2 import PdfFileWriter, PdfFileReader

	inputpdf = PdfFileReader(open("1_extractable.pdf", "rb"))

	for i in range(inputpdf.numPages):
	output = PdfFileWriter()
	output.addPage(inputpdf.getPage(i))
	with open("document-page%s.pdf" % i, "wb") as outputStream:
	# install pikepdf using `pip install pikepdf`

	import pikepdf

	pdf = pikepdf.open('unextractable.pdf')
	pdf.save('extractable.pdf')
	// pull down jquery into the JavaScript console
	var script = document.createElement('script');
	script.src = "https://ajax.googleapis.com/ajax/libs/jquery/2.2.0/jquery.min.js";
	document.getElementsByTagName('head')[0].appendChild(script);
	// grab the URLs
	var urls = $('.rg_di .rg_meta').map(function() { return JSON.parse($(this).text()).ou; });
	// write the URls to file (one per line)
	var textToSave = urls.toArray().join('\n');
	var hiddenElement = document.createElement('a');
	hiddenElement.href = 'data:attachment/text,' + encodeURI(textToSave);