elnazsn1988

## pypdfx.py
# pdfx usage: http://pdfx.cs.man.ac.uk/usage
# requests docs: http://docs.python-requests.org/en/latest/user/quickstart/#post-a-multipart-encoded-file
import requests # get it from http://python-requests.org or do 'pip install requests'

url = "http://pdfx.cs.man.ac.uk"

def pypdfx(filename):
  '''
	Filename is the name of a PDF file WITHOUT the extension
	The function will print messages, including the status code,

## fonts.py
def fonts(doc, granularity=False):
    """Extracts fonts and their usage in PDF documents.

    :param doc: PDF document to iterate through
    :type doc: <class 'fitz.fitz.Document'>
    :param granularity: also use 'font', 'flags' and 'color' to discriminate text
    :type granularity: bool

    :rtype: [(font_size, count), (font_size, count)], dict
    :return: most used fonts sorted by count, font style information

## object_map_generation.py
import os
import cv2
from itertools import chain
import base64
import pandas as pd
import requests
import json

def ocr_using_google_api(image_path, request_url):
    '''

## object_map_generation.py
import os
import cv2
from itertools import chain
import base64
import pandas as pd
import requests
import json

def ocr_using_google_api(image_path, request_url):
    '''

## myTestLSTM.py
from btgym import BTgymEnv
import IPython.display as Display
import PIL.Image as Image
from gym import spaces


import gym
import numpy as np
import random

## crop_morphology.py
#!/usr/bin/env python
'''Crop an image to just the portions containing text.

Usage:

    ./crop_morphology.py path/to/image.jpg

This will place the cropped image in path/to/image.crop.png.

For details on the methodology, see
	# pdfx usage: http://pdfx.cs.man.ac.uk/usage
	# requests docs: http://docs.python-requests.org/en/latest/user/quickstart/#post-a-multipart-encoded-file
	import requests # get it from http://python-requests.org or do 'pip install requests'

	url = "http://pdfx.cs.man.ac.uk"

	def pypdfx(filename):
	'''
	Filename is the name of a PDF file WITHOUT the extension
	The function will print messages, including the status code,
	def fonts(doc, granularity=False):
	"""Extracts fonts and their usage in PDF documents.

	:param doc: PDF document to iterate through
	:type doc: <class 'fitz.fitz.Document'>
	:param granularity: also use 'font', 'flags' and 'color' to discriminate text
	:type granularity: bool

	:rtype: [(font_size, count), (font_size, count)], dict
	:return: most used fonts sorted by count, font style information
	import os
	import cv2
	from itertools import chain
	import base64
	import pandas as pd
	import requests
	import json

	def ocr_using_google_api(image_path, request_url):
	'''
	from btgym import BTgymEnv
	import IPython.display as Display
	import PIL.Image as Image
	from gym import spaces


	import gym
	import numpy as np
	import random
	#!/usr/bin/env python
	'''Crop an image to just the portions containing text.

	Usage:

	./crop_morphology.py path/to/image.jpg

	This will place the cropped image in path/to/image.crop.png.

	For details on the methodology, see