Ahan M R Ahanmr

## gist:0c4e9f94eea3277787b51435e279911a

      
        
          
            
              
              0 files
            
          
          
            
              
              0 forks
            
          
          
            
              
              0 comments
            
          
          
            
              
              0 stars
            
          
        
        
          
              
          
          
            
                Ahanmr
                / gist:0c4e9f94eea3277787b51435e279911a
            
            
              Created
              March 30, 2020 19:18
            
          
        
      
        
          
            
              
              We couldn’t find that file to show.
              
            
          
        
    

## spelling.py
import re, os
from collections import Counter
from symspellpy.symspellpy import SymSpell as SymSpellPy, Verbosity

class SpellCheck:
    def __init__(self, dictionary=None, verbose=0):
        self.verbose = verbose
        self.dictionary = dictionary

    def correction(self, text):

## imagemagick.py
"""
Problem:
  How to Convert PDF to Image with Python Script ?

  $ sudo apt-get install libmagickwand-dev
  $ pip install Wand

"""

from PIL import Image as Img

## draw_rect.py
import PIL.Image
import PIL.ImageDraw
import wand.image
import sys, os
from io import BytesIO

    def draw_rect(self, bbox_or_obj,
        fill=DEFAULT_FILL,
        stroke=DEFAULT_STROKE,
        stroke_width=DEFAULT_STROKE_WIDTH):

## pytesseract.py
import io
from PIL import Image
import pytesseract
from wand.image import Image as wi

# Open the PDF with Wand's Image class (imported above as `wi`), passing
# resolution=300.
pdf = wi(filename = "sample2.pdf", resolution = 300)
# Convert the loaded document to JPEG; `pdfImage` holds the converted result.
pdfImage = pdf.convert('jpeg')

# Starts empty; presumably filled with per-page image blobs further down --
# TODO confirm against the rest of the script.
imageBlobs = []

## TableFinder.py
class TableFinder(object):
    """
    Given a PDF page, finds table structures.

    Construction validates the supplied settings: every key must be present
    in ``DEFAULT_TABLE_SETTINGS``.
    """
    def __init__(self, page, settings=None):
        """
        Check ``settings`` against the known table settings.

        Args:
            page: The page to search for tables. It is not used by the
                validation performed here.
            settings: Optional mapping of table-setting overrides. ``None``
                (the default) is treated as "no overrides".

        Raises:
            ValueError: If ``settings`` contains a key that is not in
                ``DEFAULT_TABLE_SETTINGS``.
        """
        # A ``None`` default replaces the former ``{}`` literal in the
        # signature, so no dict object is shared between separate calls.
        if settings is None:
            settings = {}
        for k in settings:
            if k not in DEFAULT_TABLE_SETTINGS:
                raise ValueError("Unrecognized table setting: '{0}'".format(
                    k
                ))

## pages.py
    def find_tables(self, table_settings=None):
        """
        Find the tables on this page.

        Args:
            table_settings: Optional mapping of settings handed to
                ``TableFinder``. ``None`` (the default) is passed on as an
                empty dict.

        Returns:
            The ``tables`` attribute of a ``TableFinder`` built for this page.
        """
        # Normalize here so ``TableFinder`` always receives a mapping, and so
        # no dict literal in the signature is shared between calls.
        if table_settings is None:
            table_settings = {}
        return TableFinder(self, table_settings).tables

    def extract_tables(self, table_settings=None):
        """
        Extract the contents of every table found on this page.

        Args:
            table_settings: Optional mapping of settings forwarded to
                ``find_tables``. ``None`` (the default) is forwarded as an
                empty dict.

        Returns:
            A list holding the result of ``extract()`` for each table, in
            the order ``find_tables`` returned them.
        """
        # Normalize here so ``find_tables`` always receives a mapping, and so
        # no dict literal in the signature is shared between calls.
        if table_settings is None:
            table_settings = {}
        return [table.extract() for table in self.find_tables(table_settings)]

    def extract_table(self, table_settings={}):
        tables = self.find_tables(table_settings)
        # Return the largest table, as measured by number of cells.

## def.py
def modelfit(alg, dtrain, predictors,useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    """Optionally set ``alg``'s ``n_estimators`` from an xgboost cross-validation run.

    Only the cross-validation step is visible in this excerpt; the fitting
    step announced by the trailing comment is not shown.

    Args:
        alg: Estimator exposing ``get_xgb_params()``, ``get_params()`` and
            ``set_params()`` (presumably an ``XGBClassifier`` -- confirm).
        dtrain: Training data indexed by column name; ``.values`` is read
            from the selected columns (presumably a pandas DataFrame --
            confirm).
        predictors: Selector for the feature columns of ``dtrain``.
        useTrainCV: When true, run ``xgb.cv`` and update ``n_estimators``
            from its result.
        cv_folds: Passed to ``xgb.cv`` as ``nfold``.
        early_stopping_rounds: Passed through to ``xgb.cv``.
    """

    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        # NOTE(review): `target` is neither a parameter nor a local here; it
        # has to exist as a module-level name defined elsewhere.
        xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values)
        # The current n_estimators is used as the number of boosting rounds.
        # NOTE(review): `show_progress` appears to have been replaced by
        # `verbose_eval` in later xgboost releases -- confirm against the
        # installed version.
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
            metrics='auc', early_stopping_rounds=early_stopping_rounds, show_progress=False)
        # n_estimators becomes the number of rows in the CV result.
        alg.set_params(n_estimators=cvresult.shape[0])

    #Fit the algorithm on the data

## import.py
#Import libraries:
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn import cross_validation, metrics   #Additional sklearn functions
from sklearn.grid_search import GridSearchCV   #Performing grid search

import matplotlib.pylab as plt
%matplotlib inline

## detect.txt
car : 52.74809002876282
car : 54.43572402000427
car : 61.86940670013428
car : 64.99541997909546
car : 54.53670620918274
car : 54.111236333847046
car : 55.92341423034668
person : 54.37796711921692
person : 61.132240295410156
car : 70.4900324344635
	import re, os
	from collections import Counter
	from symspellpy.symspellpy import SymSpell as SymSpellPy, Verbosity

	class SpellCheck:
	def __init__(self, dictionary=None, verbose=0):
	self.verbose = verbose
	self.dictionary = dictionary

	def correction(self, text):
	"""
	Problem:
	How to Convert PDF to Image with Python Script ?

	$ sudo apt-get install libmagickwand-dev
	$ pip install Wand

	"""

	from PIL import Image as Img
	import PIL.Image
	import PIL.ImageDraw
	import wand.image
	import sys, os
	from io import BytesIO

	def draw_rect(self, bbox_or_obj,
	fill=DEFAULT_FILL,
	stroke=DEFAULT_STROKE,
	stroke_width=DEFAULT_STROKE_WIDTH):
	import io
	from PIL import Image
	import pytesseract
	from wand.image import Image as wi

	# Open the PDF with Wand's Image class (imported above as `wi`), passing
	# resolution=300.
	pdf = wi(filename = "sample2.pdf", resolution = 300)
	# Convert the loaded document to JPEG; `pdfImage` holds the converted result.
	pdfImage = pdf.convert('jpeg')

	# Starts empty; presumably filled with per-page image blobs further down --
	# TODO confirm against the rest of the script.
	imageBlobs = []
	class TableFinder(object):
		"""
		Given a PDF page, finds table structures.

		Construction validates the supplied settings: every key must be
		present in ``DEFAULT_TABLE_SETTINGS``.
		"""
		def __init__(self, page, settings=None):
			"""
			Check ``settings`` against the known table settings.

			Args:
				page: The page to search for tables. It is not used by the
					validation performed here.
				settings: Optional mapping of table-setting overrides.
					``None`` (the default) is treated as "no overrides".

			Raises:
				ValueError: If ``settings`` contains a key that is not in
					``DEFAULT_TABLE_SETTINGS``.
			"""
			# A ``None`` default replaces the former ``{}`` literal in the
			# signature, so no dict object is shared between separate calls.
			if settings is None:
				settings = {}
			for k in settings:
				if k not in DEFAULT_TABLE_SETTINGS:
					raise ValueError("Unrecognized table setting: '{0}'".format(
						k
					))
	def find_tables(self, table_settings=None):
		"""
		Find the tables on this page.

		Args:
			table_settings: Optional mapping of settings handed to
				``TableFinder``. ``None`` (the default) is passed on as an
				empty dict.

		Returns:
			The ``tables`` attribute of a ``TableFinder`` built for this page.
		"""
		# Normalize here so ``TableFinder`` always receives a mapping, and so
		# no dict literal in the signature is shared between calls.
		if table_settings is None:
			table_settings = {}
		return TableFinder(self, table_settings).tables

	def extract_tables(self, table_settings=None):
		"""
		Extract the contents of every table found on this page.

		Args:
			table_settings: Optional mapping of settings forwarded to
				``find_tables``. ``None`` (the default) is forwarded as an
				empty dict.

		Returns:
			A list holding the result of ``extract()`` for each table, in
			the order ``find_tables`` returned them.
		"""
		# Normalize here so ``find_tables`` always receives a mapping, and so
		# no dict literal in the signature is shared between calls.
		if table_settings is None:
			table_settings = {}
		return [table.extract() for table in self.find_tables(table_settings)]

	def extract_table(self, table_settings={}):
	tables = self.find_tables(table_settings)
	# Return the largest table, as measured by number of cells.
	def modelfit(alg, dtrain, predictors,useTrainCV=True, cv_folds=5, early_stopping_rounds=50):

	if useTrainCV:
	xgb_param = alg.get_xgb_params()
	xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values)
	cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
	metrics='auc', early_stopping_rounds=early_stopping_rounds, show_progress=False)
	alg.set_params(n_estimators=cvresult.shape[0])

	#Fit the algorithm on the data
	#Import libraries:
	import pandas as pd
	import numpy as np
	import xgboost as xgb
	from xgboost.sklearn import XGBClassifier
	from sklearn import cross_validation, metrics #Additional sklearn functions
	from sklearn.grid_search import GridSearchCV #Performing grid search

	import matplotlib.pylab as plt
	%matplotlib inline
	car : 52.74809002876282
	car : 54.43572402000427
	car : 61.86940670013428
	car : 64.99541997909546
	car : 54.53670620918274
	car : 54.111236333847046
	car : 55.92341423034668
	person : 54.37796711921692
	person : 61.132240295410156
	car : 70.4900324344635