Skip to content

Instantly share code, notes, and snippets.

View Ahanmr's full-sized avatar
🎯
Focusing

Ahan M R Ahanmr

🎯
Focusing
  • Bangalore, India
View GitHub Profile
We couldn’t find that file to show.
import re, os
from collections import Counter
from symspellpy.symspellpy import SymSpell as SymSpellPy, Verbosity
class SpellCheck:
def __init__(self, dictionary=None, verbose=0):
self.verbose = verbose
self.dictionary = dictionary
def correction(self, text):
"""
Problem:
How to Convert PDF to Image with Python Script ?
$ sudo apt-get install libmagickwand-dev
$ pip install Wand
"""
from PIL import Image as Img
import PIL.Image
import PIL.ImageDraw
import wand.image
import sys, os
from io import BytesIO
def draw_rect(self, bbox_or_obj,
fill=DEFAULT_FILL,
stroke=DEFAULT_STROKE,
stroke_width=DEFAULT_STROKE_WIDTH):
import io
from PIL import Image
import pytesseract
from wand.image import Image as wi
pdf = wi(filename = "sample2.pdf", resolution = 300)
pdfImage = pdf.convert('jpeg')
imageBlobs = []
class TableFinder(object):
"""
Given a PDF page, finds table structures.
"""
def __init__(self, page, settings={}):
for k in settings.keys():
if k not in DEFAULT_TABLE_SETTINGS:
raise ValueError("Unrecognized table setting: '{0}'".format(
k
))
def find_tables(self, table_settings={}):
return TableFinder(self, table_settings).tables
def extract_tables(self, table_settings={}):
tables = self.find_tables(table_settings)
return [ table.extract() for table in tables ]
def extract_table(self, table_settings={}):
tables = self.find_tables(table_settings)
# Return the largest table, as measured by number of cells.
def modelfit(alg, dtrain, predictors,useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
if useTrainCV:
xgb_param = alg.get_xgb_params()
xgtrain = xgb.DMatrix(dtrain[predictors].values, label=dtrain[target].values)
cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
metrics='auc', early_stopping_rounds=early_stopping_rounds, show_progress=False)
alg.set_params(n_estimators=cvresult.shape[0])
#Fit the algorithm on the data
#Import libraries:
import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn import cross_validation, metrics #Additional scklearn functions
from sklearn.grid_search import GridSearchCV #Perforing grid search
import matplotlib.pylab as plt
%matplotlib inline
car : 52.74809002876282
car : 54.43572402000427
car : 61.86940670013428
car : 64.99541997909546
car : 54.53670620918274
car : 54.111236333847046
car : 55.92341423034668
person : 54.37796711921692
person : 61.132240295410156
car : 70.4900324344635