Skip to content

Instantly share code, notes, and snippets.

@Ahanmr
Created March 29, 2020 20:50
Show Gist options
  • Save Ahanmr/d7fd78a0a49e2d2f040c153957d9d173 to your computer and use it in GitHub Desktop.
Save Ahanmr/d7fd78a0a49e2d2f040c153957d9d173 to your computer and use it in GitHub Desktop.
def find_tables(self, table_settings=None):
    """Find the tables on this object.

    Builds a ``TableFinder`` for ``self`` with ``table_settings`` and
    returns that finder's ``tables`` attribute.

    Args:
        table_settings: Optional settings mapping forwarded to
            ``TableFinder``. ``None`` (the default) is treated as an
            empty dict.

    Returns:
        The value of ``TableFinder(self, table_settings).tables``
        (presumably a list of table objects -- confirm against
        ``TableFinder``).
    """
    # Use a fresh dict per call: a ``{}`` default is a single object shared
    # by every call, so anything done to it downstream would leak between
    # unrelated calls.
    if table_settings is None:
        table_settings = {}
    return TableFinder(self, table_settings).tables
def extract_tables(self, table_settings=None):
    """Extract the contents of every table found on this object.

    Args:
        table_settings: Optional settings mapping passed to
            ``self.find_tables``. ``None`` (the default) is treated as
            an empty dict.

    Returns:
        A list with one entry per table returned by ``find_tables``, each
        entry being the result of that table's ``extract()`` call, in the
        order ``find_tables`` returned them.
    """
    # Normalise here rather than relying on find_tables to accept None, and
    # avoid a shared mutable ``{}`` default.
    settings = {} if table_settings is None else table_settings
    return [table.extract() for table in self.find_tables(settings)]
def extract_table(self, table_settings=None):
    """Extract the contents of the largest table found on this object.

    "Largest" means the table with the most entries in its ``cells``
    attribute. Ties are broken by the smallest ``bbox[1]`` and then the
    smallest ``bbox[0]`` (presumably the top and left edges -- confirm
    the bbox layout against the table class).

    Args:
        table_settings: Optional settings mapping passed to
            ``self.find_tables``. ``None`` (the default) is treated as
            an empty dict.

    Returns:
        The result of ``extract()`` on the selected table, or ``None``
        when ``find_tables`` yields no tables.
    """
    # Normalise here rather than relying on find_tables to accept None, and
    # avoid a shared mutable ``{}`` default.
    settings = {} if table_settings is None else table_settings
    tables = list(self.find_tables(settings))

    # Previously an empty result crashed with IndexError on ``[0]``.
    if not tables:
        return None

    # min() returns the first minimal element, which is the same table a
    # stable sort would place at index 0, without sorting the whole list.
    largest = min(
        tables,
        key=lambda table: (-len(table.cells), table.bbox[1], table.bbox[0]),
    )
    return largest.extract()
def extract_text(self,
        x_tolerance=utils.DEFAULT_X_TOLERANCE,
        y_tolerance=utils.DEFAULT_Y_TOLERANCE):
    """Extract text from this object's characters.

    Delegates to ``utils.extract_text`` with ``self.chars`` and the two
    tolerance values passed by keyword.

    Args:
        x_tolerance: Forwarded unchanged as ``x_tolerance``; defaults to
            ``utils.DEFAULT_X_TOLERANCE``.
        y_tolerance: Forwarded unchanged as ``y_tolerance``; defaults to
            ``utils.DEFAULT_Y_TOLERANCE``.

    Returns:
        Whatever ``utils.extract_text`` returns.
    """
    tolerances = {
        "x_tolerance": x_tolerance,
        "y_tolerance": y_tolerance,
    }
    return utils.extract_text(self.chars, **tolerances)
def extract_words(self,
        x_tolerance=utils.DEFAULT_X_TOLERANCE,
        y_tolerance=utils.DEFAULT_Y_TOLERANCE,
        keep_blank_chars=False):
    """Extract words from this object's characters.

    Delegates to ``utils.extract_words`` with ``self.chars`` and the
    three options passed by keyword.

    Args:
        x_tolerance: Forwarded unchanged as ``x_tolerance``; defaults to
            ``utils.DEFAULT_X_TOLERANCE``.
        y_tolerance: Forwarded unchanged as ``y_tolerance``; defaults to
            ``utils.DEFAULT_Y_TOLERANCE``.
        keep_blank_chars: Forwarded unchanged as ``keep_blank_chars``;
            defaults to ``False``.

    Returns:
        Whatever ``utils.extract_words`` returns.
    """
    options = {
        "x_tolerance": x_tolerance,
        "y_tolerance": y_tolerance,
        "keep_blank_chars": keep_blank_chars,
    }
    return utils.extract_words(self.chars, **options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment