Tristyn Alxander TristynAlxander

## find_longest_similar.py
def find_longest_similar(a,b,is_similar=None,is_ignored = None):
  '''
  This function finds the longest similar sequence between the given iterables.
  If no similarity function is given, the function uses equality (x == y).
  '''

  # Default Functions
  if(not is_similar): is_similar = lambda x,y: x==y
  if(not is_ignored): is_ignored = lambda _:False


## spacy_paragraphs_pipeline.py
@spacy.language.Language.component("paragraph_parser")
def define_paragraphs(document):
  """
  DESCRIPTION:
    A spaCy pipeline component that adds a paragraph parser (a Python generator) as a document attribute.
    The pipeline identifies paragraphs either by single-line-break or multi-line-break depending on what's used less frequently for sentences.
    The defined paragraphs include the whitespace tokens on either side to allow users to use the built-in sents parser.
  USAGE:
    nlp.add_pipe('paragraph_parser')
    doc = nlp(document_str)

## print_syntactic_tree.py
# Imports
import benepar, spacy
# Load Parser
# Load the spaCy model "en_core_web_lg", then add the 'benepar' component
# (configured to use the 'benepar_en3' model) to that pipeline.
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe('benepar', config={'model': 'benepar_en3'})

# Define Print Syntactic Tree
def print_tree(phrase_list,i=0):
  # Correct Input
  if( isinstance(phrase_list,spacy.tokens.span.Span) ):

## akta_pure_csv_to_dataframe.py
# AKTA Pure
def akta_pure_csv_to_dataframe(csv_file_name):
  """
  DESCRIPTION:
    This function uses the pandas Python module to convert CSV files produced by AKTA Pure machines into dictionaries of DataFrames.
  ARGUMENTS:
    csv_file_name = str: path to csv file produced by akta pure machine.
  ERRORS:
    None?
  RETURN:
	def find_longest_similar(a,b,is_similar=None,is_ignored = None):
	'''
	This function finds the longest similar sequence between the given iterables.
	If no similarity function is given, the function uses equality (x == y).
	'''

	# Default Functions
	if(not is_similar): is_similar = lambda x,y: x==y
	if(not is_ignored): is_ignored = lambda _:False
	@spacy.language.Language.component("paragraph_parser")
	def define_paragraphs(document):
	"""
	DESCRIPTION:
	A spaCy pipeline component that adds a paragraph parser (a Python generator) as a document attribute.
	The pipeline identifies paragraphs either by single-line-break or multi-line-break depending on what's used less frequently for sentences.
	The defined paragraphs include the whitespace tokens on either side to allow users to use the built-in sents parser.
	USAGE:
	nlp.add_pipe('paragraph_parser')
	doc = nlp(document_str)
	# Imports
	import benepar, spacy
	# Load Parser
	nlp = spacy.load("en_core_web_lg")
	nlp.add_pipe('benepar', config={'model': 'benepar_en3'})

	# Define Print Syntactic Tree
	def print_tree(phrase_list,i=0):
	# Correct Input
	if( isinstance(phrase_list,spacy.tokens.span.Span) ):
	# AKTA Pure
	def akta_pure_csv_to_dataframe(csv_file_name):
	"""
	DESCRIPTION:
	This function uses the pandas Python module to convert CSV files produced by AKTA Pure machines into dictionaries of DataFrames.
	ARGUMENTS:
	csv_file_name = str: path to csv file produced by akta pure machine.
	ERRORS:
	None?
	RETURN: