DardanX dardanxhymshiti

## generate_tables_in_msword.py
from docx import Document
from docx.shared import Cm, Pt


article_1 = """Bayern Munich came out on top in a thrilling German Cup final, beating Bayer Leverkusen 4-2 to secure its 20th title and remain on course for an historic treble.
David Alaba's stunning free kick and Serge Gnabry's clinical finish gave Bayern a commanding lead heading into half time and Hans-Dieter Flick's side seemingly already had one hand on the trophy.
However, Leverkusen responded well early in the second half and had a golden opportunity to halve the deficit through substitute Kevin Volland."""

article_2 = """(CNN)Liverpool got its Premier League title-winning celebrations back on track with a 2-0 win over Aston Villa, just days after being on the receiving end of a record-equaling defeat.
Many had suggested Jurgen Klopp's side was suffering from something of a hangover during Thursday's 4-0 demolition at the hands of Manchester City -- the joint-heaviest defeat by a team already crowned Premier League champion -- but Liverpool re

## describe_text.py
def describe_text(text):
    """Begin building a description of ``text`` from its punctuation-free tokens.

    NOTE(review): only the opening of this function is visible here; the
    lines shown never store anything in ``description`` and never return
    it, so the remainder is presumably cut off -- confirm against the full
    source.
    """
    import re, string

    # Result container; nothing is stored in it within the visible lines.
    description = dict()

    # Remove every character listed in string.punctuation. re.escape keeps
    # characters such as ']' and '^' literal inside the character class.
    text_wo_punctuation_marks = re.sub(f'[%s]' % re.escape(string.punctuation), '', text)

    # Tokens of the punctuation-free text. Splitting on a single space means
    # consecutive spaces yield empty-string tokens and newlines do not split.
    tokens_of_text_wo_punctuation_marks = text_wo_punctuation_marks.split(' ')

## remove_punctuation_marks.py
def remove_punctuation_marks(text):
    """Return ``text`` with every ASCII punctuation character deleted.

    The characters removed are exactly those in ``string.punctuation``;
    letters, digits and whitespace are left untouched.
    """
    import re
    import string

    # Escaping keeps ']', '^' and '-' literal inside the character class.
    punctuation_class = '[%s]' % re.escape(string.punctuation)
    return re.compile(punctuation_class).sub('', text)


# Test
text = """Hello, World!"""

## get_text_within_brackets.py
def get_text_within_brackets(text):
    """Return the text enclosed by each bracket pair found in ``text``.

    Round, square and curly brackets are recognised. Matching is lazy, so
    each result stops at the first closing bracket after an opening one,
    and the kind of closing bracket is not required to match the kind of
    opening bracket. Content never spans a newline.

    Args:
        text: The string to search.

    Returns:
        A list with the content of every bracketed span, in order of
        appearance (empty when there is none).
    """
    import re
    # Inside a character class '|' is a literal pipe, not alternation, so
    # the classes list only the bracket characters themselves.
    pattern = r"[(\[{](.*?)[)\]}]"
    return re.findall(pattern, text)


# Test
text = '''I was very surprised (and it's pretty hard to surprise me!)... He [Felix] is a gret friends of me...'''
get_text_within_brackets(text)

## get_consequent_title_words.py
def get_consequent_title_words(text):
    import re
    pattern_compiled = re.compile(r'([A-Z][^\.!?]*[\.!?])', re.M)
    list_of_sentences = re.findall(pattern_compiled, text)
    list_of_sentence_tokens = [sentence.split(' ') for sentence in list_of_sentences]

    list_of_consequent_tokens = list()
    for tokens in list_of_sentence_tokens:
        temp_list_of_title_tokens = list()
        for index, t in enumerate(tokens):

## get_context.py
def get_context(text, list_of_tokens, context_span=20):
    """Compute a window of surrounding text for each occurrence of each token.

    NOTE(review): only the opening of this function is visible here; the
    lines shown never add ``substring`` to ``context`` and never return, so
    the remainder is presumably cut off -- confirm against the full source.
    """
    import re
    context = []
    for token in list_of_tokens:
        # The token is handed to re.finditer as-is, so it is interpreted as a
        # regular expression rather than as literal text.
        all_occurences_indices = [m.start() for m in re.finditer(token, text)]
        for index in all_occurences_indices:
            # Window of context_span characters on either side of the match
            # start, clamped to the bounds of the text.
            left_index = max(index - context_span, 0)
            right_index = min(index + context_span, len(text))
            substring = text[left_index: right_index].strip()


## get_sentences.py
def get_sentences(text):
    """Extract sentences from ``text`` with a simple regular expression.

    A sentence starts at an uppercase ASCII letter and runs up to and
    including the next '.', '!' or '?'. Anything before the first uppercase
    letter, and any trailing text without a closing mark, is not returned.
    Newlines inside a sentence are kept as part of it.

    Args:
        text: The string to split.

    Returns:
        A list of sentence strings in order of appearance.
    """
    import re
    # The pattern has no '^' or '$' anchors, so no flags are required and a
    # separately compiled copy of it is unnecessary.
    pattern = r'[A-Z][^.!?]*[.!?]'
    return re.findall(pattern, text)


# Test
text = """This is the most frequent question we're asked by prospective students. And our response? Absolutely! We've trained people from all walks of life."""

## get_capital_words.py
def get_capital_words(text):
    """Find the all-caps words in ``text``.

    A word qualifies when it is a run of two or more uppercase ASCII
    letters with a word boundary on both sides, so single capitals and
    mixed-case words are ignored.
    """
    import re
    upper_run = re.compile(r'(\b[A-Z]{2,}\b)')
    return upper_run.findall(text)


# Test
text = """Thank you! Your customer service request has been logged. A specialist will reach out by EOD"""
get_capital_words(text)

## get_text_within_quotes.py
def get_text_within_quotes(text):
    """Collect the text found between pairs of double quotes in ``text``.

    Quotes are paired lazily from left to right and the quoted content may
    not span a newline. The surrounding quote characters are not included
    in the results.
    """
    import re
    quoted_span = re.compile("\"(.*?)\"")
    return quoted_span.findall(text)


# Test
text = """The sign said, "Walk". Then it said, "Don't Walk" then, "Walk" all within thirty seconds"""
get_text_within_quotes(text)

## get_numbers_from_text.py
def get_numbers_from_text(text):
    """Return the numeric literals that appear in ``text`` as strings.

    Recognised forms: an optional sign, an optional leading dot, digits,
    optional comma-separated groups of three digits, an optional decimal
    part and an optional exponent (for example ``36``, ``9,585``, ``-2.5``,
    ``.5``, ``1e-5``). A hyphen directly before digits is read as a sign,
    so ``4-2`` yields ``4`` and ``-2``.

    Args:
        text: The string to search.

    Returns:
        A list of the matched number strings in order of appearance.
    """
    import re
    # Raw string: '\d' is not a valid escape in a normal string literal.
    # The decimal part requires at least one digit, so the period that ends
    # a sentence ("... 36.") is not swallowed into the number.
    pattern = r'[-+]?[.]?\d+(?:,\d\d\d)*(?:\.?\d+)?(?:[eE][-+]?\d+)?'
    return re.findall(pattern, text)


# Test
text = """A rise in cases was re[prted acrpss a staggering 36 US states last week. In Florida, officals recorded 9,585 new cases on Saturday."""
get_numbers_from_text(text)
	from docx import Document
	from docx.shared import Cm, Pt


	article_1 = """Bayern Munich came out on top in a thrilling German Cup final, beating Bayer Leverkusen 4-2 to secure its 20th title and remain on course for an historic treble.
	David Alaba's stunning free kick and Serge Gnabry's clinical finish gave Bayern a commanding lead heading into half time and Hans-Dieter Flick's side seemingly already had one hand on the trophy.
	However, Leverkusen responded well early in the second half and had a golden opportunity to halve the deficit through substitute Kevin Volland."""

	article_2 = """(CNN)Liverpool got its Premier League title-winning celebrations back on track with a 2-0 win over Aston Villa, just days after being on the receiving end of a record-equaling defeat.
	Many had suggested Jurgen Klopp's side was suffering from something of a hangover during Thursday's 4-0 demolition at the hands of Manchester City -- the joint-heaviest defeat by a team already crowned Premier League champion -- but Liverpool re
	def describe_text(text):
	import re, string

	description = dict()

	# remove punctuation marks
	text_wo_punctuation_marks = re.sub(f'[%s]' % re.escape(string.punctuation), '', text)

	# tokens of the text without punctuation marks
	tokens_of_text_wo_punctuation_marks = text_wo_punctuation_marks.split(' ')
	def remove_punctuation_marks(text):
	    """Return ``text`` with every ASCII punctuation character deleted.

	    The characters removed are exactly those in ``string.punctuation``;
	    letters, digits and whitespace are left untouched.
	    """
	    import re
	    import string

	    # Escaping keeps ']', '^' and '-' literal inside the character class.
	    pattern = '[%s]' % re.escape(string.punctuation)
	    return re.sub(pattern, '', text)


	# Test
	text = """Hello, World!"""
	def get_text_within_brackets(text):
	    """Return the text enclosed by each bracket pair found in ``text``.

	    Round, square and curly brackets are recognised. Matching is lazy, so
	    each result stops at the first closing bracket after an opening one,
	    and the kind of closing bracket is not required to match the kind of
	    opening bracket. Content never spans a newline.
	    """
	    import re
	    # Inside a character class '|' is a literal pipe, not alternation, so
	    # the classes list only the bracket characters themselves.
	    pattern = r"[(\[{](.*?)[)\]}]"
	    return re.findall(pattern, text)


	# Test
	text = '''I was very surprised (and it's pretty hard to surprise me!)... He [Felix] is a gret friends of me...'''
	get_text_within_brackets(text)
	def get_consequent_title_words(text):
	import re
	pattern_compiled = re.compile(r'([A-Z][^\.!?]*[\.!?])', re.M)
	list_of_sentences = re.findall(pattern_compiled, text)
	list_of_sentence_tokens = [sentence.split(' ') for sentence in list_of_sentences]

	list_of_consequent_tokens = list()
	for tokens in list_of_sentence_tokens:
	temp_list_of_title_tokens = list()
	for index, t in enumerate(tokens):
	def get_context(text, list_of_tokens, context_span=20):
	import re
	context = []
	for token in list_of_tokens:
	all_occurences_indices = [m.start() for m in re.finditer(token, text)]
	for index in all_occurences_indices:
	left_index = max(index - context_span, 0)
	right_index = min(index + context_span, len(text))
	substring = text[left_index: right_index].strip()
	def get_sentences(text):
	    """Extract sentences from ``text`` with a simple regular expression.

	    A sentence starts at an uppercase ASCII letter and runs up to and
	    including the next '.', '!' or '?'. Anything before the first
	    uppercase letter, and any trailing text without a closing mark, is
	    not returned. Newlines inside a sentence are kept as part of it.
	    """
	    import re
	    # The pattern has no '^' or '$' anchors, so no flags are required and
	    # a separately compiled copy of it is unnecessary.
	    pattern = r'[A-Z][^.!?]*[.!?]'
	    return re.findall(pattern, text)


	# Test
	text = """This is the most frequent question we're asked by prospective students. And our response? Absolutely! We've trained people from all walks of life."""
	def get_capital_words(text):
	    """Return every all-caps word found in ``text``.

	    A word qualifies when it is a run of two or more uppercase ASCII
	    letters with a word boundary on both sides, so single capitals and
	    mixed-case words are ignored.
	    """
	    import re
	    pattern = r'(\b[A-Z]{2,}\b)'
	    return re.findall(pattern, text)


	# Test
	text = """Thank you! Your customer service request has been logged. A specialist will reach out by EOD"""
	get_capital_words(text)
	def get_text_within_quotes(text):
	    """Collect the text found between pairs of double quotes in ``text``.

	    Quotes are paired lazily from left to right and the quoted content
	    may not span a newline. The surrounding quote characters are not
	    included in the results.
	    """
	    import re
	    pattern = "\"(.*?)\""
	    return re.findall(pattern, text)


	# Test
	text = """The sign said, "Walk". Then it said, "Don't Walk" then, "Walk" all within thirty seconds"""
	get_text_within_quotes(text)
	def get_numbers_from_text(text):
	    """Return the numeric literals that appear in ``text`` as strings.

	    Recognised forms: an optional sign, an optional leading dot, digits,
	    optional comma-separated groups of three digits, an optional decimal
	    part and an optional exponent (for example ``36``, ``9,585``,
	    ``-2.5``, ``.5``, ``1e-5``). A hyphen directly before digits is read
	    as a sign, so ``4-2`` yields ``4`` and ``-2``.
	    """
	    import re
	    # Raw string: '\d' is not a valid escape in a normal string literal.
	    # The thousands groups and the decimal part are optional, so plain
	    # integers such as "36" match; the decimal part needs a digit, so a
	    # sentence-ending period is not swallowed into the number.
	    pattern = r'[-+]?[.]?\d+(?:,\d\d\d)*(?:\.?\d+)?(?:[eE][-+]?\d+)?'
	    return re.findall(pattern, text)


	# Test
	text = """A rise in cases was re[prted acrpss a staggering 36 US states last week. In Florida, officals recorded 9,585 new cases on Saturday."""
	get_numbers_from_text(text)