Skip to content

Instantly share code, notes, and snippets.

@dardanxhymshiti
Last active July 3, 2020 15:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dardanxhymshiti/7c174abd5776d8d640e0f571df8f81f0 to your computer and use it in GitHub Desktop.
Save dardanxhymshiti/7c174abd5776d8d640e0f571df8f81f0 to your computer and use it in GitHub Desktop.
def get_consequent_title_words(text):
    """Return the runs of consecutive title-cased words found in *text*.

    The text is first cut into sentences: a sentence starts at an ASCII
    capital letter and runs up to the next ``.``, ``!`` or ``?`` (or to the
    end of the text when the last sentence has no terminator).  Each sentence
    is split on whitespace, and every maximal run of adjacent title-cased
    tokens is joined with single spaces and collected.

    Punctuation is not stripped, so it stays attached to the token it was
    written next to (``"The Reds,"``).  Runs never span a sentence boundary.

    Args:
        text: The string to scan.

    Returns:
        A list of strings, one per run, in order of appearance.  The list is
        empty when no sentence or no title-cased token is found.
    """
    import re  # kept local: the snippet has no module-level import block

    # Sentence = capital letter, anything that is not a terminator, then a
    # terminator or the end of the text (so a trailing fragment is kept).
    sentence_pattern = re.compile(r'[A-Z][^.!?]*(?:[.!?]|\Z)')
    # A word including apostrophe suffixes ("Salah's", "Don't"), so that the
    # suffix is treated as part of the word instead of a new word.
    word_pattern = re.compile(r"[^\W\d_]+(?:['\u2019][^\W\d_]+)*")

    def is_title_token(token):
        """Tell whether *token* is written in title case."""
        if token.title() == token:
            return True
        # str.title() treats an apostrophe as a word boundary and turns
        # "Salah's" into "Salah'S"; re-check with apostrophe-aware casing.
        recased = word_pattern.sub(lambda m: m.group(0).capitalize(), token)
        return recased == token

    runs = []
    for sentence in sentence_pattern.findall(text):
        current_run = []
        # split() without an argument drops the empty tokens that repeated
        # spaces would otherwise produce (an empty token looks title-cased).
        for token in sentence.split():
            if is_title_token(token):
                current_run.append(token)
            elif current_run:
                runs.append(' '.join(current_run))
                current_run = []
        # Flush a run that reaches the end of the sentence.
        if current_run:
            runs.append(' '.join(current_run))
    return runs
# Example: run the extractor on a single sentence from a match report.
text = (
    "The Reds, who were give a guard of honour by the hosts beffore kick-off, "
    "could have taken the lead at Etihad Stadium but Mohamed Salah's low drive "
    "cannoned off the post."
)
get_consequent_title_words(text)
# Intended result (the capitalised runs in the sentence):
# ['The Reds,', 'Etihad Stadium', "Mohamed Salah's"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment