Created
April 27, 2014 23:16
-
-
Save armant/11357872 to your computer and use it in GitHub Desktop.
Shortest snippet of content that contains all N words in any order
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Given a page of content with alphanumeric words, and a search phrase of N words, write an algorithm that will return the shortest | |
#snippet of content that contains all N words in any order. | |
#Example: The George Washington Bridge in New York City is one of the oldest bridges ever constructed. It is now being remodeled | |
#because the bridge is a landmark. City officials say that the landmark bridge effort will create a lot of new jobs in the city. | |
#Search Terms: Landmark City Bridge | |
#Result: bridge is a landmark. City | |
import sys | |
import re | |
def FindShortest(CurrentTerm): | |
for MatchIndex in MatchIndexes[CurrentTerm]: | |
#check | |
#print MatchIndexes[CurrentTerm].index(MatchIndex) | |
global LeftBorder | |
global RightBorder | |
global FinalLeftBorder | |
global FinalRightBorder | |
if CurrentTerm == 0: | |
LeftBorder = MatchIndex | |
RightBorder = MatchIndex + len(SearchTerms[0]) - 1 | |
#check | |
#print ("With MatchIndex ", MatchIndex, " assigned LeftBorder to ", | |
# LeftBorder, " and RightBorder to ", RightBorder) | |
if len(MatchIndexes) > 1: | |
FindShortest(1) | |
else: | |
if FinalRightBorder - FinalLeftBorder > RightBorder - LeftBorder: | |
FinalLeftBorder = LeftBorder | |
FinalRightBorder = RightBorder | |
#check | |
#print ("Changed values with MatchIndex ", MatchIndex, | |
#" and CurrentTerm ", CurrentTerm, " New FinalLeftBorder is ", | |
#FinalLeftBorder, " and new FinalRightBorder is ", FinalRightBorder) | |
else: | |
OptimalRightBorderFound = False | |
OldLeftBorder = LeftBorder | |
OldRightBorder = RightBorder | |
if MatchIndex < LeftBorder: | |
LeftBorder = MatchIndex | |
#check | |
#print "With MatchIndex ", MatchIndex, " assigned LeftBorder to ", LeftBorder | |
elif MatchIndex + len(SearchTerms[CurrentTerm]) - 1 > RightBorder: | |
RightBorder = MatchIndex + len(SearchTerms[CurrentTerm]) - 1 | |
OptimalRightBorderFound = True | |
#check | |
#print "With MatchIndex ", MatchIndex, " assigned RightBorder to ", RightBorder | |
else: | |
OptimalRightBorderFound = True | |
#print "OptimalRightBorderFound is True with MatchIndex ", MatchIndex | |
if CurrentTerm < len(SearchTerms) - 1: | |
FindShortest(CurrentTerm + 1) | |
else: | |
if FinalRightBorder - FinalLeftBorder > RightBorder - LeftBorder: | |
FinalLeftBorder = LeftBorder | |
FinalRightBorder = RightBorder | |
#check | |
#print ("Changed values with MatchIndex ", MatchIndex, | |
# " and CurrentTerm ", CurrentTerm, " New FinalLeftBorder is ", | |
# FinalLeftBorder, " and new FinalRightBorder is ", FinalRightBorder) | |
LeftBorder = OldLeftBorder | |
RightBorder = OldRightBorder | |
#check | |
#print "LeftBorder became ", LeftBorder, " again, and RightBorder became ", RightBorder, " again" | |
if OptimalRightBorderFound: | |
break | |
f = open('input.txt', 'r') | |
#put all text in the file in the string Text | |
Text = "" | |
for line in f: | |
Text += line | |
#check | |
#print Text | |
#remove the last line, containing the search terms, from Text | |
Text = Text[:len(Text) - len(line)] | |
#check | |
#print Text | |
#put search terms, which are on the last line of the text, in the SearchTerms list | |
SearchTerms = re.findall(r'\w+', line) | |
#check | |
#print SearchTerms | |
#record the indexes of all word matches | |
MatchIndexes = [] | |
for Term in SearchTerms: | |
#check | |
#print Term | |
TempList = [] | |
if Term.lower() == Text[:len(Term)].lower(): | |
TempList.append(0) | |
RegexVariable = r"\W" + Term + r"\W" | |
p = re.compile(RegexVariable, re.IGNORECASE) | |
#print p | |
for Matches in p.finditer(Text): | |
#check | |
#print Matches | |
#print Matches.start(), Matches.group() | |
TempList.append(Matches.start() + 1) | |
#check | |
#print TempList | |
if TempList == []: | |
print "At least one of the search terms is not present in the text." | |
sys.exit() | |
MatchIndexes.append(TempList) | |
#check | |
#print MatchIndexes | |
#find the shortest snippet | |
FinalLeftBorder = 0 | |
FinalRightBorder = len(Text) - 1 | |
LeftBorder = 0 | |
RightBorder = 0 | |
FindShortest(0) | |
#check | |
#print FinalLeftBorder, " ", FinalRightBorder | |
#print the result | |
for i in range(FinalLeftBorder, FinalRightBorder + 1): | |
sys.stdout.write(Text[i]) | |
f.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment