Skip to content

Instantly share code, notes, and snippets.

@armant

armant/gist:11357872

Created Apr 27, 2014
Embed
What would you like to do?
Shortest snippet of content that contains all N words in any order
#Given a page of content with alphanumeric words, and a search phrase of N words, write an algorithm that will return the shortest
#snippet of content that contains all N words in any order.
#Example: The George Washington Bridge in New York City is one of the oldest bridges ever constructed. It is now being remodeled
#because the bridge is a landmark. City officials say that the landmark bridge effort will create a lot of new jobs in the city.
#Search Terms: Landmark City Bridge
#Result: bridge is a landmark. City
import sys
import re
def FindShortest(CurrentTerm):
for MatchIndex in MatchIndexes[CurrentTerm]:
#check
#print MatchIndexes[CurrentTerm].index(MatchIndex)
global LeftBorder
global RightBorder
global FinalLeftBorder
global FinalRightBorder
if CurrentTerm == 0:
LeftBorder = MatchIndex
RightBorder = MatchIndex + len(SearchTerms[0]) - 1
#check
#print ("With MatchIndex ", MatchIndex, " assigned LeftBorder to ",
# LeftBorder, " and RightBorder to ", RightBorder)
if len(MatchIndexes) > 1:
FindShortest(1)
else:
if FinalRightBorder - FinalLeftBorder > RightBorder - LeftBorder:
FinalLeftBorder = LeftBorder
FinalRightBorder = RightBorder
#check
#print ("Changed values with MatchIndex ", MatchIndex,
#" and CurrentTerm ", CurrentTerm, " New FinalLeftBorder is ",
#FinalLeftBorder, " and new FinalRightBorder is ", FinalRightBorder)
else:
OptimalRightBorderFound = False
OldLeftBorder = LeftBorder
OldRightBorder = RightBorder
if MatchIndex < LeftBorder:
LeftBorder = MatchIndex
#check
#print "With MatchIndex ", MatchIndex, " assigned LeftBorder to ", LeftBorder
elif MatchIndex + len(SearchTerms[CurrentTerm]) - 1 > RightBorder:
RightBorder = MatchIndex + len(SearchTerms[CurrentTerm]) - 1
OptimalRightBorderFound = True
#check
#print "With MatchIndex ", MatchIndex, " assigned RightBorder to ", RightBorder
else:
OptimalRightBorderFound = True
#print "OptimalRightBorderFound is True with MatchIndex ", MatchIndex
if CurrentTerm < len(SearchTerms) - 1:
FindShortest(CurrentTerm + 1)
else:
if FinalRightBorder - FinalLeftBorder > RightBorder - LeftBorder:
FinalLeftBorder = LeftBorder
FinalRightBorder = RightBorder
#check
#print ("Changed values with MatchIndex ", MatchIndex,
# " and CurrentTerm ", CurrentTerm, " New FinalLeftBorder is ",
# FinalLeftBorder, " and new FinalRightBorder is ", FinalRightBorder)
LeftBorder = OldLeftBorder
RightBorder = OldRightBorder
#check
#print "LeftBorder became ", LeftBorder, " again, and RightBorder became ", RightBorder, " again"
if OptimalRightBorderFound:
break
f = open('input.txt', 'r')
#put all text in the file in the string Text
Text = ""
for line in f:
Text += line
#check
#print Text
#remove the last line, containing the search terms, from Text
Text = Text[:len(Text) - len(line)]
#check
#print Text
#put search terms, which are on the last line of the text, in the SearchTerms list
SearchTerms = re.findall(r'\w+', line)
#check
#print SearchTerms
#record the indexes of all word matches
MatchIndexes = []
for Term in SearchTerms:
#check
#print Term
TempList = []
if Term.lower() == Text[:len(Term)].lower():
TempList.append(0)
RegexVariable = r"\W" + Term + r"\W"
p = re.compile(RegexVariable, re.IGNORECASE)
#print p
for Matches in p.finditer(Text):
#check
#print Matches
#print Matches.start(), Matches.group()
TempList.append(Matches.start() + 1)
#check
#print TempList
if TempList == []:
print "At least one of the search terms is not present in the text."
sys.exit()
MatchIndexes.append(TempList)
#check
#print MatchIndexes
#find the shortest snippet
FinalLeftBorder = 0
FinalRightBorder = len(Text) - 1
LeftBorder = 0
RightBorder = 0
FindShortest(0)
#check
#print FinalLeftBorder, " ", FinalRightBorder
#print the result
for i in range(FinalLeftBorder, FinalRightBorder + 1):
sys.stdout.write(Text[i])
f.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment