Skip to content

Instantly share code, notes, and snippets.

@CHBaker
Created February 8, 2018 02:05
Show Gist options
  • Save CHBaker/bbf56d01795ef7eee7f27267d4fccf00 to your computer and use it in GitHub Desktop.
Save CHBaker/bbf56d01795ef7eee7f27267d4fccf00 to your computer and use it in GitHub Desktop.
Indexing function for storing website keywords and URLs
# Shared keyword index, filled in by add_to_index(): a list of
# [keyword, [url, ...]] entries.
# Intended to hold each keyword, and each URL under a keyword, only once.
index = []
def find(keyword, index):
    """Yield the location of ``keyword`` in every entry of ``index``.

    Each entry of ``index`` is searched with its own ``.index()`` method.
    For every entry that contains ``keyword`` a tuple
    ``(entry_position, offset_within_entry)`` is yielded; entries whose
    ``.index()`` raises ``ValueError`` (keyword absent) are skipped.
    """
    for entry_no, entry in enumerate(index):
        try:
            offset = entry.index(keyword)
        except ValueError:
            # This entry does not contain the keyword; move on to the next.
            pass
        else:
            yield entry_no, offset
def add_to_index(index, keyword, url):
    """Record ``url`` under ``keyword`` in ``index``, skipping repeats.

    ``index`` is a list of ``[keyword, [url, ...]]`` entries and is
    modified in place:

    * if no entry exists for ``keyword``, a new ``[keyword, [url]]`` entry
      is appended;
    * if an entry exists and does not yet list ``url``, ``url`` is appended
      to that entry's URL list;
    * if the entry already lists ``url``, nothing changes.

    Only the first entry whose keyword slot equals ``keyword`` is used.

    Returns:
        None. The result is the mutation of ``index``.
    """
    for entry in index:
        if entry[0] == keyword:
            urls = entry[1]
            # Use a membership test, not the truthiness of list.index():
            # a URL stored at position 0 yields the falsy value 0, which
            # previously caused it to be appended again, while a genuinely
            # new URL raised ValueError and was never stored.
            if url not in urls:
                urls.append(url)
            return
    # Keyword not seen before: start a new entry with its first URL.
    index.append([keyword, [url]])
# Demo: the numbered calls below correspond to the numbered RESULTS entries.
# 1: first call for 'cats', made while the index is still empty
add_to_index(index, keyword='cats', url='http://cats.com')
# 2: the same keyword and URL again (a repeat)
add_to_index(index, keyword='cats', url='http://cats.com')
# 3: a second keyword
add_to_index(index, keyword='dogs', url='http://dogs.org')
# 4: a third keyword
add_to_index(index, keyword='chewed-gum', url='http://chewed-gum.org')
# RESULTS (expected contents of `index` after each numbered call above,
# with repeated keywords and URLs skipped):
# 1 >>> [['cats', ['http://cats.com']]]
# 2 >>> [['cats', ['http://cats.com']]]
# 3 >>> [
#     ['cats', ['http://cats.com']],
#     ['dogs', ['http://dogs.org']]
# ]
# 4 >>> [
#     ['cats', ['http://cats.com']],
#     ['dogs', ['http://dogs.org']],
#     ['chewed-gum', ['http://chewed-gum.org']]
# ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment