Skip to content

Instantly share code, notes, and snippets.

@ihebski
Created October 20, 2020 20:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ihebski/3073daff8e831ae899ab9049d57a651e to your computer and use it in GitHub Desktop.
Save ihebski/3073daff8e831ae899ab9049d57a651e to your computer and use it in GitHub Desktop.
import re
from furl import furl
def param_extract(response, level, black_list, placeholder):
    """Extract parameterised URLs from *response* and mask their values.

    Every substring of ``response`` matching ``<scheme>://...?...=<char>`` is
    treated as a candidate URL (the pattern works line by line, because ``.``
    does not cross newlines).

    Args:
        response: Text to scan for URLs.
        level: When equal to ``'high'``, each URL that contains a second
            ``=`` additionally yields a variant cut right after that second
            ``=`` with ``placeholder`` appended.
        black_list: Sequence of strings joined with ``|`` into one regular
            expression. Entries are NOT escaped, so each behaves as a regex
            fragment. URLs matching it are dropped. When the sequence is
            empty no filtering happens.
        placeholder: String substituted for parameter values.

    Returns:
        A de-duplicated list of rewritten URLs, in no particular order.

    Note:
        The two branches deliberately keep the original, differing output:
        with a non-empty ``black_list`` the surviving URL is rebuilt through
        ``furl`` with every query value set to ``placeholder``; with an empty
        ``black_list`` the URL is cut right after its first ``=`` and
        ``placeholder`` is appended.
    """
    # The trailing ``[^$]`` is a character class (any character except a
    # literal "$"), so each match carries one character past its last "=".
    candidates = set(re.findall(r'.*?:\/\/.*\?.*\=[^$]', response))

    # Compiled once up front instead of once per candidate URL.
    blacklist_re = re.compile("|".join(black_list)) if black_list else None

    final_uris = set()
    for uri in candidates:
        first_eq = uri.find('=')
        second_eq = uri.find('=', first_eq + 1)

        if blacklist_re is not None:
            if blacklist_re.search(uri):
                continue
            f = furl(uri)
            if len(f.args) > 0:
                # Snapshot the keys so reassigning values cannot disturb
                # the iteration over the query mapping.
                for key in list(f.args):
                    f.args[key] = placeholder
                final_uris.add(f.url)
        else:
            final_uris.add(uri[:first_eq + 1] + placeholder)

        # Without a second "=", find() returns -1 and the slice would be
        # empty, emitting the bare placeholder as a bogus "URL"; skip it.
        if level == 'high' and second_eq != -1:
            final_uris.add(uri[:second_eq + 1] + placeholder)

    return list(final_uris)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment