Skip to content

Instantly share code, notes, and snippets.

@teticio
Last active February 14, 2022 15:06
Show Gist options
  • Save teticio/88f760869ba7e7f2570e3474da0eafab to your computer and use it in GitHub Desktop.
Save teticio/88f760869ba7e7f2570e3474da0eafab to your computer and use it in GitHub Desktop.
Rank competitors by leveraging Google's autocomplete function while searching for "<target> vs"
import argparse
import requests
from tqdm import tqdm
from pprint import pprint
from itertools import islice
from urllib.parse import quote
from functools import lru_cache
from collections import Counter, OrderedDict
@lru_cache
def autocomplete(request):
    """Fetch Google autocomplete suggestions for ``request``.

    Results are memoised per ``request`` string by ``lru_cache``, so asking
    for the same query again does not trigger another HTTP call.

    Args:
        request: The search text to complete, e.g. ``"mlflow vs"``.

    Returns:
        A ``(suggestions, relevances)`` pair. ``suggestions`` contains every
        entry of the response's second element with its first
        ``len(request) + 1`` characters removed (the query plus the character
        that follows it). ``relevances`` is the value stored under
        ``'google:suggestrelevance'`` in the response's fifth element.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError, LookupError, TypeError: If the body is not JSON of the
            shape described above.
    """
    response = requests.get(
        f"http://google.com/complete/search?output=toolbar&client=chrome&q={quote(request)}",
        # Without a timeout a stalled connection would block the run forever.
        timeout=10,
    )
    response.raise_for_status()
    # Decode the body once and reuse it for both parts of the result.
    payload = response.json()
    # NOTE(review): the query is stripped by length, not by matching, so a
    # suggestion that does not begin with the query is cut at an arbitrary
    # position.
    prefix_length = len(request) + 1
    suggestions = [item[prefix_length:] for item in payload[1]]
    return suggestions, payload[4]['google:suggestrelevance']
if __name__ == '__main__':
    # Local import: only needed for the warning stream used below.
    import sys

    # Command line: a required target plus two optional integer knobs.
    parser = argparse.ArgumentParser()
    parser.add_argument('target', type=str)
    parser.add_argument('--top_k', type=int)
    parser.add_argument('--iterations', type=int)
    args = parser.parse_args()
    # `or` makes both an omitted flag and an explicit 0 fall back to 10.
    top_k = args.top_k or 10
    iterations = args.iterations or 10

    # Running scoreboard of keyword -> accumulated relevance, seeded with the
    # target itself at score 0.
    items = Counter({args.target: 0})
    for _ in tqdm(range(iterations)):
        # Iterate over a snapshot because `items` is rebound inside the loop.
        for keyword in items.copy():
            try:
                request = f"{keyword} vs"
                keywords, scores = autocomplete(request)
                new_items = Counter(dict(zip(keywords, scores)))
                # Counter addition keeps only positive totals; the scoreboard
                # is then trimmed to the `top_k` highest entries.
                items = Counter(dict((items + new_items).most_common(top_k)))
            except Exception as error:
                # Best effort: a failed lookup skips this keyword only.
                # Catching Exception (not a bare except) lets Ctrl-C and
                # SystemExit stop the run, and the failure is reported
                # instead of being silently discarded.
                tqdm.write(f"skipping {keyword!r}: {error!r}", file=sys.stderr)
                continue
    pprint(items)
@teticio
Copy link
Author

teticio commented Feb 14, 2022

e.g.
python competitive-landscape.py mlflow

Counter({'airflow': 68241,
         'kubeflow': 41095,
         'mlflow': 36177,
         'sagemaker': 24768,
         'argo': 21076,
         'prefect': 16668,
         'weights and biases': 12518,
         'luigi': 11286,
         'scale and polish': 11277,
         'metaflow': 11259})

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment