Skip to content

Instantly share code, notes, and snippets.

import itertools
def grouper(n, iterable):
    """Yield successive tuples of at most ``n`` items taken from ``iterable``.

    Every chunk holds ``n`` items except possibly the last, which holds
    whatever remains. Nothing is yielded for an empty iterable or when
    ``n`` is 0.

    Args:
        n: Maximum number of items per chunk.
        iterable: Any iterable; it is consumed lazily, one chunk at a time.

    Yields:
        Tuples of consecutive items, in their original order.

    Raises:
        ValueError: Raised by ``itertools.islice`` on first iteration if
            ``n`` is negative.
    """
    # One shared iterator, so each islice() call resumes where the
    # previous chunk stopped instead of restarting from the beginning.
    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, n))
        if not chunk:
            # An empty chunk means the underlying iterator is exhausted.
            return
        yield chunk
import json
import time
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
def process_browser_log_entry(entry):
    """Decode the JSON-encoded ``message`` field of a log entry in place.

    ``entry["message"]`` is expected to be a JSON string whose top-level
    object has a ``"message"`` key; that inner value replaces the original
    string. Presumably ``entry`` comes from the browser's performance log
    collected through Selenium -- confirm at the call site.

    Args:
        entry: Mapping with a ``"message"`` key holding a JSON string.
            It is mutated by this call.

    Returns:
        The same ``entry`` object, with ``entry["message"]`` replaced by
        the decoded inner message.

    Raises:
        KeyError: If ``"message"`` is missing from ``entry`` or from the
            decoded payload.
        json.JSONDecodeError: If ``entry["message"]`` is not valid JSON.
    """
    entry["message"] = json.loads(entry["message"])["message"]
    return entry
# https://www.elastic.co/blog/loading-wikipedia
# https://github.com/wikimedia/search-extra
# bin/elasticsearch-plugin install analysis-icu
# bin/elasticsearch-plugin install org.wikimedia.search:extra:6.3.1.2
# Variables
# Presumably the Elasticsearch endpoint as "host:port" (no URL scheme),
# given the plugin notes above -- confirm where it is used.
es = "host.docker.internal:9200"

# Hostname of the wiki site this script targets.
site = "en.wikipedia.org"

# Presumably the name of the search index for that site -- confirm where
# it is used.
index = "enwiki"