Karl Lorey lorey

lorey /
Last active Apr 10, 2021
Access Chrome's network tab (e.g. XHR requests) with Selenium
# This small example shows you how to access JS-based requests via Selenium
# Like this, one can access raw data for scraping,
# for example on many JS-intensive/React-based websites
from time import sleep
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
lorey / block-slack-user.js
Last active Apr 16, 2020
Block a user in Slack
// This will hide all messages from a specific user in Slack. Enjoy the silence.
// get the owner id of a message
// -> loops back through list to find owner
function getOwnerId(i) {
var current = i
var sender = current.querySelector(".c-message__sender_link");
var ownerId = sender ? sender.dataset.messageSender : null;
lorey /
Created Jul 11, 2019
Delete all files that contain a specific string via command line
# say we want to delete all files that contain the string "trash"
# source:
# 1) create a file that lists all files to delete
#    (one "rm <path>" line per file whose content matches "trash";
#    note: awk's $1 truncates paths that contain whitespace)
find .cache/ | xargs grep -l "trash" | awk '{print "rm "$1}' > delete_trash.sh
# 2) check for errors and stuff
less delete_trash.sh
# 3) make the file executable and execute
chmod +x delete_trash.sh && ./delete_trash.sh
lorey /
Created May 29, 2019
Function to flatten hierarchical parameters when training a scikit-learn pipeline (using pandas)
def hierarchical_to_flattened_parameters(parameters_dict):
    """
    Flatten a hierarchical dict to an sklearn parameter set.

    Nested keys are joined with a double underscore, e.g.
    ``{'clf': {'C': 1}}`` becomes ``{'clf__C': 1}``.

    :param parameters_dict: hierarchical dict
    :return: flattened dict
    """
    # Imported locally so the helper works without a module-level import.
    from pandas import json_normalize

    # json_normalize yields a one-row frame for a single dict; take that row.
    return json_normalize(parameters_dict, sep='__').to_dict(orient='records')[0]
lorey /
Created May 21, 2019
Dealing with HTTPSConnectionPool errors in requests with adapters and backoff
# this snippet will deal with errors like HTTPSConnectionPool: Max retries exceeded with url...
# by using a backoff factor
# further reading:
# - docs:
# - stack overflow issue:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
lorey /
Created Mar 16, 2019
Keeping Pandas DataFrames clean when importing JSON
# json_normalize lives at the top level of pandas since pandas 1.0
from pandas import json_normalize

# NOTE(review): `data` is not defined in this excerpt — presumably the parsed
# JSON (a dict or a list of dicts); confirm against the full snippet.
df = json_normalize(data)
lorey /
Created Aug 27, 2017
Markdown to Plaintext in Python
from bs4 import BeautifulSoup
from markdown import markdown
def markdown_to_text(markdown_string):
""" Converts a markdown string to plaintext """
# md -> html -> text since BeautifulSoup can extract text cleanly
html = markdown(markdown_string)
# remove code snippets
lorey /
Last active Jul 24, 2017
PostgreSQL tables to csv (Backup database tables as CSV with Pandas)
import os
import pandas as pd
import psycopg2
from dotenv import find_dotenv
from dotenv import load_dotenv
from psycopg2.extras import DictCursor
def main():
lorey /
Created Apr 1, 2017
Selenium: Prevent download dialog and download file automatically
# adapted from
# Configure a Firefox profile that saves matching downloads without a dialog.
profile = webdriver.FirefoxProfile()
profile.set_preference('browser.download.folderList', 2)  # custom location
# NOTE(review): preference name reconstructed (the original key was lost);
# presumably the flag that hides the download manager window — confirm.
profile.set_preference('browser.download.manager.showWhenStarting', False)
profile.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/vcard')  # type of file to download
# use the out folder of the script path
profile.set_preference('browser.download.dir', os.path.join(os.path.dirname(os.path.abspath(__file__)), 'out'))
mkdir -p ./rs/
for semester in $(seq -f "%02g" 4 15); do
wget -O ./rs/20${semester}_SS_aufgaben.pdf --user "rs" --password "rechner.strukturen"${semester}/kl/aufgaben.pdf ;
wget -O ./rs/20${semester}_WS_aufgaben.pdf --user "rs" --password "rechner.strukturen"${semester}/kl/aufgaben.pdf ;
wget -O ./rs/20${semester}_SS_loesung.pdf --user "rs" --password "rechner.strukturen"${semester}/kl/loesung.pdf ;
wget -O ./rs/20${semester}_WS_loesung.pdf --user "rs" --password "rechner.strukturen"${semester}/kl/loesung.pdf ;