Skip to content

Instantly share code, notes, and snippets.

View clbarnes's full-sized avatar

Chris Barnes clbarnes

View GitHub Profile
@clbarnes
clbarnes / zellij
Created March 10, 2024 21:58
Wrapper script for zellij
#!/bin/bash
# Script which uses the cached zellij if it exists;
# otherwise downloads a fresh copy.
# Deletes the cached copy every month.
set -euo pipefail
launchpath="/tmp/zellij/launch"
timepath="/tmp/zellij/last_updated"
@clbarnes
clbarnes / httpdir.py
Created January 17, 2024 19:03
Given a manifest file, copy a directory tree from a URL base to a local directory
#!/usr/bin/env python3
"""
Copy a directory tree of files over HTTPS.
If HTTP basic auth is required, use an environment variable like
`HTTP_BASIC_AUTH="myuser:mypassword"`.
"""
import os
import sys
from urllib.request import Request, urlopen
from base64 import b64encode
@clbarnes
clbarnes / euler_scraper.py
Created November 3, 2023 13:41
Scrape Project Euler and create a stub Python script for each problem (ancient code, transferred from an old account)
#!/usr/bin/python
from bs4 import BeautifulSoup
import urllib
path = '/home/tunisia/Desktop/Project Euler/'
for probnum in range(1,444):
html = BeautifulSoup(urllib.urlopen('http://projecteuler.net/problem=%d' % probnum))
title = html.h2
@clbarnes
clbarnes / rosalind_scraper.py
Last active November 3, 2023 13:41
Scrape all of the Project Rosalind problems and create Python stubs for each (obsolete code transferred from an old account)
#!/usr/bin/python2
from bs4 import BeautifulSoup
import urllib
path = '/home/tunisia/Projects/rosalind/'
html = BeautifulSoup(urllib.urlopen('http://rosalind.info/problems/list-view/'))
tr_all = html.find_all('tr')
@clbarnes
clbarnes / browser_pool.py
Created August 18, 2023 18:45
Async client for fetching HTML content from pages requiring JavaScript execution, using a pool of tabs
from contextlib import asynccontextmanager
import asyncio as aio
from playwright.async_api import async_playwright
class BrowserPool:
def __init__(self, n_tabs=10, executable=None) -> None:
self.executable = executable
self.n_tabs = n_tabs
self.tabs_remaining = n_tabs
@clbarnes
clbarnes / aio_ratelimits.py
Created August 9, 2023 11:08
Semaphore-based asyncio rate limiters
import asyncio as aio
from collections import deque
from typing import Awaitable, Iterable, TypeVar
T = TypeVar("T")
class BaseLimit:
async def limit(self, awa: Awaitable[T]) -> T:
@clbarnes
clbarnes / rate_limited_httpx.py
Created July 8, 2023 01:52
httpx.AsyncClient subclass with semaphore-based rate limiting
import asyncio
import datetime as dt
from functools import wraps
from typing import Union
from httpx import AsyncClient
# unless you keep a strong reference to a running task, it can be dropped during execution
# https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task
_background_tasks = set()
@clbarnes
clbarnes / nx_node_link_data_update.py
Created June 1, 2023 11:15
Update networkx v1 node_link_data to v2+
@clbarnes
clbarnes / nx_coalesce_nodes.py
Last active June 1, 2023 11:08
Group nodes together in a networkx.DiGraph
#!/usr/bin/env python3
"""
Group nodes together in a networkx DiGraph.
Requires networkx.
"""
from typing import Hashable, Any, Callable, Optional
import networkx as nx
@clbarnes
clbarnes / dask_open.py
Created February 3, 2023 10:44
Improve ergonomics for creating a dask array from a stack or chunking of files
#!/usr/bin/env python3
"""Load a list of images in dask."""
from pathlib import Path
import re
from typing import Callable, Iterable, Optional
import dask.array as da
from dask import delayed
import numpy as np