Skip to content

Instantly share code, notes, and snippets.

@bchess
bchess / serialize_mp.py
Last active April 25, 2024 21:00
Example tensorizer serialization in a subprocess to avoid GIL contention
import torch
from tensorizer import TensorSerializer
from transformers import AutoModelForCausalLM
import torch.multiprocessing as mp
def do_serialize(uri, model):
    """Serialize ``model`` to ``uri`` using a ``TensorSerializer``.

    Args:
        uri: Destination handed to the ``TensorSerializer`` constructor.
        model: Module passed to ``TensorSerializer.write_module``.
    """
    serializer = TensorSerializer(uri)
    try:
        serializer.write_module(model)
    finally:
        # Close even when writing fails so the serializer is never left open.
        serializer.close()
@bchess
bchess / subprocess_tee.py
Created November 12, 2019 20:37
subprocess.check_output while tee-ing output to stdout/stderr
def check_output_splitlines(args: List[str]) -> List[str]:
"""This is like subprocess.check_output().split('\n'), however it also
copies all stderr and stdout to our own sys.stderr/stdout
"""
stdout_buffer = io.StringIO(newline=None)
# use pty instead of pipes so python subprocesses do not buffer
stdout_ours, stdout_theirs = pty.openpty()
stderr_ours, stderr_theirs = pty.openpty()
""" Emit all nodes (with an optional label selector) that have no pods running except for daemonsets
"""
import sys
from kubernetes import client, config
config.load_kube_config()
# The label selector is taken from the command line only when exactly one
# argument is supplied; otherwise it stays empty.
node_label_selector = sys.argv[1] if len(sys.argv) == 2 else ''
@bchess
bchess / ls_archived_files.py
Created February 12, 2019 21:28
List GS objects that have been deleted
from urllib.parse import urlparse
from google.cloud import storage
import sys
# Exactly one argument is required; otherwise print usage to stderr and exit
# with status 1.
if len(sys.argv) != 2:
    print('Usage: %s gs://BUCKETNAME/PREFIX' % (sys.argv[0]), file=sys.stderr)
    sys.exit(1)

# Split the supplied argument into its URL components.
parse_result = urlparse(sys.argv[1])
@bchess
bchess / bq_expand.py
Created February 6, 2019 06:15
Generate a query that expands repeated fields in bigquery tables
import string
from google.cloud import bigquery
# BigQuery client constructed with no explicit arguments.
client = bigquery.Client()
# Placeholder names -- presumably meant to be replaced with the real dataset
# and table before running (TODO confirm).
DATASET_NAME = 'TODO'
TABLE_NAME = 'TODO'
# Dataset reference obtained from the client, then the table reference
# obtained from that dataset reference.
dataset_ref = client.dataset(DATASET_NAME)
table_ref = dataset_ref.table(TABLE_NAME)
@bchess
bchess / normalize_csv.py
Created January 23, 2019 07:36
Re-format a bunch of CSVs into one giant CSV with all fields
import csv
import sys
# Collect every column name seen across the input CSVs (one file per
# command-line argument), keeping first-seen order and skipping duplicates.
all_keys = []
for filename in sys.argv[1:]:
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for files passed to a reader.
    with open(filename, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None when the file is empty; treat that as no columns
        # instead of failing on iteration.
        for new_key in reader.fieldnames or []:
            if new_key not in all_keys:
                all_keys.append(new_key)
### Keybase proof
I hereby claim:
* I am bchess on github.
* I am bchess (https://keybase.io/bchess) on keybase.
* I have a public key ASBXQb1520fzF187iQXBiuhOs3Jv0N6eNRiUwlqfKgUBBQo
To claim this, I am signing this object:
diff --git a/paasta_tools/utils.py b/paasta_tools/utils.py
index 061a87a..1fecb7c 100644
--- a/paasta_tools/utils.py
+++ b/paasta_tools/utils.py
@@ -1969,3 +1969,10 @@ class _Timeout(object):
exc_info = ret[1]
six.reraise(*exc_info)
raise TimeoutError(self.error_message)
+
+def services_running_here_via_docker():
@bchess
bchess / argparse_test.py
Created February 23, 2017 14:34
argparse parse_known_args()
import argparse
# add_help=False stops argparse from registering its own -h/--help option, so
# -h can be used as the short form of --hostname below.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('--env', '-e', action='append')
parser.add_argument('--hostname', '-h')

# Case 1: a single -e option is recognised and collected into a list.
known, unknown = parser.parse_known_args(['-e', 'key=value'])
# print() calls replace the Python 2 print statements, which are syntax
# errors on Python 3.
print('1. known', known)
print('1. unknown', unknown)
assert known.env == ['key=value']