Skip to content

Instantly share code, notes, and snippets.

@hughdbrown
hughdbrown / count_stable_particles.py
Last active September 1, 2021 02:47
Some sort of code challenge
from itertools import groupby
def cumulative(arr):
    """Return the differences between consecutive elements of ``arr``.

    Despite the name, this is a first difference rather than a running
    total: element ``k`` of the result is ``arr[k + 1] - arr[k]``. The
    result is therefore one item shorter than ``arr``; an empty or
    single-element input yields ``[]``.

    ``arr`` must support slicing (e.g. a list or tuple).

    >>> cumulative([-1, 1, 3, 3, 2])
    [2, 2, 0, -1]
    """
    # Pair every element with its successor instead of indexing by position.
    return [after - before for before, after in zip(arr, arr[1:])]
def stable_particle(arr):
"""
>>> stable_particle([-1, 1, 3, 3, 3, 2, 3, 2, 1, 0])
5
@hughdbrown
hughdbrown / median_of_sorted_arrays.py
Last active May 13, 2020 16:35
Median of two sorted arrays
from typing import List
from random import randint
def findMedianSortedArrays(nums1: List[int], nums2: List[int]) -> float:
n1: int = len(nums1)
n2: int = len(nums2)
if n1 > n2:
return findMedianSortedArrays(nums2, nums1)
@hughdbrown
hughdbrown / tfidf.py
Created May 8, 2020 18:13
TFIDF implementation
# Reimplementation of tfidf.py here:
# https://gist.github.com/sloria/6407257
from math import log
from collections import Counter
class TFIDF(object):
def __init__(self, corpus):
self.corpus = corpus
self.ndocs = len(corpus)
# Based on this repository of coronavirus data:
# git clone git@github.com:nytimes/covid-19-data.git
from csv import DictReader
from collections import defaultdict
from pprint import pprint
# fields = ['date', 'state', 'cases', 'deaths']
# Selecting on 'cases' gives results that appear influenced by testing.
# Using 'deaths' appears less influenced by noise and human factors, in my opinion.
@hughdbrown
hughdbrown / check-create-index.py
Created January 9, 2020 18:09
Code to check that create_index.js has collections listed in sequence
def load():
    """Collect the ``db.<name>`` references found in ``config/create_index.js``.

    The file is read relative to the current working directory. Each line
    is stripped of trailing whitespace and split on ``.``; a line is kept
    when it has at least two pieces and the first piece is exactly ``db``
    (so lines with leading whitespace before ``db`` are not matched).

    Returns:
        A list of ``(line_index, "db.<name>")`` tuples in file order, where
        ``line_index`` is the zero-based position of the line in the file
        (non-matching lines still advance the index).

    Raises:
        OSError: if the file cannot be opened.
    """
    with open("config/create_index.js") as handle:
        # Only the first two pieces are ever used, so cap the split there;
        # the generator streams the file instead of buffering every line.
        pieces_per_line = (line.rstrip().split('.', 2) for line in handle)
        return [
            (index, pieces[0] + '.' + pieces[1])
            for index, pieces in enumerate(pieces_per_line)
            if len(pieces) >= 2 and pieces[0] == 'db'
        ]
@hughdbrown
hughdbrown / create_table_stmt.py
Created September 5, 2019 18:32
Make a postgresql CREATE TABLE statement by inferring types in a CSV file
import os.path
from csv import DictReader
# Absolute, normalised path of the sample CSV (``~`` expanded to the home dir).
datafile = os.path.normpath(os.path.expanduser("~/workspace/DataRobot/tests/testdata/10k_diabetes.csv"))
# The csv module asks for files to be opened with newline="" so that
# newlines embedded inside quoted fields are interpreted correctly.
with open(datafile, newline="") as handle:
    reader = DictReader(handle)
    # Column names taken from the header row of the file.
    fields = reader.fieldnames
    # Materialise every data row while the file is still open.
    rows = list(reader)
@hughdbrown
hughdbrown / maximum_area.py
Last active August 12, 2019 12:53
Find all connected areas and print size of largest
from queue import Queue
from pprint import pprint
def neighbors(coord):
x, y = coord
return {
(x - 1, y),
(x + 1, y),
(x, y - 1),
(x, y + 1),
@hughdbrown
hughdbrown / absolute_import.py
Last active August 9, 2019 19:34
All your absolute_importing needs
#!/usr/bin/env python
import os
class SedFile(object):
def __init__(self, filename):
self.filename = filename
self.modified = False
with open(self.filename, "r") as handle:
self.data = [line.rstrip() for line in handle]
@hughdbrown
hughdbrown / fixture-resolve.py
Created June 20, 2019 17:04
Script to resolve where a pytest fixture lives. I wish I were joking.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Resolve where some test fixture lives
Provides CLI:
* start -- location in tree of directories to start
* resolve -- resource to resolve
"""
from glob import glob
@hughdbrown
hughdbrown / fixture-resolve.py
Created June 20, 2019 17:04
Script to resolve where a pytest fixture lives. I wish I were joking.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Resolve where some test fixture lives
Provides CLI:
* start -- location in tree of directories to start
* resolve -- resource to resolve
"""
from glob import glob