Skip to content

Instantly share code, notes, and snippets.

miraculixx

  • Switzerland
View GitHub Profile
View gitdeploy
#!/usr/bin/env bash
# adapted from https://raw.githubusercontent.com/X1011/git-directory-deploy/master/deploy.sh
#
# changes
# * deploy from current directory is possible
# * set options from either .deploy file or parameters
set -o errexit #abort if any command fails
PREFIX=.
CONFIG=.deploy
@miraculixx
miraculixx / readcsv.py
Created Mar 1, 2019
read a large csv file and subset on datetime column
View readcsv.py
import csv
from datetime import datetime, timedelta

from dateutil.parser import parse

# Rows whose date is later than this cutoff (i.e. within the last 3 days)
# are skipped. Computed once so every row is compared to the same instant.
cutoff = datetime.now() - timedelta(days=3)

# newline='' lets the csv module do its own line-ending handling, so quoted
# fields containing embedded newlines are parsed correctly.
with open('data.csv', newline='') as fin:
    for row in csv.reader(fin):
        # The second column (index 1) is parsed as the row's date/time.
        row_date = parse(row[1])
        if row_date > cutoff:
            continue
        # `process` is not defined in this snippet; supply your own handler.
        process(row)  # whatever your logic
@miraculixx
miraculixx / sample.py
Last active Feb 27, 2019
Read arbitrary formatted file into pandas dataframe
View sample.py
import pandas as pd
# Sample input: a whitespace-separated text table consisting of a header
# line, a dashed separator line and three data rows.
# NOTE(review): presumably this is the text later loaded into a pandas
# DataFrame -- the loading code is not visible here; confirm.
text = """
Men super men size Energy (J) type num g
----------------------------------------------------------------------
50 1 1 1.0234E+03 A abcd 12.1
20 7 4 5.0211E+02 A2 C agcd 14.1
10 2 3 -1.0347E+02 B2 abkd 72.1
"""
@miraculixx
miraculixx / socratapager.py
Created Feb 22, 2019
sodapy.Socrata extension to page through results using a single call to get
View socratapager.py
from sodapy import Socrata
class SocrataPager(Socrata):
def get(self, dataset, *args, limit=None, offset=0, where=None, order=None, page_size=None, **kwargs):
""" get dataset by pages or within limit
uses Socrata.get to page through results until limit is reached. This adds the page_size parameter
which is passed as the limit kwarg to Socrata.get. limit is the total number of records to
return, across all pages. Note that the method is a generator.
@miraculixx
miraculixx / requirements.txt
Created Nov 28, 2018
Python multiprocess parallel selenium web scraping with improved performance
View requirements.txt
beautifulsoup4==4.6.3
certifi==2018.10.15
chardet==3.0.4
idna==2.7
lxml==4.2.5
requests==2.20.1
selenium==3.141.0
urllib3==1.24.1
@miraculixx
miraculixx / cities_match.py
Last active Jun 19, 2018
map cities by rotated letters
View cities_match.py
from itertools import combinations

# quick and dirty implementation, not optimized
cities = ['Tokyo', 'London', 'Rome', 'Donlon', 'Kyoto', 'Paris']

# Pair every city with a key made of its lower-cased letters in sorted order;
# two cities consist of the same letters exactly when their keys are equal.
normalized = [(c, ''.join(sorted(c.lower()))) for c in cities]

# Every unordered pair of (city, key) tuples, in input order.
cartesian = list(combinations(normalized, 2))

# Keep the city names of each pair whose keys agree.
matches = [
    [left[0], right[0]]
    for left, right in cartesian
    if left[1] == right[1]
]

# Names that already appear in some pair. Built once as a set instead of
# rebuilding two lists for every city in the membership test below.
matched = {name for pair in matches for name in pair}

# Cities without a partner are appended as bare strings, in input order.
matches.extend([c for c in cities if c not in matched])

# Bare expression: shows the result when run in a REPL/notebook cell.
matches
# output [['Tokyo', 'Kyoto'], ['London', 'Donlon'], 'Rome', 'Paris']
@miraculixx
miraculixx / pickablecoll.py
Created Apr 30, 2018
picklable mongodb connection object
View pickablecoll.py
class PickableCollection(object):
    """Proxy that forwards attribute reads and writes to a wrapped object.

    The wrapped object is stored on the instance under the name
    ``collection``. Any attribute not found on the proxy itself is read from
    the wrapped object, and every attribute assignment on the proxy is
    applied to the wrapped object.
    """

    def __init__(self, collection):
        # Use the base-class __setattr__: our own __setattr__ forwards to the
        # wrapped object, which does not exist yet at this point.
        super(PickableCollection, self).__setattr__('collection', collection)

    def __getattr__(self, k):
        """Return attribute ``k`` of the wrapped object.

        Only invoked when normal lookup on the proxy fails.

        Raises:
            AttributeError: if the wrapped object has no attribute ``k``, or
                if the proxy has no wrapped object yet.
        """
        # If 'collection' itself is missing (instance created without
        # __init__, as copy/pickle do before restoring state), reading
        # self.collection below would re-enter __getattr__ without end.
        if k == 'collection':
            raise AttributeError(k)
        return getattr(self.collection, k)

    def __setattr__(self, k, v):
        """Set attribute ``k`` to ``v`` on the wrapped object."""
        return setattr(self.collection, k, v)
@miraculixx
miraculixx / example.py
Last active Dec 29, 2017
type checking for python functions. this is way better than PEP 484 type hints and it works for Python 2.x and Python 3.x
View example.py
from typecheck import typecheck
@typecheck((int, int), dict(c=(int, type(None))), int)
def foo(a, b, c=None):
    """Print ``"hello"`` followed by ``a`` and ``b``, then return 5.

    ``c`` is accepted but not used by the body.

    NOTE(review): the decorator arguments presumably declare the positional
    argument types, the keyword argument types and the return type --
    confirm against the ``typecheck`` module.
    """
    print("hello", a, b)
    return 5
typecheck.active = False
@typecheck((int, int), dict(c=(int, type(None))), str)
@miraculixx
miraculixx / getlicense.py
Created Jun 29, 2017
getlicense -- collect license information in your project
View getlicense.py
def getlicenses(dir=None):
"""
simple licence collector
"""
import re
dir = dir or '.'
LICENSE_FILES = r'LICENSE.*'
SOURCE_FILES = r'(.*css$)|(.*js$)|(.*min$)|(.*json$)'
LICENSE_IDS = r'.*(MIT|BSD|GPL|GNU.GPL|LPGL|APACHE).*'
SOURCE_LINKS = r'.*(http.*://\W*\s)'
@miraculixx
miraculixx / README.md
Created Sep 5, 2016
start ipython notebook including django
View README.md
import os
if os.environ.get('DJANGO_SETTINGS_MODULE'):
  print "Initializing Django..."
  if not os.path.exists('app/settings.py'):
      os.chdir('..')
  print "(in directory %s)" % os.getcwd()
  try:
    from django_extensions import management
  except BaseException as e:
You can’t perform that action at this time.