Skip to content

Instantly share code, notes, and snippets.

@SuryaSankar
Last active August 29, 2015 14:15
Show Gist options
  • Save SuryaSankar/8e9a4bcd6007d8506290 to your computer and use it in GitHub Desktop.
Save SuryaSankar/8e9a4bcd6007d8506290 to your computer and use it in GitHub Desktop.
Python Utils
#########################################################
# Generic methods independent of any App logic here.
# This module is meant to be a Python tool set.
#########################################################
from itertools import chain, groupby
from operator import attrgetter
from contextlib import contextmanager
from inspect import ismethod
import re
from datetime import datetime, timedelta
from decimal import Decimal
import uuid
import os
from werkzeug.utils import secure_filename
import math
from flask import current_app
# Loose e-mail shape check: text without "@", an "@", more text without "@",
# a literal dot, then more text without "@". Not a full address validator.
EMAIL_REGEX = re.compile(r"[^@]+@[^@]+\.[^@]+")
def union(list_of_lists):
    """
    Return a list of the distinct items found in any of the given iterables.

    An empty ``list_of_lists`` yields ``[]`` instead of raising TypeError.
    The order of the returned list is unspecified.
    """
    # Starting from an empty set makes the zero-argument case well defined.
    return list(set().union(*list_of_lists))
def intersection(list_of_lists):
    """
    Return a list of the items common to every one of the given iterables.

    An empty ``list_of_lists`` yields ``[]`` instead of raising TypeError.
    The order of the returned list is unspecified.
    """
    sets = [set(l) for l in list_of_lists]
    if not sets:
        return []
    return list(set.intersection(*sets))
def difference(list1, list2):
    """
    Return a list of the distinct items of list1 that do not occur in list2.
    The order of the returned list is unspecified.
    """
    kept = set(list1)
    dropped = set(list2)
    return list(kept - dropped)
def random_string(length=None):
    """
    Return a random lowercase hexadecimal string built from uuid4 values.

    With ``length=None`` the result is the 32 hex characters of one uuid4.
    Otherwise the result has exactly ``length`` characters: ``0`` gives an
    empty string, and lengths above 32 are satisfied by concatenating
    further uuid4 values instead of being silently capped at 32.
    """
    string = uuid.uuid4().hex
    if length is None:
        return string
    while len(string) < length:
        string += uuid.uuid4().hex
    return string[:length]
def npartition(string, n=1, delimiter=' '):
    """
    Similar to python's built in partition method. But will
    split at the nth occurrence of delimiter.

    Returns a 3-tuple ``(head, delimiter, tail)``. If the string contains
    fewer than ``n`` delimiters the result is ``(string, '', '')``, which
    mirrors what ``str.partition`` returns when the separator is missing.

    >>> npartition("a b c d", 2)
    ('a b', ' ', 'c d')
    >>> npartition("abc", 1)
    ('abc', '', '')
    """
    groups = string.split(delimiter)
    # len(groups) - 1 delimiters exist; there is no nth one to split at.
    if n >= len(groups):
        return (string, '', '')
    return (delimiter.join(groups[:n]), delimiter, delimiter.join(groups[n:]))
def percentage(numerator, denominator):
    """
    Return numerator as a percentage of denominator, rounded up to two
    decimal places. Both arguments are converted with float().
    """
    scaled = float(numerator) * 100
    value = scaled / float(denominator)
    hundredths = math.ceil(value * 100)
    return hundredths / 100
def generate_unique_file_name(seed_name):
    """
    Build a file name of the form <timestamp>_<6 hex chars>_<safe seed name>.

    The timestamp comes from ist_now(), the hex token from a fresh uuid4 and
    the seed name is passed through werkzeug's secure_filename.
    """
    timestamp = ist_now().strftime("%Y%m%d_%H%M%S%f")
    token = uuid.uuid4().hex[0:6]
    safe_name = secure_filename(seed_name)
    return "%s_%s_%s" % (timestamp, token, safe_name)
def save_with_unique_name(_file, location):
    """
    Save _file inside location under a generated unique name.

    _file must expose a ``filename`` attribute and a ``save(path)`` method.
    Returns a tuple of (generated file name, _file).
    """
    unique_name = generate_unique_file_name(_file.filename)
    destination = os.path.join(location, unique_name)
    _file.save(destination)
    return (unique_name, _file)
def is_email(mailstr):
    """
    Checks if a string matches the Email regex.

    Returns False for anything that is not a text string. The whole string
    must match, so input with trailing junk such as "a@b.c@d" is rejected.
    """
    # type(u"") is `unicode` on Python 2 and `str` on Python 3, so this
    # check works on both without referencing the py2-only `unicode` name.
    if not isinstance(mailstr, (str, type(u""))):
        return False
    match = EMAIL_REGEX.match(mailstr)
    # match() only anchors at the start; require it to consume everything.
    return match is not None and match.end() == len(mailstr)
def place_nulls(key, input_keyvals, output_results):
    """
    Pad a result list with None so that it lines up with the input keys.

    Hypothetical example:
    Customer.raw_get_all([1,2,3]) returns [C1, C3]
    There is no customer with id 2 in db. That is awkward because callers
    want to iterate with zip(input, output). This function inserts None
    wherever an output value is missing, turning the list into
    [C1, None, C3].

    Nothing is done when both lists already have the same length.
    output_results is modified in place and also returned.
    """
    if len(input_keyvals) == len(output_results):
        return output_results
    for position, expected in enumerate(input_keyvals):
        try:
            mismatch = getattr(output_results[position], key) != expected
        except IndexError:
            # Ran past the end of the results: this key has no object.
            mismatch = True
        if mismatch:
            output_results.insert(position, None)
    return output_results
def subdict(dictionary, keys):
    """
    Return a new dict restricted to those of ``keys`` present in dictionary.
    When ``keys`` is empty the original dictionary object itself is returned.

    >>> a = {1: 3, 4: 5, 6: 7}
    >>> subdict(a, [4, 6])
    {4: 5, 6: 7}
    """
    if len(keys) == 0:
        return dictionary
    selected = {}
    for k in keys:
        if k in dictionary:
            selected[k] = dictionary[k]
    return selected
def add_kv_to_dict(dictionary, key, value):
    """
    Return a new dict with the items of dictionary plus key -> value.
    An existing entry for key is overridden; dictionary is left untouched.

    >>> a = {1: 3, 4: 5, 6: 7}
    >>> add_kv_to_dict(a, 6, 9)
    {1: 3, 4: 5, 6: 9}
    """
    updated = dict(dictionary.items())
    updated[key] = value
    return updated
def merge(dict1, dict2):
    """
    Return a new dict holding the items of dict1 and dict2.
    On duplicate keys the value from dict2 wins. Neither input is modified.

    >>> a = {1: 2, 3: 4}
    >>> b = {5: 6, 7: 8}
    >>> merge(a, b)
    {1: 2, 3: 4, 5: 6, 7: 8}
    """
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    return dict(chain(dict1.items(), dict2.items()))
def add_kv_if_absent(dictionary, key, value):
    """
    Store value under key only when key is not already in dictionary.
    The dictionary is modified in place and returned.
    """
    if key in dictionary:
        return dictionary
    dictionary[key] = value
    return dictionary
def has_duplicates(l):
    """
    Return True when l contains at least one repeated (hashable) item.
    """
    total = len(l)
    distinct = len(set(l))
    return total != distinct
def monetize(number):
    """
    Round a number to two decimal places and return it as a Decimal.
    The number is converted with Decimal(number) and then quantized with
    the decimal context's default rounding.

    >>> monetize(3.4389)
    Decimal('3.44')
    >>> monetize(3.4334)
    Decimal('3.43')
    """
    two_places = Decimal('.01')
    amount = Decimal(number)
    return amount.quantize(two_places)
def sanitize(source_dict, whitelist, additional_params=None,
             keys_to_modify=None, value_filterer=lambda v: False):
    """
    Keep only whitelisted entries of source_dict, optionally rename keys,
    and return the result merged with additional_params.

    source_dict       -- dict to clean. It is rewritten in place with the
                         cleaned (and renamed) entries; this side effect is
                         kept for backward compatibility.
    whitelist         -- container of keys that may be kept.
    additional_params -- extra items added to the returned dict; they win
                         over cleaned entries with the same key.
    keys_to_modify    -- mapping of old key -> new key for kept entries.
    value_filterer    -- callable; entries whose value makes it return a
                         true value are dropped.

    >>> raw={'name': 'surya', 'email': 'surya@s.com', 'bad_param': '<SCRIPT></SCRIPT>', 'postalcode': '55544'}
    >>> sanitize(raw, whitelist=['name', 'email', 'postalcode'], additional_params={'state': 'Wyoming', 'country': 'US'}, keys_to_modify={'postalcode':'zipcode'})
    {'email': 'surya@s.com', 'state': 'Wyoming', 'country': 'US', 'zipcode': '55544', 'name': 'surya'}
    """
    # None defaults avoid sharing one mutable dict between calls.
    if additional_params is None:
        additional_params = {}
    if keys_to_modify is None:
        keys_to_modify = {}

    # Collect into a fresh dict: deleting from source_dict while iterating
    # its items() raises RuntimeError on Python 3, and renaming in place can
    # clobber or lose entries.
    cleaned = {}
    for k, v in source_dict.items():
        if k not in whitelist or value_filterer(v):
            continue
        new_key = keys_to_modify[k] if k in keys_to_modify else k
        cleaned[new_key] = v

    source_dict.clear()
    source_dict.update(cleaned)

    result = dict(cleaned)
    result.update(additional_params)
    return result
def is_int(s):
    """
    Return True when s is an int or can be converted with int(s).

    Only the conversion failures int() raises for unsuitable input
    (TypeError, ValueError, OverflowError) are treated as "not an int";
    other exceptions, such as KeyboardInterrupt, are no longer swallowed.
    """
    if isinstance(s, int):
        return True
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
def flatten(list_of_lists):
    """
    Concatenate the items of every sub-iterable into one flat list.
    Only one level of nesting is removed.

    >>> flatten([[1,2], [3,4,5]])
    [1, 2, 3, 4, 5]
    """
    return list(chain.from_iterable(list_of_lists))
def filtered_list(olist, exclude_list):
    """
    Return a list of the items of olist that are not in exclude_list,
    in their original order.

    Always returns a real list; the builtin filter() would hand back a
    lazy iterator on Python 3.
    """
    return [item for item in olist if item not in exclude_list]
def getattr_safe(obj, attr):
    """
    Fetch obj.<attr>; when it is a bound Python method (inspect.ismethod),
    call it without arguments and return the result, otherwise return the
    attribute value itself.
    """
    value = getattr(obj, attr)
    return value() if ismethod(value) else value
def serialize_attrs(obj, *args):
    """
    Build a dict mapping each requested attribute name to the value that
    getattr_safe returns for it on obj.

    >>> serialize_attrs(Customer.first(), 'id', 'email')
    {'id': 1L, 'email': u'mr.@howe.com'}
    """
    serialized = {}
    for name in args:
        serialized[name] = getattr_safe(obj, name)
    return serialized
def grouplist(olist, key):
    """
    Group the objects of olist by the value of their ``key`` attribute and
    return a dict of attribute value -> list of objects.

    Objects are bucketed directly, so olist does not have to be sorted:
    equal keys that are not adjacent still end up in the same list
    (itertools.groupby would start a new group and the earlier one would
    be overwritten in the dict). Order inside each list follows olist.

    >>> customers
    [fulton@hills.com, maximus@collins.com, metta@nienow.com, mr.@howe.com, ruby@bogisich-watsica.biz]
    >>> customers[0].city="Delhi"
    >>> customers[1].city="Delhi"
    >>> customers[2].city="Mumbai"
    >>> customers[3].city="Mumbai"
    >>> customers[4].city="Chennai"
    >>> grouplist(customers, 'city')
    {'Chennai': [ruby@bogisich-watsica.biz],
    'Delhi': [fulton@hills.com, maximus@collins.com],
    'Mumbai': [metta@nienow.com, mr.@howe.com]}
    """
    key_of = attrgetter(key)
    grouped = {}
    for item in olist:
        grouped.setdefault(key_of(item), []).append(item)
    return grouped
def group(olist, key):
    """
    Lazy counterpart of grouplist: returns the itertools.groupby iterator
    of (attribute value, group iterator) pairs for the ``key`` attribute.

    groupby only merges consecutive items with equal keys, so olist should
    already be ordered by that attribute.
    """
    key_of = attrgetter(key)
    return groupby(olist, key_of)
def _deep_group_leaf(items, serializer, attr_to_show, serializer_args,
                     serializer_kwargs, strip_single_object_lists):
    """Render the objects of one innermost group of deep_group."""
    def render(item):
        if serializer:
            return getattr(item, serializer)(
                *serializer_args, **serializer_kwargs)
        if attr_to_show:
            return getattr(item, attr_to_show)
        return item

    if strip_single_object_lists and len(items) == 1:
        return render(items[0])
    return [render(item) for item in items]


def deep_group(olist, keys, sort_attr=None, serializer=None,
               attr_to_show=None,
               serializer_args=None, serializer_kwargs=None,
               strip_single_object_lists=False):
    """
    Group objects into nested dicts, one nesting level per attribute name
    in ``keys``.

    olist             -- iterable of objects to group.
    keys              -- attribute names, outermost level first. With no
                         keys, olist itself is returned.
    sort_attr         -- optional attribute name; objects are ordered by it
                         before grouping (a sorted copy is used, the
                         caller's list is not reordered).
    serializer        -- optional method name; each leaf object is replaced
                         by the result of calling that method with
                         serializer_args / serializer_kwargs.
    attr_to_show      -- optional attribute name; used when no serializer is
                         given, each leaf object is replaced by that
                         attribute's value.
    strip_single_object_lists -- when true, an innermost group containing
                         exactly one object holds that object (or its
                         rendered form) directly instead of a 1-item list.

    Objects with equal keys are grouped together even when they are not
    adjacent in olist.

    >>> customers[0].country="India"
    >>> customers[0].state="UP"
    >>> customers[0].city="Delhi"
    >>> customers[1].country="India"
    >>> customers[1].state="UP"
    >>> customers[1].city="Delhi"
    >>> customers[2].country="India"
    >>> customers[2].state="UP"
    >>> customers[2].city="Agra"
    >>> customers[3].country="India"
    >>> customers[3].state="TN"
    >>> customers[3].city="Chennai"
    >>> customers[4].country="China"
    >>> customers[4].state="Tibet"
    >>> customers[4].city="Lhasa"
    >>> deep_group(customers, keys=['country', 'state', 'city'])
    {'China': {'Tibet': {'Lhasa': [ruby@bogisich-watsica.biz]}},
    'India': {'TN': {'Chennai': [mr.@howe.com]},
    'UP': {'Delhi': [fulton@hills.com, maximus@collins.com],
    'Agra': [metta@nienow.com]}}}
    """
    if len(keys) == 0:
        return olist
    # None defaults avoid sharing mutable default objects between calls.
    if serializer_args is None:
        serializer_args = []
    if serializer_kwargs is None:
        serializer_kwargs = {}
    if sort_attr:
        olist = sorted(olist, key=attrgetter(sort_attr))

    # Bucket by the first key, remembering first-seen order of the groups.
    key_of = attrgetter(keys[0])
    buckets = {}
    order = []
    for item in olist:
        k = key_of(item)
        if k not in buckets:
            buckets[k] = []
            order.append(k)
        buckets[k].append(item)

    result = {}
    for k in order:
        items = buckets[k]
        if len(keys) == 1:
            result[k] = _deep_group_leaf(
                items, serializer, attr_to_show, serializer_args,
                serializer_kwargs, strip_single_object_lists)
        else:
            # sort_attr is not forwarded: the buckets keep the (stable)
            # order established by the sort above.
            result[k] = deep_group(
                items, keys[1:],
                serializer=serializer,
                attr_to_show=attr_to_show,
                serializer_args=serializer_args,
                serializer_kwargs=serializer_kwargs,
                strip_single_object_lists=strip_single_object_lists)
    return result
def strip_bad_chars(string, chars=[' ', '_', ';', ',', '"', "'"]):
    """
    Return string with every occurrence of each entry of chars removed.
    Entries are removed one after another, in the order given.
    """
    cleaned = string
    for unwanted in chars:
        cleaned = cleaned.replace(unwanted, '')
    return cleaned
def append_if_absent(l, i):
    """
    Append i to l unless l already contains it.
    The list is modified in place and returned.
    """
    if i in l:
        return l
    l.append(i)
    return l
def add_to_list_in_dict(item, list_name, dictionary):
    """
    Make sure ``dictionary`` holds a list under ``list_name`` (created via
    set_if_absent_and_get when missing) and append item to it unless it is
    already there. Returns None.
    """
    target_list = set_if_absent_and_get(dictionary, list_name, [])
    append_if_absent(target_list, item)
@contextmanager
def use_and_throw(d, k):
    """
    Context manager that hands out attribute ``k`` of ``d`` and removes it
    from ``d`` when the with-block ends, including when the block raises.

    ``d`` must support ``k in d`` as well as attribute access. When ``k``
    is not present the block still runs and receives None, rather than
    failing with "generator didn't yield".
    """
    present = k in d
    try:
        yield getattr(d, k) if present else None
    finally:
        # delattr removes the attribute named by k; `del d.k` would target
        # an attribute literally called "k". Re-check membership in case
        # the with-block already removed it.
        if present and k in d:
            delattr(d, k)
def unix_time(dt):
    """
    Return the number of seconds (as a float) between the naive datetime
    1970-01-01 00:00:00 and dt. dt must be a naive datetime.
    """
    epoch = datetime(1970, 1, 1)
    return (dt - epoch).total_seconds()
def unix_time_millis(dt):
    """
    Return unix_time(dt) converted to whole milliseconds (truncated to int).
    """
    seconds = unix_time(dt)
    return int(seconds * 1000)
def ist_now():
    """
    Return the current UTC time shifted forward by 330 minutes (UTC+05:30)
    as a naive datetime.
    """
    offset = timedelta(minutes=330)
    return datetime.utcnow() + offset
def local_time():
    """
    Return the current UTC time shifted by the number of minutes stored in
    the Flask app config under 'TIME_DELTA'. Needs an active application
    context because it reads current_app.config.
    """
    now_utc = datetime.utcnow()
    offset_minutes = current_app.config['TIME_DELTA']
    return now_utc + timedelta(minutes=offset_minutes)
def public_dict(obj):
    """
    Return a dict of obj's instance attributes whose names do not start
    with an underscore. Only ``obj.__dict__`` is inspected.
    """
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    return dict((k, v) for k, v in obj.__dict__.items()
                if not k.startswith('_'))
def pop_items(l):
    """
    Generator that pops items off the end of l, one per step, until l is
    empty. The list is consumed as the generator advances.
    """
    while True:
        if len(l) <= 0:
            return
        yield l.pop()
def putdict(d, k, dv):
    """
    Return d[k], first storing dv under k when k is not yet in d.
    """
    if k in d:
        return d[k]
    d[k] = dv
    return d[k]
def set_if_absent_and_get(d, k, dv):
    """
    Attribute-based variant of putdict: when ``k in d`` is false, set
    attribute k on d to dv; then return attribute k of d.
    """
    missing = k not in d
    if missing:
        setattr(d, k, dv)
    return getattr(d, k)
def get_if(obj, attr):
    """
    Return attribute attr of obj, or None when obj is falsy.
    """
    if not obj:
        return None
    return getattr(obj, attr)
def join(words, delimiter=","):
    """
    Join the str() form of every item of words that is not None, using
    delimiter as the separator.
    """
    kept = []
    for w in words:
        if w is not None:
            kept.append(str(w))
    return delimiter.join(kept)
def abbreviated_name(name, append_digit=None):
    """
    Returns a readable abbreviated name by removing vowels from the middle of
    the string and keeping the first and last letters of words.
    The name is upper-cased first; words of one or two letters are kept
    as they are. The abbreviation is trimmed to 6 characters, after which
    str(append_digit) is appended when append_digit is truthy.

    >>> abbreviated_name("I convert caffeine to code")
    'ICNVRT'
    """
    vowels = ('A', 'E', 'I', 'O', 'U')

    def strip_vowels(word):
        if len(word) <= 2:
            return word
        # ''.join keeps this a string on Python 3, where filter() returns
        # an iterator that cannot be concatenated to a str.
        middle = ''.join(l for l in word[1:-1] if l not in vowels)
        return word[0] + middle + word[-1]

    abbr = ''.join(
        strip_vowels(word) for word in name.upper().split())[0:6]
    if append_digit:
        abbr = abbr + str(append_digit)
    return abbr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment