Skip to content

Instantly share code, notes, and snippets.

@ifduyue
Created December 18, 2011 12:51
Show Gist options
  • Save ifduyue/1493327 to your computer and use it in GitHub Desktop.
Save ifduyue/1493327 to your computer and use it in GitHub Desktop.
A collection of small convenience functions: file I/O helpers, daemonizing, IP address validation, and text/HTML utilities.
#coding: utf8
from __future__ import with_statement, absolute_import
import os
import re
import sys
import signal
import string
import random
def joinpath(*args):
    """Join path components using the platform's path rules.

    All positional arguments are forwarded unchanged to ``os.path.join``
    and its result is returned.
    """
    joined = os.path.join(*args)
    return joined
def writeto(path, data):
    """Write ``data`` to ``path`` in binary mode, truncating existing content."""
    handle = open(path, 'wb')
    try:
        handle.write(data)
    finally:
        # Always release the file handle, even if the write fails.
        handle.close()
def appendto(path, data):
    """Append ``data`` to the end of ``path`` in binary mode."""
    handle = open(path, 'ab')
    try:
        handle.write(data)
    finally:
        # Always release the file handle, even if the write fails.
        handle.close()
def readfrom(path):
    """Return the entire content of ``path``, read in binary mode."""
    handle = open(path, 'rb')
    try:
        content = handle.read()
    finally:
        handle.close()
    return content
def dumpto(path, obj, method='marshal'):
    """Serialize ``obj`` into the file at ``path``.

    Args:
        path: Destination file; existing content is overwritten.
        obj: Object to serialize.
        method: One of ``'marshal'`` (default), ``'pickle'`` or ``'json'``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'marshal':
        import marshal as codec
        mode = 'wb'
    elif method == 'pickle':
        import pickle as codec
        mode = 'wb'
    elif method == 'json':
        import json as codec
        # json.dump emits text, so the file must not be opened in binary
        # mode (binary mode raises TypeError on Python 3).
        mode = 'w'
    else:
        raise ValueError('unknown method: %s' % (method,))
    with open(path, mode) as f:
        codec.dump(obj, f)
def loadfrom(path, method='marshal'):
    """Deserialize and return the object stored in the file at ``path``.

    Loading is best-effort: if the file cannot be opened or its content
    cannot be decoded, ``None`` is returned instead of raising.

    Args:
        path: File to read.
        method: One of ``'marshal'`` (default), ``'pickle'`` or ``'json'``.

    Returns:
        The deserialized object, or ``None`` on any load failure.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'marshal':
        import marshal as codec
    elif method == 'pickle':
        # NOTE: unpickling can execute arbitrary code; only use this
        # method on files from a trusted source.
        import pickle as codec
    elif method == 'json':
        import json as codec
    else:
        raise ValueError('unknown method: %s' % (method,))
    try:
        with open(path, 'rb') as f:
            return codec.load(f)
    except Exception:
        # Deliberate best-effort: a missing or corrupt file yields None.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def isreadablefile(path):
    """Return True when ``path`` is readable and is a regular file."""
    if not os.access(path, os.R_OK):
        return False
    return os.path.isfile(path)
def touch(path):
    """Update the timestamps of ``path``, creating the file if needed.

    The access and modification times are set to the current time. If
    that fails with an OS-level error (for example because the file does
    not exist), the file is opened in append mode instead, which creates
    it without altering any existing content.
    """
    try:
        os.utime(path, None)
    except OSError:
        # Opening in append mode creates a missing file and leaves an
        # existing one untouched.
        with open(path, 'a'):
            pass
def mv(f, t):
    """Move the file or directory ``f`` to ``t`` via ``shutil.move``."""
    from shutil import move as _move
    _move(f, t)
# Turn the current process into a daemon and avoid leaving a zombie behind.
def daemon():
    """Detach the calling process using the double-fork technique.

    The original process and the intermediate child both exit with
    status 0; only the final child returns from this function. In it,
    the working directory is ``/``, the umask is 0, and stdin, stdout
    and stderr are redirected to the null device.

    Requires a platform that provides ``os.fork`` and ``os.setsid``.
    """
    # First fork: the parent exits, the child carries on.
    if os.fork() != 0:
        sys.exit(0)

    os.chdir("/")
    os.setsid()  # Create a new session and set the process group.
    os.umask(0)

    # Second fork: the session leader exits, so the remaining child is
    # re-parented to init (pid 1).
    if os.fork() != 0:
        sys.exit(0)

    # Flush pending output before the descriptors are replaced.
    sys.stdout.flush()
    sys.stderr.flush()

    # Point the three standard streams at the null device. One read/write
    # descriptor is enough since dup2 copies it onto each target.
    targets = [stream.fileno() for stream in (sys.stdin, sys.stdout, sys.stderr)]
    devnull = os.open(os.devnull, os.O_RDWR)
    try:
        for fd in targets:
            os.dup2(devnull, fd)
    finally:
        # Close the extra descriptor unless it is one of the targets.
        if devnull not in targets:
            os.close(devnull)
# Compiled once at import time rather than on every call.
_IPV4_RE = re.compile(r"""
    ^
    (?:
      # Dotted variants:
      (?:
        # Decimal 1-255 (no leading 0's)
        [3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}
      |
        0x0*[0-9a-f]{1,2}  # Hexadecimal 0x0 - 0xFF (possible leading 0's)
      |
        0+[1-3]?[0-7]{0,2} # Octal 0 - 0377 (possible leading 0's)
      )
      (?:                  # Repeat 0-3 times, separated by a dot
        \.
        (?:
          [3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}
        |
          0x0*[0-9a-f]{1,2}
        |
          0+[1-3]?[0-7]{0,2}
        )
      ){0,3}
    |
      0x0*[0-9a-f]{1,8}    # Hexadecimal notation, 0x0 - 0xffffffff
    |
      0+[0-3]?[0-7]{0,10}  # Octal notation, 0 - 037777777777
    |
      # Decimal notation, 1-4294967295:
      429496729[0-5]|42949672[0-8]\d|4294967[01]\d\d|429496[0-6]\d{3}|
      42949[0-5]\d{4}|4294[0-8]\d{5}|429[0-3]\d{6}|42[0-8]\d{7}|
      4[01]\d{8}|[1-3]\d{0,9}|[4-9]\d{0,8}
    )
    \Z                     # True end of string (no trailing newline)
""", re.VERBOSE | re.IGNORECASE)


def is_ipv4(str):
    """Return True if ``str`` is a textual IPv4 address.

    Accepted forms are one to four dot-separated parts, each written in
    decimal, hexadecimal (``0x`` prefix) or octal (leading ``0``), as
    well as a single 32-bit number in hexadecimal, octal or decimal
    notation. Matching is case-insensitive.

    The whole string must match: surrounding whitespace, including a
    trailing newline, makes the address invalid.
    """
    return _IPV4_RE.match(str) is not None
# Compiled once at import time rather than on every call.
_IPV6_RE = re.compile(r"""
    ^
    \s*                         # Leading whitespace
    (?!.*::.*::)                # Only a single wildcard allowed
    (?:(?!:)|:(?=:))            # Colon iff it would be part of a wildcard
    (?:                         # Repeat 6 times:
        [0-9a-f]{0,4}           #   A group of at most four hexadecimal digits
        (?:(?<=::)|(?<!::):)    #   Colon unless preceded by wildcard
    ){6}                        #
    (?:                         # Either
        [0-9a-f]{0,4}           #   Another group
        (?:(?<=::)|(?<!::):)    #   Colon unless preceded by wildcard
        [0-9a-f]{0,4}           #   Last group
        (?: (?<=::)             #   Colon iff preceded by exactly one colon
         |  (?<!:)              #
         |  (?<=:) (?<!::) :    #
         )                      # OR
     |                          #   A v4 address with NO leading zeros
        (?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)
        (?:  \.
            (?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)
        ){3}
    )
    \s*                         # Trailing whitespace
    $
""", re.VERBOSE | re.IGNORECASE | re.DOTALL)


def is_ipv6(ip):
    """Validates IPv6 addresses.

    Returns True if ``ip`` is a textual IPv6 address, optionally
    surrounded by whitespace. At most one ``::`` wildcard is allowed and
    the last 32 bits may be written as a dotted IPv4 address with octets
    in the range 0-255. Matching is case-insensitive.
    """
    return _IPV6_RE.match(ip) is not None
def is_ip(str):
    """Return True if ``str`` is accepted by ``is_ipv4`` or ``is_ipv6``."""
    if is_ipv4(str):
        return True
    return is_ipv6(str)
def ip2string(ip):
    """Format the low 32 bits of integer ``ip`` as a dotted-quad string.

    The most significant of the four bytes comes first, e.g.
    ``0x7f000001`` becomes ``"127.0.0.1"``.
    """
    # Shift each byte down to the low position, then mask it out.
    octets = tuple((ip >> shift) & 0xff for shift in (24, 16, 8, 0))
    return "%d.%d.%d.%d" % octets
def string2ip(str):
    """Convert a dotted-decimal string into its integer value.

    Each dot-separated part is parsed with ``int`` and folded in as the
    next lower byte, so ``"127.0.0.1"`` becomes ``2130706433``. Parts
    are not range-checked and the number of parts is not enforced.

    Raises:
        ValueError: If a part is not a valid integer literal.
    """
    # A plain 0 works on Python 2 and 3; the old ``0L`` long literal is a
    # syntax error on Python 3.
    ip = 0
    for part in str.split('.'):
        ip = (ip << 8) + int(part)
    return ip
def mb_code(s, coding=None):
    """Decode a byte string of unknown (likely Chinese) encoding.

    Text input is returned as is, or encoded to ``coding`` when one is
    given. Byte input is decoded by trying, in order, utf-8, gb2312,
    gbk, gb18030 and big5; the first codec that succeeds wins.

    Args:
        s: Text or byte string to convert.
        coding: Optional target encoding. When ``None`` the decoded text
            is returned; otherwise the text is encoded to ``coding``.

    Returns:
        The decoded text, or its ``coding``-encoded bytes. If byte input
        decodes but cannot be encoded to ``coding``, the decoded text is
        returned. If nothing can be decoded (or ``s`` has no ``decode``
        method), ``s`` is returned unchanged.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3

    if isinstance(s, text_type):
        return s if coding is None else s.encode(coding)

    if not hasattr(s, 'decode'):
        # Not a byte string; best-effort contract is to hand it back.
        return s

    for candidate in ('utf-8', 'gb2312', 'gbk', 'gb18030', 'big5'):
        try:
            decoded = s.decode(candidate)
        except UnicodeDecodeError:
            continue
        if coding is None:
            return decoded
        try:
            return decoded.encode(coding)
        except (UnicodeError, LookupError):
            # Target codec unknown or cannot represent the text: fall
            # back to the decoded text rather than failing.
            return decoded
    return s
def decodeHtmlentities(string):
    """Replace HTML character references in ``string`` with their characters.

    Handles decimal (``&#65;``) and hexadecimal (``&#x41;``) numeric
    references as well as named entities such as ``&amp;``. References
    that are unknown or malformed are left in the text unchanged.

    Args:
        string: Text possibly containing HTML entities.

    Returns:
        The text with every recognized entity replaced.
    """
    # Resolve version-specific names once, not on every match.
    try:
        from htmlentitydefs import name2codepoint  # Python 2
    except ImportError:
        from html.entities import name2codepoint  # Python 3
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3

    def substitute_entity(match):
        ent = match.group(2)
        if match.group(1) == "#":
            # Numeric reference: "x"/"X" prefix means hexadecimal.
            if ent[0] in "xX":
                digits, base = ent[1:], 16
            else:
                digits, base = ent, 10
            try:
                return to_char(int(digits, base))
            except (ValueError, OverflowError):
                # Not a number, or outside the valid code point range.
                return match.group()
        cp = name2codepoint.get(ent)
        if cp:
            return to_char(cp)
        return match.group()

    entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
    return entity_re.sub(substitute_entity, string)
def strip_tags(html, blank=True):
    """Remove ``<...>`` tags from ``html``.

    Args:
        html: Markup to strip.
        blank: When true (default), each run of adjacent tags is
            replaced by a single space so neighbouring words stay
            separated. When false, tags are removed outright.

    Returns:
        The text with tags removed. Tags may span several lines. Text
        outside tags, including NUL characters, is preserved.
    """
    if blank:
        # Collapse a run of adjacent tags into one space in one pass,
        # with no sentinel character that could clash with the input.
        tag_run = re.compile(r"(?:<.*?>)+", re.DOTALL)
        return tag_run.sub(" ", html)
    single_tag = re.compile(r"<.*?>", re.DOTALL)
    return single_tag.sub("", html)
def decompress(s, encoding='gzip'):
    """Decompress ``s`` with zlib according to ``encoding``.

    ``'gzip'`` expects a gzip container, ``'raw_deflate'`` a bare
    deflate stream, and any other value (e.g. ``'deflate'``) is handed
    to ``zlib.decompress`` with its default settings.
    """
    from zlib import MAX_WBITS, decompress as inflate

    window_bits = None
    if encoding == 'raw_deflate':
        window_bits = -MAX_WBITS
    elif encoding == 'gzip':
        window_bits = 16 + MAX_WBITS

    if window_bits is None:
        # 'deflate'
        return inflate(s)
    return inflate(s, window_bits)
def sigint():
    """Install a SIGINT handler that terminates the whole process group.

    When SIGINT arrives, the handler sends SIGTERM to process group 0
    (the caller's own group) and then exits the interpreter.
    """
    def _terminate_group(signum, frame):
        # pid 0 addresses every process in the current process group.
        os.kill(0, signal.SIGTERM)
        sys.exit()

    signal.signal(signal.SIGINT, _terminate_group)
def randstr(l=4, h=12, chars="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"):
    """Return a random string drawn from ``chars``.

    Args:
        l: Minimum length (inclusive).
        h: Maximum length (inclusive).
        chars: Characters to choose from.

    Returns:
        A string whose length is picked uniformly from ``[l, h]``.

    Uses the ``random`` module, so the result is not suitable for
    passwords, tokens or other security-sensitive values.
    """
    length = random.randint(l, h)
    # ``range`` exists on Python 2 and 3; ``xrange`` is Python 2 only.
    return ''.join(random.choice(chars) for _ in range(length))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment