David Chen lucemia

## gist:6987780

import dev_appserver
dev_appserver.fix_sys_path()

def get_auth():
    """Prompt on the console for credentials.

    Returns:
        A ``(username, password)`` tuple. The username is read with
        ``raw_input`` (Python 2); the password is read with
        ``getpass.getpass``.
    """
    from getpass import getpass as read_password

    # Ask for the username first, then the password, as separate prompts.
    username = raw_input('Username:')
    password = read_password('Password:')
    return username, password

def connect(app_id):
    from google.appengine.ext.remote_api import remote_api_stub

## gist:7019481
from lxml.html import parse
from lxml import etree
import cStringIO

def remove_tags(html, strip_tags=("script",)):
    """Parse ``html`` and drop every element whose tag is in ``strip_tags``.

    Args:
        html: HTML markup as a string; it is wrapped in a ``cStringIO``
            buffer and handed to ``lxml.html.parse``.
        strip_tags: Iterable of tag names to remove. Defaults to
            ``("script",)``.

    NOTE(review): nothing is returned in the lines visible here, so the
    stripped tree is discarded -- the listing looks cut off; confirm
    against the full gist.
    """
    root = parse(cStringIO.StringIO(html)).getroot()
    for tag in strip_tags:
        # Snapshot the matches before mutating: removing elements while
        # ``iter()`` is still walking the tree can end the walk early and
        # leave later (e.g. nested) matches in place.
        for element in list(root.iter(tag)):
            element.drop_tree()

## gist:7026005
import re
# Matches runs of one or more characters that are either in the range
# U+4E00-U+9FFF (CJK Unified Ideographs) or word characters; re.UNICODE
# makes \w follow Unicode character properties.
re_pure_text = re.compile(ur'[\u4e00-\u9fff\w]+', re.UNICODE)

## gist:7052466
#!/usr/bin/env python
#
# Copyright 2010 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#

## gist:7208870
SELECT * FROM ProductAd where __key__ = Key('ProductAd', 'yahoo:product:2265602')

## gist:7262390
<iframe frameBorder="0" scrolling="no" width="300" height="250" marginwidth="0" marginheight="0" style="display: visible" src="http://ad.tagtoo.co/ad_g_300x250?pb=66&id=4#q=http%3A%2F%2Fwww.mayuki.com.tw%2F&p=%%SITE%%&cachebuster=%%CACHEBUSTER%%&click=%%CLICK_URL_ESC%%"></iframe>

## gist:7315857

import sys

def extract_jpg(ifilepath):
        """Read ``ifilepath`` and keep what follows the ``</panorama>`` marker.

        Only the part of the function visible here is described: the file is
        opened in binary mode and read in full, the first ``"</panorama>"``
        marker is located, and ``icontent`` is rebound to everything after
        that marker. ``index()`` raises ``ValueError`` when the marker is
        absent.

        NOTE(review): ``ofile`` is assigned but not used in the visible
        lines, and nothing is written or returned here -- presumably the
        listing is cut off before the output step; confirm against the
        full gist.
        """
        ofile = 'test.jpg'
        with open(ifilepath, 'rb') as ifile:
                icontent = ifile.read()
                # Offset of the first character after the closing marker.
                index  = icontent.index("</panorama>") + len("</panorama>")
#               print index
                icontent = icontent[index:]

## gist:7366507
import random

# Three [name, weight] pairs.
x, y, z = ['x', 2], ['y', 3], ['z', 4]

SAMPLES = 100000
choices = [x, y, z]
# Sum of the second item of every pair, as a float.
total_weight = float(sum(weight for _name, weight in choices))
vs = []

## gist:7371993
import random

SAMPLES = 10000
NUM = 5
# Pair each index in range(NUM) with a random integer drawn from 0..100
# (both ends inclusive); one draw per index, in index order.
options = list(zip(range(NUM), (random.randint(0, 100) for _ in range(NUM))))

# Independent shallow copy of the option list.
choices = options[:]
total_weight = float(sum(weight for _index, weight in choices))
vs = []
for i in range(SAMPLES):

## file_split.py

class FileSplitPipe(base_handler.PipelineBase):
    def run(self, input_path, output, shards):
        # from google.appengine.api import files
        # from cStringIO import StringIO
        import time
        import logging

        def readline(_file):
            # TODO: Need to fix it

	import dev_appserver
	dev_appserver.fix_sys_path()

	def get_auth():
	import getpass
	return raw_input('Username:'), getpass.getpass('Password:')

	def connect(app_id):
	from google.appengine.ext.remote_api import remote_api_stub
	from lxml.html import parse
	from lxml import etree
	import cStringIO

	def remove_tags(html, strip_tags = ["script"]):
	b = cStringIO.StringIO(html)
	root = parse(b).getroot()
	for tag in strip_tags:
	for element in root.iter(tag):
	element.drop_tree()
	import re
	re_pure_text = re.compile(ur'[\u4e00-\u9fff\w]+', re.UNICODE)
	#!/usr/bin/env python
	#
	# Copyright 2010 Facebook
	#
	# Licensed under the Apache License, Version 2.0 (the "License"); you may
	# not use this file except in compliance with the License. You may obtain
	# a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#

	import sys

	def extract_jpg(ifilepath):
	ofile = 'test.jpg'
	with open(ifilepath, 'rb') as ifile:
	icontent = ifile.read()
	index = icontent.index("</panorama>") + len("</panorama>")
	# print index
	icontent = icontent[index:]
	import random

	x = ['x', 2]
	y = ['y', 3]
	z = ['z', 4]

	SAMPLES = 100000
	choices = [x,y,z]
	total_weight = float(sum([k[1] for k in choices]))
	vs = []
	import random

	SAMPLES = 10000
	NUM = 5
	options = [(k, random.randint(0, 100)) for k in range(NUM)]

	choices = list(options)
	total_weight = float(sum([k[1] for k in choices]))
	vs = []
	for i in range(SAMPLES):

	class FileSplitPipe(base_handler.PipelineBase):
	def run(self, input_path, output, shards):
	# from google.appengine.api import files
	# from cStringIO import StringIO
	import time
	import logging

	def readline(_file):
	# TODO: Need to fix it