Using Requests and Beautiful Soup, with the most recent Beautiful Soup 4 docs.
Install our tools (preferably in a new virtualenv):
pip install beautifulsoup4
Using Requests and Beautiful Soup, with the most recent Beautiful Soup 4 docs.
Install our tools (preferably in a new virtualenv):
pip install beautifulsoup4
import hashlib | |
import sys | |
from concurrent.futures import ProcessPoolExecutor | |
from time import sleep, time | |
def t1(n): | |
"""Silly function whose time increases as n does.""" | |
for i in range(n): |
from BeautifulSoup import BeautifulSoup | |
def _remove_attrs(soup): | |
for tag in soup.findAll(True): | |
tag.attrs = None | |
return soup | |
def example(): | |
doc = '<html><head><title>test</title></head><body id="foo" onload="whatever"><p class="whatever">junk</p><div style="background: yellow;" id="foo" class="blah">blah</div></body></html>' |
#!/usr/bin/env python | |
""" | |
Spawn a lot of publishers. | |
""" | |
import json | |
import os | |
import random | |
import sys |
""" | |
Use a Counter to find the most common words in "The Wonderful Wizard of Oz" by | |
L. Frank Baum. | |
Available in (mostly) plain text at: | |
https://archive.org/stream/wonderfulwizardo00baumiala/wonderfulwizardo00baumiala_djvu.txt | |
Note: This code also counts the words in the header, so it's not a *realistic* | |
application, but more of a demonstration of python's Counter. |
""" | |
A function to enumerate and print a list of items horizontally using the | |
space available in your terminal (optionally, without wrapping text). | |
Example: | |
>>> friendly.test() | |
1. a cat 2. a dog 3. Three pairs of socks 4. A kick-ass bicycle | |
5. chickens 6. A quick brown fox 7. and a lazy dog | |
8. Lorem ipsum dolor sit amet, consectetur |
#!/usr/bin/env python | |
""" | |
Test if we can reliably figure out the uncompressed size of .gz file... | |
""" | |
import gzip | |
import os | |
import subprocess | |
# read from a csv | |
# http://docs.python.org/2/library/csv.html#csv.reader | |
import csv | |
def read_csv_file(filename): | |
# read a csv file and return a list of lists | |
results = [] | |
with open('analytics.csv', 'rU') as f: | |
myreader = csv.reader(f) | |
for row in myreader: |
#!/usr/bin/env python | |
""" | |
process some data | |
""" | |
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor | |
from joblib import Parallel, delayed | |
import os | |
import statistics |