Skip to content

Instantly share code, notes, and snippets.

View elephantum's full-sized avatar

Andrey Tatarinov elephantum

View GitHub Profile
# -*- coding: utf-8 -*-
# apt-get install python-pandas python-boto
AWS_ACCESS_KEY_ID =
AWS_SECRET_ACCESS_KEY =
import datetime
import gzip
import json
log_format jsonlog '{'
'"time_local": "$time_local", '
'"request": "$request", '
'"msec": "$msec", '
'"status": "$status", '
'"body_bytes_sent": "$body_bytes_sent", '
'"request_time": "$request_time", '
'"remote_addr": "$remote_addr", '
'"remote_user": "$remote_user", '
ideas <- function(x) {
Sys.sleep(1)
re_coordinates = 'shadow%3Afalse%7C(\\d+\\.\\d+)%2C(\\d+\\.\\d+)'
url <- paste('http://moscowidea.ru/ideas/', x, sep="")
read_data <- function(){
thepage = readLines(url)
coord_page <- thepage[grep(re_coordinates, thepage)]
mypattern_in = 'part_idea__summary__suggestion'
mypattern_out = 'part_idea__summary__purpose'
sugg_start = grep(mypattern_in, thepage) + 2
<p
data-style-tablet_portrait="font-size: 37px; line-height: 46px; padding-right: 0px;"
data-style-phone_portrait="font-size: 28px; line-height: 35px; padding-right: 0px;"
data-class-tablet_portrait="empty"
data-class-phone_portrait="empty"
data-size-leading-linked-tablet_portrait="true"
data-size-leading-linked-phone_portrait="true"
data-size-leading-ratio-tablet_portrait="1.25"
data-size-leading-ratio-phone_portrait="1.25"
style="font-size: 31px; line-height: 39px; padding-right: 0px;"
monkeyBook:search elephantum$ mvn compile
[INFO] Scanning for projects...
[INFO] Reactor build order:
[INFO] Parent pom
[INFO] Core search library
[INFO] Search daemon (base/meta)
[INFO] Proxying highlighter service
[INFO] Dictionary checker
WAGON_VERSION: 1.0-beta-2
[INFO] ------------------------------------------------------------------------
import re
import csv
import glob
# Matches names that start with "MBC0812-user<digits>" (case-insensitive);
# group 1 captures the digits.
re_userno = re.compile(r'MBC0812-user(\d+)', re.I)


def get_no(filename):
    """Return the user number embedded in an ``MBC0812-user<N>`` name.

    The pattern is anchored at the start of ``filename`` (``re.match``), so
    the name must begin with the ``MBC0812-user`` prefix; a leading directory
    component will not match.

    Args:
        filename: Name to parse, e.g. ``"MBC0812-user12.reg"``.

    Returns:
        The digits following the prefix, as a string (e.g. ``"12"``).

    Raises:
        ValueError: If ``filename`` does not start with the expected prefix.
    """
    found = re_userno.match(filename)
    if found is None:
        # Fail with a descriptive error instead of the opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(
            'filename does not start with MBC0812-user<N>: %r' % (filename,)
        )
    return found.group(1)
re_treename = re.compile(r'"TREENAME"="MBC0812-user(\d+)"', re.I | re.M)
def replace_treename(no, content):
# Author: Andrey Sibirev (Сибирев Андрей)
from operator import itemgetter
from time import sleep
from random import random
import threading
import Queue
def sleeper(arg):
sleep(random())
import os
import itertools
CHUNK_SIZE = 100
def read_lines_backwards(filename):
f = file(filename, 'rb')
f.seek(0, os.SEEK_END)
left = ''
import re, sys, os
from datetime import datetime
from collections import defaultdict
class Request():
def __init__(self, id, start):
self.id = id
self.start = start
import time
import tornado.ioloop
import tornado.httpclient
import logging
logging.basicConfig(level=logging.DEBUG)
io_loop = tornado.ioloop.IOLoop.instance()
interrupts_count = 0