Download from here:
MD5: 59bab8f71f8c096cd3f72cd73851515d
Rename it to:
import pycurl | |
from cStringIO import StringIO | |
import time | |
from collections import deque | |
from itertools import islice | |
from urlparse import urlsplit | |
import select | |
# curl_multi_perform -> transfer data on ready sockets and get the number of active handles
# curl_multi_fdset -> extract the file descriptors to use in select/poll
import pycurl | |
import urllib | |
import copy | |
import re | |
import signal | |
import os | |
import random | |
from urlparse import urlsplit | |
from libpy.html import detect_encoding |
class DocumentProcessor(object):
    """Hold the result destination, an ID cache and two work queues.

    Only the constructor and the class constants are visible in this
    excerpt; how the queues and constants are consumed is defined elsewhere.
    """

    # Tuning constants (names suggest worker batching, saver chunking and
    # pool sizing -- TODO confirm against the code that reads them).
    # NOTE(review): the original line read "100#00", which looks like the
    # value was lowered from 10000 by commenting out two zeros -- confirm.
    WORKER_TASK_SIZE = 100
    SAVER_CHUNK_LIMIT = 1000
    WORKER_POOL_SIZE = 10

    def __init__(self, result_file):
        """Store ``result_file`` and create empty cache and queues.

        Args:
            result_file: Stored unchanged on the instance; whether it is a
                path or a file object cannot be told from this excerpt.
        """
        self.result_file = result_file
        self.id_cache = []
        # NOTE(review): `Queue` is not imported in the visible part of the
        # file; it must be brought into scope elsewhere (e.g. the `Queue`
        # module or `multiprocessing`) -- confirm which one is intended.
        self.task_queue = Queue()
        self.result_queue = Queue()
Download from here:
MD5: 59bab8f71f8c096cd3f72cd73851515d
Rename it to:
[DllImport("kernel32.dll", CharSet = CharSet.Auto)] | |
private static extern bool FreeLibrary(IntPtr hModule); | |
[DllImport("kernel32.dll", CharSet = CharSet.Ansi)] | |
private static extern IntPtr GetProcAddress(IntPtr hModule, string lpProcName); | |
// Load the DLL file | |
IntPtr Handle = LoadLibrary(fileName); | |
if (Handle == IntPtr.Zero) | |
{ |