Skip to content

Instantly share code, notes, and snippets.

@ismasan
ismasan / gist:3804361
Created September 29, 2012 15:32 — forked from mattetti/gist:3798173
async fetching of urls using goroutines and channels
package main
import (
"fmt"
"net/http"
"time"
)
var urls = []string{
"http://pulsoconf.co/",
@racitup
racitup / html_to_text.py
Last active July 29, 2021 09:43
Extract text from HTML in Python using BeautifulSoup4
from bs4 import BeautifulSoup, NavigableString, Tag
def html_to_text(html):
"Creates a formatted text email message as a string from a rendered html template (page)"
soup = BeautifulSoup(html, 'html.parser')
# Ignore anything in head
body, text = soup.body, []
for element in body.descendants:
# We use type and not isinstance since comments, cdata, etc are subclasses that we don't want
if type(element) == NavigableString:
@antonydevanchi
antonydevanchi / kolduvachestvo.sh
Created July 27, 2021 03:32
Как читать SMS с блядского USB-модема «Yota 4G LTE» через консоль
#!/usr/bin/env bash
# Ahalai-mahalai
FLAG="GET_RCV_SMS_LOCAL"
PAGE="1"
# Krible-krable
XMLRESPONSE=$(curl -sL -XPOST 'http://10.0.0.1/xml_action.cgi?method=set&module=duster&file=message' \
@spikeekips
spikeekips / es-dump-index.py
Last active September 10, 2022 00:01
A simple script to dump data from Elasticsearch in a form ready for insertion through the bulk API.
# -*- coding: utf-8 -*-
"""
################################################################################
Dump ElasticSearch index for inserting BULK
################################################################################
requires `rawes`.
for more details, run `-h` to show help message.
@mattetti
mattetti / gist:3798173
Last active April 16, 2023 03:09
async fetching of urls using goroutines and channels
package main
import (
"fmt"
"net/http"
"time"
)
var urls = []string{
"https://splice.com/",
@wshayes
wshayes / python_example.py
Created December 24, 2019 18:46
[python multiprocessing example] writing to file from a queue #python #multiprocessing
# https://stackoverflow.com/a/13530258/886938
import multiprocessing as mp
import time
fn = 'c:/temp/temp.txt'
def worker(arg, q):
'''stupidly simulates long running process'''
start = time.clock()
@neu5ron
neu5ron / valid_domain_name_regex
Last active July 2, 2023 10:40
Valid domain name regex including internationalized domain name
# Domain-name pattern: up to 127 dot-terminated labels (an alphanumeric first
# character followed by up to 62 word/underscore/hyphen characters), then an
# optional final-label prefix, an alphanumeric character, a dot, and a TLD that
# is either punycode ("xn--...") or two or more alphanumerics.
domain_regex = r'(([\da-zA-Z])([_\w-]{,62})\.){,127}(([\da-zA-Z])[_\w-]{,61})?([\da-zA-Z]\.((xn\-\-[a-zA-Z\d]+)|([a-zA-Z\d]{2,})))'
#Python
# Anchor the pattern at the end of the input; re.match below already anchors
# the start, so together the whole string has to match.
domain_regex = '{0}$'.format(domain_regex)
valid_domain_name_regex = re.compile(domain_regex, re.IGNORECASE)
# Normalize the candidate (lower-case, surrounding whitespace removed) before matching.
# NOTE(review): on Python 3, .encode('ascii') produces bytes, and matching a
# str pattern against bytes raises TypeError — confirm the target Python version.
self.domain_name = self.domain_name.lower().strip().encode('ascii')
# Result is True when the normalized name matches the pattern, False otherwise.
if re.match(valid_domain_name_regex, self.domain_name ):
return True
else:
return False
@snakers4
snakers4 / parse_cc_index.py
Last active September 14, 2023 20:00
Plain Common Crawl pre-processing
import gc
import gzip
import time
import json
import shutil
import os,sys
import tldextract
import collections
import pandas as pd
from tqdm import tqdm