This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Use Gists to store code you would like to remember later on | |
console.log(window); // log the "window" object to the console |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.functions import split, regexp_extract | |
split_df = base_df.select(regexp_extract('value', r'^([^\s]+\s)', 1).alias('host'), | |
regexp_extract('value', r'^.*\[(\d\d/\w{3}/\d{4}:\d{2}:\d{2}:\d{2} -\d{4})]', 1).alias('timestamp'), | |
regexp_extract('value', r'^.*"\w+\s+([^\s]+)\s+HTTP.*"', 1).alias('path'), | |
regexp_extract('value', r'^.*"\s+([^\s]+)', 1).cast('integer').alias('status'), | |
regexp_extract('value', r'^.*\s+(\d+)$', 1).cast('integer').alias('content_size')) | |
split_df.show(truncate=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import hashlib | |
from elasticsearch import Elasticsearch | |
from elasticsearch import helpers | |
from tqdm import tqdm | |
class Storage: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
logging.basicConfig( | |
format='%(asctime)s,%(msecs)05.1f (%(funcName)s) %(message)s', | |
datefmt='%H:%M:%S') | |
log = logging.getLogger() | |
log.setLevel(logging.INFO) | |
import threading | |
import os | |
import time |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# List unique values in a DataFrame column | |
pd.unique(df.column_name.ravel()) | |
# Convert Series datatype to numeric, getting rid of any non-numeric values | |
df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True) | |
# Grab DataFrame rows where column has certain values | |
valuelist = ['value1', 'value2', 'value3'] | |
df = df[df.column.isin(valuelist)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: '2' | |
services: | |
db: | |
image: mysql:5.7 | |
volumes: | |
- db_data:/var/lib/mysql | |
restart: always | |
environment: | |
MYSQL_ROOT_PASSWORD: wordpress |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert wide format csv to long format csv | |
# Time Temp1 Temp2 Temp3 Temp4 Temp5 | |
# 00 21 32 33 21 23 | |
# 10 34 23 12 08 23 | |
# 20 12 54 33 54 55 | |
with open("in.csv") as f,open("out.csv","w") as out: | |
headers = next(f).split()[1:] # keep headers/Time Temp1 Temp2 Temp3 Temp4 Temp5 | |
for row in f: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# GET /_search | |
{ | |
"query": { | |
"bool": { | |
"must": [ | |
{ "match": { "doc.title": "Search" }}, | |
{ "match": { "doc.content": "Elasticsearch" }} | |
], | |
"filter": [ | |
{ "term": { "doc.status": "published" }}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# use un-analyzed fields | |
{ | |
"aggs" : { | |
"domain" : { | |
"terms" : { | |
"field" : "doc.domain.keyword", | |
"size" : 4, | |
"collect_mode" : "breadth_first" | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Do this as early as possible in your application: | |
from gevent import monkey; monkey.patch_all() | |
from tornado.web import RequestHandler, asynchronous | |
import gevent | |
class MyHandler(RequestHandler): | |
@asynchronous | |
def get(self, *args, **kwargs): | |
def async_task(): |
OlderNewer