Public gists by R Max Espinoza (rmax)
@rmax
rmax / entropy.py
Created January 10, 2020 14:26
Entropy experiment
"""Entropy experiment."""
from dataclasses import dataclass
from math import inf
from secrets import randbits
DEFAULT_STEP: int = 64  # shall we use a bigger step?
@dataclass
class Entropy:
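    # (Preview truncated here. A hedged guess at a minimal body, assuming the
    # experiment counts random bits drawn via secrets.randbits; the field and
    # method names below are illustrative, not recovered from the gist.)
    bits_drawn: int = 0
    budget: float = inf  # no cap by default

    def draw(self, step: int = DEFAULT_STEP) -> int:
        # Pull `step` fresh random bits from the OS CSPRNG.
        self.bits_drawn += step
        return randbits(step)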
from js2xml import parse, pretty_print as tostring
from js2xml.jsonlike import make_dict, getall as get_json_objs
MAKE_DICT_TYPES = (
    # Types that can be handled by make_dict.
    'array',
    'object',
    'property',
    'string',
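    # (Preview truncated; presumably more node types follow.)
)

# A hedged usage sketch, not from the original gist: parse inline JavaScript
# with js2xml and pull out its JSON-like objects.
js = "var config = {name: 'demo', items: [1, 2, 3]};"
tree = parse(js)
objs = [make_dict(node) for node in get_json_objs(tree)]
print(objs)  # e.g. [{'name': 'demo', 'items': [1, 2, 3]}]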
@rmax
rmax / elasticsearch.yml
Created January 28, 2017 04:27 — forked from reyjrar/elasticsearch.yml
ElasticSearch config for a write-heavy cluster
##################################################################
# /etc/elasticsearch/elasticsearch.yml
#
# Base configuration for a write heavy cluster
#
# Cluster / Node Basics
cluster.name: logng
# Nodes can have arbitrary attributes we can use for routing
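# e.g. (hedged illustration, not part of the original fork):
#   node.attr.rack: r1
# ...which allocation awareness can then key on:
#   cluster.routing.allocation.awareness.attributes: rack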
@rmax
rmax / dask_elasticsearch.py
Last active May 3, 2018 13:51
An Elasticsearch reader for Dask
from dask import delayed
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
def read_elasticsearch(query=None, npartitions=8, client_cls=None,
                       client_kwargs=None, **kwargs):
    """Reads documents from Elasticsearch.

    By default, documents are sorted by ``_doc``. For more information see the
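    (preview truncated)
    """
    # A hedged sketch of a body consistent with the signature above, not the
    # gist's actual implementation: fan the query out over sliced scrolls
    # (Elasticsearch 5+, and sliced scrolls need npartitions >= 2), one
    # delayed task per partition.
    query = dict(query or {})
    query.setdefault("sort", ["_doc"])

    def scan_slice(slice_id):
        client = (client_cls or Elasticsearch)(**(client_kwargs or {}))
        sliced = dict(query, slice={"id": slice_id, "max": npartitions})
        return list(scan(client, query=sliced, **kwargs))

    return [delayed(scan_slice)(i) for i in range(npartitions)]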
@rmax
rmax / dask_avro.py
Last active September 17, 2018 19:28
An Avro reader for Dask (with fastavro)
"""A fastavro-based avro reader for Dask.
Disclaimer: This code was recovered from dask's distributed project.
"""
import io
import fastavro
import json
from dask import delayed
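The preview stops at the imports. A hedged sketch of how a fastavro-based reader for Dask typically works (the function names are illustrative, not the gist's):

def _read_avro_file(path):
    # Open inside the task so only the path string is serialized.
    with open(path, "rb") as fo:
        return list(fastavro.reader(fo))

def read_avro(paths):
    # One delayed task per file; computing them yields lists of records.
    return [delayed(_read_avro_file)(p) for p in paths]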
/home/rolando/miniconda3/envs/datascience/lib/python3.5/site-packages/distributed/protocol/pickle.py - INFO - Failed to serialize <_io.BufferedReader name='/home/shared/input-01.jl.gz'>
Traceback (most recent call last):
  File "/home/rolando/miniconda3/envs/datascience/lib/python3.5/site-packages/distributed/protocol/pickle.py", line 30, in dumps
    result = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
TypeError: cannot serialize '_io.BufferedReader' object

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/rolando/miniconda3/envs/datascience/lib/python3.5/site-packages/distributed/protocol/pickle.py", line 43, in dumps
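The traceback above is the classic symptom of shipping an open file handle to workers: an _io.BufferedReader cannot be pickled. The usual fix is to pass the path and open the file inside the task; a minimal sketch:

import gzip
from dask import delayed

@delayed
def read_jsonlines(path):
    # Only the picklable path string crosses the wire; the gzip handle is
    # created on the worker.
    with gzip.open(path, "rt") as f:
        return list(f)

part = read_jsonlines("/home/shared/input-01.jl.gz")  # path from the log above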
@rmax
rmax / demo.py
Created September 28, 2016 15:13
import scrapy

settings = {}
bot = scrapy.CrawlerBot(name="mybot/1.0", settings=settings)
def follow_links(response):
    for link in response.iter_links():
        bot.crawl(link.url, callback=follow_links, referer=response)
    bot.emit({
        "url": response.url,
        "status": response.status,
@rmax
rmax / sqlite-kv-restful.py
Created August 13, 2016 17:11 — forked from georgepsarakis/sqlite-kv-restful.py
Simple SQLite-backed key-value storage Rest API. Built with Flask & flask-restful.
import os
import sqlite3
from hashlib import md5
from time import time

import simplejson as json
from flask import Flask, g, request
import flask_restful as restful  # flask.ext was removed in Flask 1.0
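The preview ends at the imports. A hedged sketch of the kind of resource such a key-value API exposes (the class, table, and route names are illustrative, not recovered from the fork):

app = Flask(__name__)
api = restful.Api(app)

class KeyValue(restful.Resource):
    # Assumes g.db is a sqlite3 connection opened in a before_request hook.
    def get(self, key):
        row = g.db.execute('SELECT value FROM kv WHERE key = ?', (key,)).fetchone()
        if row is None:
            restful.abort(404, message='key not found')
        return json.loads(row[0])

    def put(self, key):
        # Upsert the JSON request body under `key`.
        g.db.execute('INSERT OR REPLACE INTO kv (key, value) VALUES (?, ?)',
                     (key, json.dumps(request.get_json())))
        g.db.commit()
        return {'ok': True}

api.add_resource(KeyValue, '/kv/<string:key>')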
import pandas as pd
import numpy as np

# Set up a comparable DataFrame.
df = pd.DataFrame(np.random.randint(20, 100, size=(50, 4)), columns=['A', 'B', 'C', 'D'])
# These two columns will become a multi-column index.
df['year_idx'] = np.random.randint(2000, 2004, 50)
df['id_idx'] = np.random.randint(10000, 19999, 50)
df.drop_duplicates(subset=['year_idx', 'id_idx'], inplace=True)
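The preview stops before the index is actually built; presumably the next step promotes the two columns to a MultiIndex, along these lines:

df = df.set_index(['year_idx', 'id_idx']).sort_index()
# Rows are now addressable by (year, id) pairs, e.g. df.loc[(2001, 12345)]
# (which raises KeyError unless that pair survived the dedup above).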