Skip to content

Instantly share code, notes, and snippets.

View rafaelnovello's full-sized avatar

Rafael Novello rafaelnovello

View GitHub Profile
import urllib3
import requests
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def test(url, report):
    """Fetch *url* and record the response when the server answers 200.

    Args:
        url: Address to request with an HTTP GET.
        report: Mapping whose ``'success'`` entry supports ``append``; the
            response object is appended to it on a 200 status code.

    Returns:
        None. The function only mutates ``report``.
    """
    request_headers = {'User-agent': 'your bot 0.1'}
    # verify=False turns off TLS certificate verification for this request.
    response = requests.get(url, headers=request_headers, verify=False)
    got_ok = response.status_code == 200
    if got_ok:
        report['success'].append(response)
# reference: https://hub.docker.com/_/ubuntu/
# NOTE(review): the reference above points at the ubuntu image page, while the
# base image used below is python:3.8-slim-buster - confirm which is intended.
FROM python:3.8-slim-buster
# Adds metadata to the image as a key/value pair (example: LABEL version="1.0").
LABEL maintainer="Rafael Novello <rafa.reis.novello@gmail.com>"
# Set the locale environment variables LANG and LC_ALL to C.UTF-8.
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
@rafaelnovello
rafaelnovello / marvin_example.py
Created June 27, 2019 22:37
Marvin Engine Example Code
# Data Acquisitor
from marvin_python_toolbox.common.data import MarvinData
import pandas as pd

# Download the Iris CSV through Marvin's data helper and load it with pandas.
file_path = MarvinData.download_file(url="https://s3.amazonaws.com/marvin-engines-data/Iris.csv")
iris = pd.read_csv(file_path)
# Remove the 'Id' column in place before exposing the frame.
iris.drop('Id', axis=1, inplace=True)
# The cleaned frame is published under the name marvin_initial_dataset.
marvin_initial_dataset = iris
@rafaelnovello
rafaelnovello / README.md
Created June 6, 2018 19:49 — forked from curran/README.md
The Iris Dataset

This is the "Iris" dataset. Originally published at UCI Machine Learning Repository: Iris Data Set, this small dataset from 1936 is often used for testing out machine learning algorithms and visualizations (for example, Scatter Plot). Each row of the table represents an iris flower, including its species and dimensions of its botanical parts, sepal and petal, in centimeters.

The HTML page provides the basic code required to load the data and display it on the page (as JSON) using D3.js.

Built with blockbuilder.org

web counter
@rafaelnovello
rafaelnovello / openml_test.py
Last active August 22, 2019 11:52
Testing OpenML API
import openml as oml
# List OpenML tasks filtered by task_type_id=1, requesting size=200.
# NOTE(review): task type 1 is presumably supervised classification - confirm
# against the OpenML documentation.
tasks = oml.tasks.list_tasks(task_type_id=1, size=200)
print(len(tasks))
# Collect the 'source_data' entry of every listed task, then print how many
# distinct values there are.
names = [v['source_data'] for _, v in tasks.items()]
print(len(set(names)))
# Fetch 'predictive_accuracy' evaluations for task 8, requesting size=100.
results = oml.evaluations.list_evaluations('predictive_accuracy', size=100, task=[8])
print(len(results))
# Gather the flow_name attribute of each returned evaluation.
flows = [v.flow_name for _, v in results.items()]
from datetime import datetime
links = [
'https://g1.globo.com/rj/rio-de-janeiro/noticia/pf-cumpre-mandados-em-mais-um-desdobramento-da-lava-jato-no-rio.ghtml',
'https://g1.globo.com/sp/sao-paulo/noticia/ataque-a-tiros-mata-1-e-deixa-2-feridas-em-frente-a-hotel-em-sp-veja-video.ghtml',
'https://globoesporte.globo.com/olimpiadas-de-inverno/noticia/nervosa-isadora-williams-sofre-queda-na-final-da-patinacao-e-chora-muito-triste.ghtml',
'https://g1.globo.com/agenda-do-dia/noticia/sexta-feira-23-de-fevereiro.ghtml',
'https://g1.globo.com/rj/rio-de-janeiro/noticia/general-richard-nunez-sera-o-secretario-de-seguranca-do-rj.ghtml',
'https://g1.globo.com/rj/rio-de-janeiro/noticia/comandante-de-instituicao-de-ensino-da-pm-do-rj-e-exonerado-policiais-denunciam-abusos.ghtml',
@rafaelnovello
rafaelnovello / flat_dataframe.py
Created January 17, 2018 16:34
Code to duplicate row for each value in an array cell
def flat(df):
    """Return a copy of *df* with one row per email address.

    Each cell of the ``'emails'`` column is expected to hold the string form
    of a list, e.g. ``"['a@x.com', 'b@x.com']"``. The row is duplicated once
    for every address in that cell, with ``'emails'`` replaced by the single
    address; all other columns are copied unchanged.

    Args:
        df: DataFrame containing an ``'emails'`` column of list-like strings.

    Returns:
        A new DataFrame with the same columns as *df* and a fresh 0..n-1
        index. A cell that holds an empty list (``"[]"``) still yields one
        row, whose ``'emails'`` value is the empty string.
    """
    records = []
    for _, row in df.iterrows():
        addresses = row['emails'].strip('[]').replace("'", "").split(',')
        for address in addresses:
            record = row.to_dict()
            # Splitting on ',' leaves the space that follows each comma in a
            # list repr; strip it so addresses are not prefixed with ' '.
            record['emails'] = address.strip()
            records.append(record)
    # Build the frame once at the end: DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call.
    return pd.DataFrame(records, columns=df.columns)
import sys
import logging
from flask import Flask
from raven.conf import setup_logging
from raven.handlers.logging import SentryHandler
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.ext.flask.middleware import XRayMiddleware
ERROR:slimta.queue:exception:ConnectionLost:unhandled args=() message='' traceback=['Traceback (most recent call last):\n', ' File "/usr/local/lib/python2.7/dist-packages/slimta/queue/__init__.py", line 289, in _attempt\n self.relay._attempt(envelope, attempts)\n', ' File "/usr/local/lib/python2.7/dist-packages/slimta/relay/__init__.py", line 86, in _attempt\n return self.attempt(envelope, attempts)\n', ' File "/usr/local/lib/python2.7/dist-packages/slimta/relay/smtp/mx.py", line 208, in attempt\n return relayer.attempt(envelope, attempts)\n', ' File "/usr/local/lib/python2.7/dist-packages/slimta/relay/pool.py", line 89, in attempt\n return result.get()\n', ' File "/usr/local/lib/python2.7/dist-packages/gevent/event.py", line 223, in get\n raise self._exception\n', ...]
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/gevent/greenlet.py", line 390, in run
result = self._run(*self.args, **self.kwargs)
File "/usr/local/lib/python2.7/dist-packages/slimta
from xlwt import Workbook, easyxf
# Colour names, in their original order, written as one whitespace-separated
# block and split into a list of strings.
# NOTE(review): presumably these are colour names understood by xlwt - confirm.
colors = """
aqua black blue blue_gray bright_green brown coral cyan_ega
dark_blue dark_blue_ega dark_green dark_green_ega dark_purple dark_red
dark_red_ega dark_teal dark_yellow gold gray_ega gray25 gray40 gray50
gray80 green ice_blue indigo ivory lavender light_blue light_green
light_orange light_turquoise light_yellow lime magenta_ega ocean_blue
olive_ega olive_green orange pale_blue periwinkle pink plum purple_ega
red rose sea_green silver_ega sky_blue tan teal teal_ega turquoise
violet white yellow
""".split()