short url: caseywatts.com/selfpublish
My book is out! It's an applied psychology / self-help book targeted at developers: Debugging Your Brain
Markdown
--> PDF
(as a booklet!)
Markdown
--> EPUB
and MOBI
short url: caseywatts.com/selfpublish
My book is out! It's an applied psychology / self-help book targeted at developers: Debugging Your Brain
Markdown
--> PDF
(as a booklet!)
Markdown
--> EPUB
and MOBI
# | |
# Some constants | |
# | |
aws_profile = "your_profile" | |
aws_region = "your_region" | |
s3_bucket = "your_bucket" | |
# | |
# Reading environment variables from aws credential file | |
# |
license: WTFPL |
import pandas as pd | |
def _map_to_pandas(rdds): | |
""" Needs to be here due to pickling issues """ | |
return [pd.DataFrame(list(rdds))] | |
def toPandas(df, n_partitions=None): | |
""" | |
Returns the contents of `df` as a local `pandas.DataFrame` in a speedy fashion. The DataFrame is | |
repartitioned if `n_partitions` is passed. |
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy #https://github.com/tweepy/tweepy | |
import csv | |
#Twitter API credentials | |
consumer_key = "" | |
consumer_secret = "" | |
access_key = "" |
""" | |
A webserver to test Google OAuth in a couple of scenarios. | |
""" | |
import argparse | |
import time | |
import tornado.ioloop | |
import tornado.web | |
import tornado.auth | |
import tornado.gen |
#!/bin/sh | |
TABLE_SCHEMA=$1 | |
TABLE_NAME=$2 | |
mytime=`date '+%y%m%d%H%M'` | |
hostname=`hostname | tr 'A-Z' 'a-z'` | |
file_prefix="trimax$TABLE_NAME$mytime$TABLE_SCHEMA" | |
bucket_name=$file_prefix | |
splitat="4000000000" | |
bulkfiles=200 |
package org.mazerunner.core.programs | |
import org.apache.spark.graphx.{Graph, EdgeTriplet, VertexId} | |
import org.mazerunner.core.abstractions.PregelProgram | |
/** | |
* @author kbastani | |
* The [[MaximumValueProgram]] is an example graph algorithm implemented on the [[PregelProgram]] | |
* abstraction. | |
*/ |
import multiprocessing | |
import pandas as pd | |
import numpy as np | |
def _apply_df(args): | |
df, func, kwargs = args | |
return df.apply(func, **kwargs) | |
def apply_by_multiprocessing(df, func, **kwargs): | |
workers = kwargs.pop('workers') |