- あらかじめ aws cli の設定をすませておいてください
- あとは以下のスクリプトを順番に実行してください
`./emr_create_spark_cluster.sh`
`./emr_pyspark_wc.sh`
import math | |
import csv | |
import random | |
random.seed(10) | |
n_numbers = 3 # how many numbers do we want to operate on | |
largest = 10 #largest INT |
from concurrent.futures import ThreadPoolExecutor, as_completed | |
from pyspark.sql import SparkSession | |
import sys | |
if __name__ == "__main__": | |
hive_table_name = sys.argv[1] | |
out_path = sys.argv[2] | |
# setup and suppress creation of _$folder$ objects in S3 | |
ss = SparkSession.builder \ |
install.packages("fitbitScraper") | |
devtools::install_github("trinker/plotflow") | |
library(fitbitScraper) | |
library(ggplot2) | |
library(plotflow) | |
library(dplyr) | |
# Fitbitからのデータ取得 | |
cookie = login(email="www.fitbit.com ログイン用メールアドレス", password="www.fitbit.com ログイン用パスワード") | |
hr_data <- get_intraday_data(cookie, what="heart-rate", date="2016-10-06") |
# -*- coding: utf-8 -*- | |
import numpy as np | |
class Person(): | |
def __init__(self, n): | |
self.preference = np.random.permutation(range(n)) | |
self.preferred_queue = [] | |
self.position = None |
library(dplyr) | |
# 選択肢1, 2のデータを作成して結合 | |
# 項目は以下の通り | |
# クラス番号 | |
# 候補者1支持ダミー | |
# 候補者2支持ダミー | |
# 価値観1 | |
# 価値観2 | |
location1 <- matrix(c(rep(1, 100), |
library(glmnet) | |
library(caret) | |
library(psych) | |
# load data | |
data(cars) | |
pairs.panels(cars) | |
# lm | |
fit.lm <- glm(Price ~ ., data = cars) |
library(xgboost) | |
library(Matrix) | |
# load data | |
data = read.delim("data/sample.tsv", sep="\t") | |
data$v6 = NULL | |
# create data for k-fold cross validation | |
cv = function(d, k) { | |
n = sample(nrow(d), nrow(d)) |
library(randomForest) | |
# load data | |
data = read.delim("data/sample.tsv", sep="\t") | |
# create data for k-fold cross validation | |
cv = function(d, k) { | |
n = sample(nrow(d), nrow(d)) | |
d.randomized = data[n,] # randomize data | |
n.residual = k-nrow(d)%%k |