Assuming you've installed the AWS SDK for Python:
pip install awscli
pip install boto3
Next, run `aws configure` on your laptop or server to add your AWS credentials.
For details, see
$ ./bin/spark-shell | |
14/04/18 15:23:49 INFO spark.HttpServer: Starting HTTP Server | |
14/04/18 15:23:49 INFO server.Server: jetty-7.x.y-SNAPSHOT | |
14/04/18 15:23:49 INFO server.AbstractConnector: Started SocketConnector@0.0.0.0:49861 | |
Welcome to | |
____ __ | |
/ __/__ ___ _____/ /__ | |
_\ \/ _ \/ _ `/ __/ '_/ | |
/___/ .__/\_,_/_/ /_/\_\ version 0.9.1 | |
/_/ |
2014-03-04 | 15dfb8e6cc4111e3a5bb600308919594 | 11 | |
---|---|---|---|
2014-03-06 | 81da510acc4111e387f3600308919594 | 61 |
// Load error messages from a log into memory, then interactively
// search them for various patterns.

// Base RDD: one element per line of the log file.
val lines = sc.textFile("log.txt")

// Transformed RDDs: keep only the lines that start with "ERROR", then
// take the second tab-separated field of each one.
val errors = lines.filter(_.startsWith("ERROR"))
// An ERROR line with no tab has no second field; skip it instead of
// letting the job fail with an ArrayIndexOutOfBoundsException.
val messages = errors.map(_.split("\t")).filter(_.length > 1).map(_(1))

// Ask Spark to keep the extracted fields cached once they are first
// computed, so that repeated interactive queries can reuse them.
messages.cache()
# Start the virtual machine described by the local Vagrantfile.
vagrant up
# Open an SSH session into the running machine; when typed interactively,
# the commands below are entered inside that session.
vagrant ssh
cd jem
# NOTE(review): presumably starts the notebook server -- confirm what
# `nbserver` resolves to on the VM.
nbserver
# Concatenate three part files (part-00001 through part-00003) into "minitweets".
# NOTE(review): this comment used to say "four part files", but the glob
# [1-3] matches only three; widen it to [0-3] if part-00000 was meant too.
cat rawtweets/part-0000[1-3] > minitweets
# Activate the log4j configuration by renaming the shipped template, then
# change the log level to WARN to reduce noise during the demo.
mv conf/log4j.properties.template conf/log4j.properties
vim conf/log4j.properties # Change to WARN
# Launch the Spark shell REPL.
./bin/spark-shell
import nltk

## use nltk.download() within a Python prompt to
## download the `punkt` data
nltk.download()

## Anaconda is recommended, to pick up NumPy, NLTK, etc.
## http://continuum.io/downloads
## this also requires TextBlob/PerceptronTagger
import org.apache.spark.graphx._ | |
import org.apache.spark.rdd.RDD | |
case class Peep(name: String, age: Int) | |
val vertexArray = Array( | |
(1L, Peep("Kim", 23)), | |
(2L, Peep("Pat", 31)), | |
(3L, Peep("Chris", 52)), | |
(4L, Peep("Kelly", 39)), |
<html> | |
<head><title>test pdf</title></head> | |
<div id="pdf" | |
style="width:900px; height:500px" | |
></div> | |
<script src="https://pdfobject.com/js/pdfobject.min.js"></script> | |
<script> | |
var options = { | |
pdfOpenParams: { | |
page: 1, |
#!/usr/bin/env python | |
# encoding: utf-8 | |
import codecs | |
import csv | |
import json | |
import unicodedata | |
filename = "SurveyofDoctorateRecipients_linkages.csv" | |
#filename = "SurveyofEarnedDoctorates_linkages.csv" |
Assuming you've installed the AWS SDK for Python:
pip install awscli
pip install boto3
Next, run `aws configure` on your laptop or server to add your AWS credentials.
For details, see