Skip to content

Instantly share code, notes, and snippets.

@kenthzhang
Created April 28, 2017 23:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kenthzhang/e20bb9755481fe8088790a8ce653384b to your computer and use it in GitHub Desktop.
Save kenthzhang/e20bb9755481fe8088790a8ce653384b to your computer and use it in GitHub Desktop.
# Prepare Hive benchmark tables from the airline on-time CSV data sets.
#
# For each data set size (1M, 2M, 5M, 10M, 100M) the CSV stored under
# <wasb_root>/airline/csv/<name> is copied with rxDataStep() into a Hive
# table of the same name. The Spark compute context is always released at
# the end, even when one of the loads fails.

# Root of the blob storage container; used both as the Spark name node and
# as the prefix of every CSV path, so it is defined exactly once.
wasb_root <- "wasb://benchmark@airlinecranrscaler.blob.core.windows.net/"

# Data set names; each one is both the CSV directory name and the Hive
# table name.
air_tables <- c("airOT1M", "airOT2M", "airOT5M", "airOT10M", "airOT100M")

cc <- rxSparkConnect(
  reset = TRUE,
  nameNode = wasb_root,
  driverMem = "4g",
  numExecutors = 4,
  executorCores = 5,
  executorMem = "16g",
  executorOverheadMem = "4g"
)

tryCatch(
  {
    hdfs <- RxHdfsFileSystem(hostName = wasb_root)

    # prepare hive data 1M, 2M, 5M, 10M, 100M
    for (air_table in air_tables) {
      airText <- RxTextData(
        paste0(wasb_root, "airline/csv/", air_table),
        firstRowIsColNames = TRUE,
        fileSystem = hdfs
      )
      airHive <- RxHiveData(table = air_table)
      rxDataStep(airText, airHive)
    }
  },
  # Release the Spark application whether or not the loads succeeded, so a
  # failed run does not keep holding the executors requested above.
  finally = rxSparkDisconnect(cc)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment