Created
May 16, 2017 15:33
-
-
Save gadamc/83e17d237cae05e75519b9c705a1ade2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"language_info": { | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"version": "2.7.11", | |
"pygments_lexer": "ipython2", | |
"name": "python", | |
"codemirror_mode": { | |
"version": 2, | |
"name": "ipython" | |
}, | |
"nbconvert_exporter": "python" | |
}, | |
"kernelspec": { | |
"display_name": "Python 2 with Spark 2.0", | |
"language": "python", | |
"name": "python2-spark20" | |
} | |
}, | |
"cells": [ | |
{ | |
"metadata": { | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": " % Total % Received % Xferd Average Speed Time Time Time Current\n Dload Upload Total Spent Left Speed\n100 18.8M 0 18.8M 0 0 17.9M 0 --:--:-- 0:00:01 --:--:-- 18.0M\n", | |
"name": "stdout" | |
} | |
], | |
"execution_count": 5, | |
"source": "# Download the gzipped signal metadata CSV to signaldb.csv.gz.\n# --fail: exit non-zero on an HTTP error instead of saving the error page as the archive.\n# --location: follow redirects so a moved endpoint still yields the real file.\n!curl --fail --location --output signaldb.csv.gz https://setigopublic.mybluemix.net/v1/aca/meta/all" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "-rw-r----- 1 sc3e-53554f95eddadf-4e28db014a7c users 19M May 15 16:36 signaldb.csv.gz\r\n", | |
"name": "stdout" | |
} | |
], | |
"execution_count": 6, | |
"source": "# Confirm the download exists and check its size.\n# Only the downloaded file is listed, so the notebook does not store the whole working directory.\n!ls -alh signaldb.csv.gz" | |
}, | |
{ | |
"metadata": { | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"outputs": [], | |
"execution_count": 7, | |
"source": "# Decompress signaldb.csv.gz into signaldb.csv.\n# -f overwrites a signaldb.csv left by an earlier run, so Restart & Run All works.\n!gunzip -f signaldb.csv.gz" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": "-rw-r----- 1 sc3e-53554f95eddadf-4e28db014a7c users 122M May 15 16:36 signaldb.csv\r\n", | |
"name": "stdout" | |
} | |
], | |
"execution_count": 9, | |
"source": "# Show the size of the decompressed CSV.\n!ls -l -a -h signaldb.csv" | |
}, | |
{ | |
"metadata": { | |
"collapsed": false | |
}, | |
"cell_type": "code", | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"execution_count": 11, | |
"data": { | |
"text/plain": "[Row(UNIQUEID=u'ldn1622_1500_168_3011_19_103758', TIME=u'2013-03-02 06:03:29', ACTTYP=u'pointantswait', TGTID=u'2800031', CATALOG=u'ldn1622grid', RA2000HR=u'5.872', DEC2000DEG=u'1.087', POWER=u'401.0', SNR=None, FREQMHZ=u'1499.79070556', DRIFTHZS=u'-1.145', WIDHZ=u'2.778', POL=u'left', SIGTYP=u'CwP', PPERIODS=None, NPUL=u'0', INTTIMES=u'94.0', TSCPAZDEG=u'233.671', TSCPELDEG=u'36.754', BEAMNO=u'3', SIGCLASS=u'Cand', SIGREASON=u'PsPwrT', CANDREASON=u'SnMulBm', CONTAINER=u'setiCompAmp', OBJECTNAME=u'2013-03-02/act168/2013-03-02_06-03-45_UTC.act168.dx3011.id-19.R.archive-compamp'),\n Row(UNIQUEID=u'ldn1622_1500_168_3002_4_103698', TIME=u'2013-03-02 06:03:29', ACTTYP=u'pointantswait', TGTID=u'2800031', CATALOG=u'ldn1622grid', RA2000HR=u'5.872', DEC2000DEG=u'1.087', POWER=u'414.0', SNR=None, FREQMHZ=u'1491.86280556', DRIFTHZS=u'-0.121', WIDHZ=u'2.778', POL=u'right', SIGTYP=u'CwP', PPERIODS=None, NPUL=u'0', INTTIMES=u'94.0', TSCPAZDEG=u'233.671', TSCPELDEG=u'36.754', BEAMNO=u'3', SIGCLASS=u'Cand', SIGREASON=u'PsPwrT', CANDREASON=u'SnMulBm', CONTAINER=u'setiCompAmp', OBJECTNAME=u'2013-03-02/act168/2013-03-02_06-03-42_UTC.act168.dx3002.id-4.R.archive-compamp'),\n Row(UNIQUEID=u'ldn1622_1500_252_1021_3_129682', TIME=u'2013-03-03 05:21:45', ACTTYP=u'target1-on', TGTID=u'2800031', CATALOG=u'ldn1622grid', RA2000HR=u'5.872', DEC2000DEG=u'1.087', POWER=u'407.0', SNR=None, FREQMHZ=u'1477.35708611', DRIFTHZS=u'0.06', WIDHZ=u'2.778', POL=u'left', SIGTYP=u'CwP', PPERIODS=None, NPUL=u'0', INTTIMES=u'94.0', TSCPAZDEG=u'223.622', TSCPELDEG=u'42.113', BEAMNO=u'1', SIGCLASS=u'Cand', SIGREASON=u'PsPwrT', CANDREASON=u'SnMulBm', CONTAINER=u'setiCompAmp', OBJECTNAME=u'2013-03-03/act252/2013-03-03_05-21-54_UTC.act252.dx1021.id-3.L.archive-compamp'),\n Row(UNIQUEID=u'ldn1622_1500_252_1011_7_129839', TIME=u'2013-03-03 05:21:47', ACTTYP=u'target1-on', TGTID=u'2800031', CATALOG=u'ldn1622grid', RA2000HR=u'5.872', DEC2000DEG=u'1.087', POWER=u'402.0', SNR=None, 
FREQMHZ=u'1469.56154444', DRIFTHZS=u'1.175', WIDHZ=u'2.778', POL=u'right', SIGTYP=u'CwP', PPERIODS=None, NPUL=u'0', INTTIMES=u'94.0', TSCPAZDEG=u'223.622', TSCPELDEG=u'42.113', BEAMNO=u'1', SIGCLASS=u'Cand', SIGREASON=u'PsPwrT', CANDREASON=u'SnMulBm', CONTAINER=u'setiCompAmp', OBJECTNAME=u'2013-03-03/act252/2013-03-03_05-21-53_UTC.act252.dx1011.id-7.R.archive-compamp'),\n Row(UNIQUEID=u'ldn1622_1500_252_1015_5_129982', TIME=u'2013-03-03 05:21:49', ACTTYP=u'target1-on', TGTID=u'2800031', CATALOG=u'ldn1622grid', RA2000HR=u'5.872', DEC2000DEG=u'1.087', POWER=u'406.0', SNR=None, FREQMHZ=u'1473.07356667', DRIFTHZS=u'0.754', WIDHZ=u'2.778', POL=u'right', SIGTYP=u'CwP', PPERIODS=None, NPUL=u'0', INTTIMES=u'94.0', TSCPAZDEG=u'223.622', TSCPELDEG=u'42.113', BEAMNO=u'1', SIGCLASS=u'Cand', SIGREASON=u'PsPwrT', CANDREASON=u'SnMulBm', CONTAINER=u'setiCompAmp', OBJECTNAME=u'2013-03-03/act252/2013-03-03_05-21-53_UTC.act252.dx1015.id-5.L.archive-compamp')]" | |
}, | |
"output_type": "execute_result" | |
} | |
], | |
"execution_count": 11, | |
"source": "from pyspark.sql import SparkSession\n\n# Reuse the active Spark session if there is one, otherwise create it.\nspark = SparkSession.builder.getOrCreate()\n\n# Read the CSV through the public reader API rather than Spark's internal CSVFileFormat class path.\n# header=True takes the column names from the first line of the file.\n# No schema is supplied or inferred, so every column is loaded as a string.\ndf_data_1 = spark.read.csv('signaldb.csv', header=True)\ndf_data_1.take(5)\n" | |
}, | |
{ | |
"metadata": { | |
"collapsed": true | |
}, | |
"cell_type": "code", | |
"outputs": [], | |
"execution_count": null, | |
"source": "" | |
} | |
], | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment