Skip to content

Instantly share code, notes, and snippets.

@emerrf
Created August 10, 2020 14:11
Show Gist options
  • Save emerrf/301268f5f0162ea25f02d7ae1314bff3 to your computer and use it in GitHub Desktop.
Save emerrf/301268f5f0162ea25f02d7ae1314bff3 to your computer and use it in GitHub Desktop.
PySpark DataFrame sample
from decimal import Decimal

import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql.types import (StructType, StructField, StringType, ShortType, BooleanType,
                               IntegerType, FloatType, DoubleType, DecimalType, LongType)
# Sample records for the DataFrame: a list with one dict per row (ids 1-15).
# The keys match the field names declared in `data_schema` below.
#   * `last_name` and `age` are None in some rows; they are the only fields
#     declared nullable in the schema.
#   * `longitude` / `latitude` are built with Decimal(<float literal>), so each
#     value carries the float's binary expansion rather than the digits as
#     typed; Decimal("12.3750266") would be exact.
#   * `image_url` holds a full data URI ("data:image/png;base64,<payload>"),
#     not a bare base64 payload.
data_dict = [
{"id":1,"first_name":"Grace","last_name":"Balsellie","email":"gbalsellie0@nih.gov","gender":"Male","ip_address":"210.75.33.4","has_a_turtle":True,"age":31,"date_of_birth":"1982-06-13","last_seen_epoch":1579953816000,"height_m":1.85,"weight_kg":81.1,"longitude":Decimal(12.3750266),"latitude":Decimal(4.6740235),"creditcard_number":"5602241598652866450","description":"Supplement Descending Colon with Nonautologous Tissue Substitute, Percutaneous Endoscopic Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHOSURBVDjLpZOxa1RBEIe/d/e8FKeFEA2IGBWCICZYBSESBCFglUDSCJZaRBBbK1HQ0s4/QQlCgoKdoBA9sVBshCBETCNRiUUg5PDt7MxY7HuXdxgEycKwyzJ88/vN7Gbuzl5WDvDozeZtd66p21EzQw2iGaqGmhPVaqFodNTs/f0rI+M5gLnfmB0/MPg/le88+TLWU6BmgwDtpevgDhrBFETSORQgAQoBEbZvvUJEB2qAqg8ORw6BxRQeS0gBUkAMsPIdAIm60wNVKwEZrG+AW1JilpRotQNDQwCEOiCWgIXhe1w+f/if3hffrXMhxH4Fooa5kzdT0rNPi3TWlrl6bp7PP1d4ufqCiyNTzIzOUYiz1RWCJECjsuBA3swAmBmdoxu6APza3uDB9EM6a8sAFFEJYsRoOwBRww3yxt+Su6FLq9nqAQuxst11QDTcnX2lhc7XVO3jtw8cOzjMzafzTJ26RJUL0B7Ia020dNlsJAsTJyaZODlZziVj+swsWZb1AarJJUCMeCnn8esfaWruiIKoEtQIkry3mlUx+qfg7owd389prd6+9/7CbsvMrfaQ/O3dhdWzQa0tUZGoaDREjahxV8Dm1u/nANlev/MfAjw0JrMu09AAAAAASUVORK5CYII=", "json_struct": {"a": 1, "b": -1}},
{"id":2,"first_name":"Hildy","last_name":None,"email":"hwoodson1@google.de","gender":"Female","ip_address":"73.175.26.17","has_a_turtle":False,"age":None,"date_of_birth":"1989-01-21","last_seen_epoch":1588430854000,"height_m":1.84,"weight_kg":79.4,"longitude":Decimal(-98.882461),"latitude":Decimal(19.5056005),"creditcard_number":"5018264415096666589","description":"Dilation of Inferior Vena Cava with Intraluminal Device, Open Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHVSURBVDjLjZPLaiJBFIZNHmJWCeQdMuT1Mi/gYlARBRUkao+abHUhmhgU0QHtARVxJ0bxhvfGa07Of5Iu21yYFPyLrqrz1f+f6rIRkQ3icca6ZF39RxesU1VnAVyuVqvJdrvd73Y7+ky8Tk6n87cVYgVcoXixWNByuVSaTqc0Ho+p1+sJpNvtksvlUhCb3W7/cf/w+BSLxfapVIqSySRlMhnSdZ2GwyHN53OaTCbU7/cFYBgG4RCPx/MKub27+1ur1Xqj0YjW6zWxCyloNBqUSCSkYDab0WAw+BBJeqLFtQpvGoFqAlAEaZomuc0ocAQnnU7nALiJ3uh8whgnttttarVaVCgUpCAUCgnQhMAJ+gG3CsDZa7xh1mw2ZbFSqYgwgsGgbDQhcIWeAHSIoP1pcGeNarUqgFKpJMLw+/0q72azkYhmPAWIRmM6AGbXc7kc5fN5AXi9XgWACwAguLEAojrfsVGv1yV/sVikcrksAIfDIYUQHEAoPgLwT3GdzWYNdBfXh3xwApDP5zsqtkoBwuHwaSAQ+OV2u//F43GKRCLEc5ROpwVoOngvBXj7jU/wwZPPX72DT7RXgDfIT27QEgvfKea9c3m9FsA5IN94zqbw9M9fAEuW+zzj8uLvAAAAAElFTkSuQmCC", "json_struct": {"a": 11, "b": 10}},
{"id":3,"first_name":"Sayre","last_name":"Brolechan","email":"sbrolechan2@wikia.com","gender":"Male","ip_address":"236.38.84.92","has_a_turtle":True,"age":34,"date_of_birth":"1981-10-13","last_seen_epoch":1580120978000,"height_m":1.68,"weight_kg":72.9,"longitude":Decimal(105.6603149),"latitude":Decimal(21.0877573),"creditcard_number":"374283537666125","description":"Insertion of Radioactive Element into Bilateral Breast, Percutaneous Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAANhSURBVDjLfdPfT5V1AMfx93N+8GPnOYSHhMM5KCEcE5eA0ZAxGm2Uc3O12VV10cZFXhTe4HBu5eyiNVaRcxY012xjtS5QsezHpnPNMlQaRNAhwKNUIiLE4eA5z/P9Pt9znufpwtz0pvcf8Nrn5qO5rsv9jky+rVs51SWV2iWVaJLKQlpyREj1vVfteLfYfSn/w5ejJg+k3Qd6Jw63WVn12ZPFrVVeXwFZzaa0cD3X5qc4Hf+yP6p9MqmyuW6l7I5P91ZffAh4/7dDezb6a06XBCLcUiuMZ6bwuh5a9O18ceV4sjC3pymoPTvSVKOH4n/e5a/bmRcHD9QNAXjeGz8UkkoOlOobGcvM8mPqMqnsGjWFldxZvIlhiLcCbvv+zaUFobJ1BcTKdYRQAzsPDocAPKYluhqL2/SEmGfKnMHMSdZ7Q+jSz5mJk+NR77GrUmT31mwIML0gCRXlE6sI6tIUXf8Bxm6vP5/JzDQqZ1Hk0akr2sLI7M+ItNyXtXxHGzYFvSrrML+imF20CJcEkELuBvCZQm61tBweNBr0J3gsP8rfNxKMJkZPVAUHIkJYjdGSQi7PmpiGjXHXpjbqx5KiCsBnCFOUFTya1xp8ijsLN+kf601ahjp6ePX5vpm1Ud+5wOOJpVW5zYeGaTi4joOlfCghvfcWmGLulz+GG87+PrRsCavnwO2KwfZk4FXXjM88bY4+c3ZrN9KyMU0XYdiUrPNzaymDskTiHmCIz7+aONV/ZD727eY1b5driqnspg26NnsdJ51BCIEpbXLiAuGiiyiZYtkQlEUCa/AavqFEwwnXFD2uKY5lqyvzstWVOLaNb2wSDAPLtFhNfUc4PEJjbRMVoRg/xM/g6j+1NXdGPvC4GcPnpDOvWDsa81SsCjEzjWMInIyBbQgsaZJMfU3Dlnpsj019+XPYWpbmbS0Ab3j8H/ctO2mjQ7s6CvFp/OFyHGFiC4EtJEpIkulF/JrOC7X7ANjffpzq0jqAAg9A4anBk+7MjZ3eb87PceESTmIO25TY0sJSgn9SK058YZie8x0A9Jzr4PrSBIDUHnzjam29bluq21Vql6OyTW4uVxdOLkw2d0beqah65M2WulZi4e1cW/yV4YlLzM+t9T4E/F/NnZEe4HUgCKSBvisfLRz8F8J11bR5XdMKAAAAAElFTkSuQmCC", "json_struct": {"a": 13, "b": 1}},
{"id":4,"first_name":"Evonne","last_name":"Hindmore","email":"ehindmore3@sfgate.com","gender":"Female","ip_address":"120.154.10.57","has_a_turtle":True,"age":34,"date_of_birth":"1987-07-12","last_seen_epoch":1583235146000,"height_m":1.73,"weight_kg":66.7,"longitude":Decimal(111.945153),"latitude":Decimal(28.018107),"creditcard_number":"3552786992911586","description":"Extirpation of Matter from Left Upper Arm Muscle, Open Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAI9SURBVDjLpVNLSJRhFD2f/+QThVHKJ41Ti0pblCAKrtJQ7LGT0E21CUEisFUtioIeIEGLICkwahUtooW4aAQTcahNBWE1gWOY4yQIio8c5/vuo8X8jYuCBO/mfmdxD+ee+x2jqthJ5WCHFYg97U8wc7UIg5nA7EDkQGSz3TkLIhs5dWu84y8CZq7e09IJVYayQIUgwlAmKDsIE5QJH4aftP9TAZGDCCG9koQyQchB2GU6WQhZ5JVU4lHxAAZvvlEmBhMzOeqbvHfycYDIorzu9H935fExXO9pAIsisbjuPXj5/i6ADMG1kRnEkmtgKyDLYMugNGfxwaoikCNYUtSHgjgaDuL+83elABBwLo3e3ZPYyJn1JTuwL/0PLiwL4UKiESUFBrMLyzhQE4SzDlkCcRZsU/6gyw4K2YyR5OCsgyPBl8Q6Upa3CKzdBJNFbnF5xnHynRfyL8BQcji29hru9lWEk3HY0gq0ppsAnIM5c/yIqgpEBKoC9buoZrAqmosFnfWVqGvrQl64HqlPEUxNjGLl29dLOS9GP5qppPse3N+MqsOtiC2aVKihEyW1TZheyh0bjsZNI8/NHGrrQn58HOZZDwpnXmFfbRnUaH/Av9LZaDQ6ACAFYCgSiVz0330A4IkJ51eEgROXtz7QjUp4YmrNdsIUaQ/MtXSfryn6MYJ0agEbANZWPcTimN9WmApLy4c+v52gn5sFWPV2YXnJYHresAIPzXbjHO3ee+XXUrLXYxNiT+cVGOyI0J3frMiI4RHtXVwAAAAASUVORK5CYII=", "json_struct": {"a": 4, "b": -2}},
{"id":5,"first_name":"Gil","last_name":None,"email":"ggothup4@wiley.com","gender":"Male","ip_address":"55.255.10.128","has_a_turtle":True,"age":None,"date_of_birth":"1982-09-30","last_seen_epoch":1569012316000,"height_m":1.88,"weight_kg":55.1,"longitude":Decimal(122.1039229),"latitude":Decimal(40.784068),"creditcard_number":"3559054848061301","description":"Replacement of Genitalia Skin with Nonautologous Tissue Substitute, Full Thickness, External Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAGMSURBVDjLY/j//z8DJZiggtx9Sasyd8Yxk21Axo7YSymbow4QZUDJ8QyHoiNpB/IPJP/P3pPwP3177P+mQ5X/6/aV/o9cFrATrwHFxzIcCg+nnplzacr/TbdW/19/c8X/tTeW/l91bdH/5Vfn/y/ZkvPfb7rbHZwGFBxKnTn9fN//jTdX/W8+XPU/cX34/5iVQf8rtuf/L9mc/d9nqutuvC7I2Zv4AOjf/0D//o9fG3YIJh4wy+OS9xTnQ2699kyO7VacRAUi0L/wUPea5LTGtceW9FgA+ncNyekgfJEfZ9AcTyagfw+59ztcgolbVBsdMi7V/a+Xr/lfK0v1AV4XAP27O2tl0v/UJbH/rRtM/5tVGf6PmB74v/dE0//khdH/VVMUZ+I0AOjflxnLE/5PP9v7f8rprv8TT7X/7zvZ8r/nRON/kLhKssIZxXhZB7wusGu22Bk3N+x/1Mzg//qFWv+1s9X+q6cp/1dOUjigEIeqGWcgAv17AOjfS2RnJt08DWbNTNVVVMmNhDAANau2t3wToKQAAAAASUVORK5CYII=", "json_struct": {"a": 2, "b": -2}},
{"id":6,"first_name":"Corbie","last_name":"Pakeman","email":"cpakeman5@webeden.co.uk","gender":"Male","ip_address":"10.251.75.196","has_a_turtle":True,"age":31,"date_of_birth":"1982-05-23","last_seen_epoch":1593854754000,"height_m":1.83,"weight_kg":62.9,"longitude":Decimal(106.8502879),"latitude":Decimal(-6.4185424),"creditcard_number":"3542124178136997","description":"Repair Right Lacrimal Duct, Percutaneous Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAALVSURBVBgZBcE7jBRlAADg79+dvb2929u9F8cbIeHhK8TeggiNHnbQaDS2xsICDXbGzt7KWEi0sLHQGEUiYhTQYEKEEDlCcrzUg4PjgL2dmZ3Z3bnx+8L80VfOzMzNHKxWqwAgAAAIAiBgWKx7+ODhL9HM3MzBzz85IVSC4foAAEFQKgWQDmIEZVlRrpdKhfePv3cwqlarCkNh8JXk9CVTZ1Irx3cYn2i6fv0P15LP3F3pmJhoub/aUY1q1pLMW/OlWlRVAeh0VnWeGbEWx9JTC+L4njiOhfySvNfVS7v6vVivFxtmiRDWEUQB0G5voBwYe3eaj/9SPzJrbvMWjcENveEeE5MTRqoVUW1EnGZCyBFUgKDT6YjjridzhbWpod7XixTryvScQR5Lu4lBnsqzRD9PhVCCCKDdnqMcmGy3dN5+1tSnq4YHNqvP5nb0V9Wa243Vq2ojNd20jzUCFQLIslwSpzprHd3xwuO9ZF/eVBaZSv+krBfr54ksia33EwEEUQA0GpOarUS71QK1N3ZqHDsvKp6yZeaqrNGztXlFfWRFv3dbtfKmgCgAup1EHOdIXFxa9s+gYuPuwr4T1y0dTsxc+FD8288eLz8SmmPW5xMBkRAEtCY3WRvmLqep2/2a6alpy4eCQyeH7py9Z7Rz3/7XP1Df9ZzelZ8snD9j95OmCpRl6c9/b/l+ccF/j1ZlWSaOE3Geu/B0qXb2d3sOHDF641fhi9eM3fzGzh1Tdj+6KgpIh4mVYqA+0jTeaMvyvuZoU/vvO7bdTS0NMqObdjF/DED00WatLBYRhFAxoaUXUvWiYSw0bbz2wMhSz60X96tcOC29/IPx796R95al6K5VxfUx0XBYKIrC1ukXbJ0hgCDswUvMhiA+klg4961tUw1Rtaa7MnR7pWpxcp/w6tHDP26Ym325WosQBEAAADyf3LL97kUTWSIeHbPY3uvscNOp/wGooE3b/ShD8gAAAABJRU5ErkJggg==", "json_struct": {"a": 8, "b": -1}},
{"id":7,"first_name":"Normy","last_name":"Goldsack","email":"ngoldsack6@sbwire.com","gender":"Male","ip_address":"145.153.190.150","has_a_turtle":False,"age":32,"date_of_birth":"1989-05-18","last_seen_epoch":1571175296000,"height_m":1.72,"weight_kg":82.8,"longitude":Decimal(-3.8235525),"latitude":Decimal(37.7801069),"creditcard_number":"6384709890780926","description":"Drainage of Right Hepatic Duct, Open Approach, Diagnostic","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAITSURBVDjLdZI/aNNBFMc/vySSpLZFg5QguvgHcXHTImQu6OQq6CDoIDjoIpk6dLHgKA7iIOjgotjNLqJCKXQpooMOFSlWGpRGU/NLLnf33nNIQtKmPvjyOO7e57537yVmRj9uvblmF09fYLW+jJgiKj0pUSNnD1V4svCUxTvvkn5NjqGIIaLWPSwqRNVeHsj7OFyyC+AjZhBEhiBdqWp3rxN2ADLDi+AjecbIZwoEiXgJeAkEieSzRYo2TvDh/w6Cj3P19fezV/OfKYV1CuZQNZqa5Zce5u0nJfg491/As2MfNlrxY7M4dXucsVMYGRJtMxkajG9/4WD9QfPyebcxXJP0uxCXj0xjLHDiYdmyk2isQ/yLSYqJgyQBCTRXqzXxXCpfsZUdf2ASqhy9W7bcASz+AWlj4npK0c5PTFMKJ2+WwzbVkSdo9JXs/jNY3MbU9dTpyWHaQsNvMmNThJTKSBdUpWSZHKZt0E5PbiBxIE2wQPSURgGiYBHUD6z3b5eBI6xD9HvMgUapW2hgFnZZdwMIivgWoU19FBDckmytkCTZnfally2QyRRpb36n1WBpBCCO+ebqvZq6LcgWMIuY9JygJLkJokv4+vJ1rZ0yPzIHAJuPkhsSmJs4d728r3QcEsNiivgG6Y811p4v1topszMv7PGeAIBv95Np36AaHRWJlEIHXIu6S1kSYX7mVXeA+vEP7PyqQia3ZfwAAAAASUVORK5CYII=", "json_struct": {"a": 0, "b": 4}},
{"id":8,"first_name":"Reamonn","last_name":"Freda","email":"rfreda7@moonfruit.com","gender":"Male","ip_address":"178.22.190.191","has_a_turtle":True,"age":34,"date_of_birth":"1989-02-13","last_seen_epoch":1591673753000,"height_m":1.66,"weight_kg":62.1,"longitude":Decimal(33.4901963),"latitude":Decimal(46.8117208),"creditcard_number":"3557354120556693","description":"Magnetic Resonance Imaging (MRI) of Thorax Subcutaneous Tissue using Other Contrast","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJ8SURBVDjLpVLfS1NhGPYP8D6im8DKwrYhUaa1tEyyIXlRWtFFJlZQERIGUZKGI5dO7ZeUEl1YaKUYkyU1J0hE/ppzuOnUDbdpbp7Nue2crZ2J9fSeQ4LhdtUHLx/fx/c87/M+z5cEIOl/6p/DsjGnzWfIhnf0CJjhw2AGD2HpWxY8Xw/CPXAAi/378aNvHxY+p7viEhD416q/FTFfC2JLL8AvPkd0/gl+OhoRsdXDN1gsgLm4CghcE5opw6qvFeHpfHDm4wgZsxEcykLEroZ/tFQAryUcwTsij8WYZ4i6boGz5IE1HkWQxojY6xAwlZN0OVyfZClxCbzD8jMBywXEvC0IT50AazqG4Kgc3ORNcNYqeAYUcGllioQmklnhiKsavLsR3EQuQmPZCAxmitK9388RWFqRMAUCZyyPFSLGvKSOCoTG8xAcycEKOR+eeSSAfzs1e3lHdxo/17WHt79P5W3tO/nZNymMSEAxMezsbepO8y+Q484Gce6IrQ5hqwqsWUmkVQgaKhEYvosFbT4IHJl+vV30I4kyDlLGPGXMU8Y8Oc3P98p4zvoQvl4ZlvWkyliNro4iVDQX40pjIc4rc9iTd6SVm/7Bejl7JAMrhnKwEzUEvo/2tlN40HkJWkszTG4dmvqu4WyTBBnXt6rjEjg+ponSPf1FmPsgxUVV7prG/BiaqacQllp/GU36qwJBNB543KMvhFtXAHvHLr/t7Y4tBffS0Wt5hY2rZ6JZINgETnZ0SzDXmQZyum79PvPGtmi9rhS1uhIRXPulJL4CimmSYmIInLzxnh4qT6t3o0FXJnYWduG8yQP7u9SMRB+GHquoWEH2310l3P8B4M3c7jDaDNsAAAAASUVORK5CYII=", "json_struct": {"a": 3, "b": 3}},
{"id":9,"first_name":"Aylmar","last_name":"Trusse","email":"atrusse8@hatena.ne.jp","gender":"Male","ip_address":"75.247.13.35","has_a_turtle":True,"age":33,"date_of_birth":"1989-03-11","last_seen_epoch":1596892970000,"height_m":1.85,"weight_kg":55.4,"longitude":Decimal(108.0150796),"latitude":Decimal(13.9718356),"creditcard_number":"379457053382920","description":"Removal of Synthetic Substitute from Chest Wall, Percutaneous Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAJcSURBVDjLpZPvS1NRGMfvv9Db6FUvIpDqLwj6AZER1EhBkvTFTALNlWJrkwKRoWKGjZltaUmuYmsIq7SEtrSt1qakzh/TtunSdq9j8253u3fL1vh27t2KNAqsAx+ecw7n+z0PzzkPBYD6HyjZZVNNpzXINz0OQP2IYPwI1YMFXOmfQ+O9WTT0eVFvmMQl/Qcobo+jVueG2uDkRZ1koLEEc9qRNWwXRddYTjIQb/4XqjtGIRmojX6k0+ltI2+15Q1UA4sQBAG8BA+e56W5w+H4jQmbDgsPy+C3ajDVdwzT3UUVlFiwZDKJshbvT1JkLQp+HQJtBWNXIuF9DqTCiHstcN04vkaJ1U5wHLgEgUsUIrfJQKCHwM43Y2P1KZjRDqRnBvEt5MTM/eoMJT4Vy8YRj8elKBFnSWTJHovoEhHPXkMu7UQ6WIt1z0X4BuRw6+XwaGV7qIbeacRiMUQLiPNYNCZl4LF1g51RIZdxQQhUIrNSjuS8GqH+06i73psvoqJnApFIZFMNxPXrwXYw48qCuAKZT2fBzTZhyXASjM+JkkZL3qBO5wHDMIS1QmTge2fE/MtGkvYbZJbl+LJyDgmSSUB/Al85WjpzpsGcN6jVvkc4TBPCoAnuoR7MDasx/aILn92lEJbLsT51Ff67p7C6OAmaFs/RkNWb8KMXTBduOqSfVdVmh7n9KJAMYcFQArt6J97eKcZw8yHUqPQk7SfkZhOIRsQkGWztrpbzRdkNlw4b7ltwaw7DXLUrNaLaf+CP3bh1o/Tgjkxr5d6sra04+6rzyNgz5b7df2vn7z39IYTVO0xbAAAAAElFTkSuQmCC", "json_struct": {"a": -5, "b": 2}},
{"id":10,"first_name":"Reina","last_name":"Gabrieli","email":"rgabrieli9@ftc.gov","gender":"Female","ip_address":"173.93.42.73","has_a_turtle":False,"age":27,"date_of_birth":"1983-07-26","last_seen_epoch":1567327525000,"height_m":1.8,"weight_kg":71.2,"longitude":Decimal(49.98361),"latitude":Decimal(40.42639),"creditcard_number":"3574577575605982","description":"Revision of External Fixation Device in Left Humeral Head, Percutaneous Endoscopic Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAQAAAC1+jfqAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAACjSURBVCjPY/jPgB8yEKmgPKH8ffn/0n4IL3F99P+QAjQTyveX/IexIwWCz2NYUbw/7z/CYK/9GApy92cgKXDEVJC+PxFJgQWmgoT9kUgK9DEVROwPRFKghqnAv9/7v2MAhK3iINePocBNwf69xXlDhf8Myg4y58UUsISkmYL+fI39ivul+0UMSA/q/wza/1X+y/0X/y/0n+c/+3/m/6SbgAsCAM8i/W7eee6fAAAAAElFTkSuQmCC", "json_struct": {"a": 7, "b": 1}},
{"id":11,"first_name":"Robinette","last_name":"Bolens","email":"rbolensa@loc.gov","gender":"Female","ip_address":"47.135.33.131","has_a_turtle":True,"age":33,"date_of_birth":"1985-06-11","last_seen_epoch":1590910719000,"height_m":1.69,"weight_kg":75.3,"longitude":Decimal(9.6591939),"latitude":Decimal(11.3460734),"creditcard_number":"3539436802315010","description":"Supplement Spinal Meninges with Nonautologous Tissue Substitute, Open Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAIhSURBVDjLjZPfS1NxGMbPjX9Bl/0PXqQzIYIEWWIpRFFBQwyNQqhG4VY0dUK0gXQh+KMhu2nKIAkpAu0wkBA0f0QRtoLuFgSbOmSbZ+estsf3+e6ssE7hxXNz4PM+7/d9nqMB0A6jr3Var2hJlBFZorKochhwUpQmkO65iC3/DWwP3sJO0Av59l/QI0qlmuux5buO7EMvcuM+5AInsRdqxo/5ART92j/hqMhIX7uMbOgudu+7YYRdsMaPozRZ1c/EIKwHmiM8KyptD9xEbsyHQvAYSjZozZyC+boDxbeXYKUmkF9vcHQu7QzdRn7KD/OxqwrGW1B8cx7GZheML1eVrO8R5N+5/nqzQWfC1miTgs1X7TA+eBT0bdOD5yudCCRaMPF+CEej2oEBKb6Za9ecTb0TRrIbewLPLnegd/4E2l824vSLBoQ3AjgypR2IqpJ9dAeF4cbfzgJnPnVhZLEVZ23wSsyHvkgcMf0jzvTP/RqQZlSF6D11ML6Za9OZcJuA555dQN+TOKb1JGb0z3i6kKwOsBtWZs6Miu7qYPbadCYcjCUUGJ5eQ09IJ2yKVjlgiQ1jSZgzo+K1eTC+mWvTmbB3dLEGumu344AM68mGqbdLznTntXkwvplr05nwn73hAIvdZj3V+lISDmBUyj1SdbfXdjsNKPPHYLdVPaVhLAlzZlS8tn0w06n2HFDhX8Ufg91mPdkwloQ589K2Vp0G7AOR2a7+EgKeFAAAAABJRU5ErkJggg==", "json_struct": {"a": 51, "b": 2}},
{"id":12,"first_name":"Greta","last_name":"Devall","email":"gdevallb@webnode.com","gender":"Female","ip_address":"51.114.12.144","has_a_turtle":True,"age":34,"date_of_birth":"1984-06-23","last_seen_epoch":1568940812000,"height_m":1.89,"weight_kg":77.1,"longitude":Decimal(116.1713917),"latitude":Decimal(-8.4114856),"creditcard_number":"3529663380095910","description":"Restriction of Hemiazygos Vein, Open Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHlSURBVDjLpVPLShxBFD3T3WOUUUGYByhkI+50o6Bmo3ErAVG3Qvb5giyThets8gmC+DYQzEaGBMUnBhQRJAEXQuZJ8Mlodz28t6rbGY2b4IXqU1XcOvfc01UxrTWeEx5/ptbOssQzILVylVKQChCMkucagrA6JKTQtK+uJyc6Gg2B0npwvLfJ+Z/KH2Z+J+4VEJs5nFh4B3BLUhCrBILAzv1bmvvAbWD2rt9nCSRqCCIfCFtTdFjYoUVIwgQ0BJEc5UxmIGoJuGkTMeBPiQ4qq4R8MEpkhJSSyZhMPyQA/4XP33L6qhLoKGbW8wZnNwoG5zftennLYum8ot9+2uWpVRBQBTISc5tFsBmu62BxqwjPBb7sFNFQ52DlZwnxOOFeAe3pevihB07UArvguTGMv0rDo92x/jQoHyO9aZM43J2y2GNbEEJVCVgBt80Hv+5RJdLFFR0nZpAjwqXtQmh3LQGxcT9xkvymJ/WwIiEridajfVZB4oVXJeBbx+FSxdX98oOK2YMyAoV/lER/zrP9COsB0Q11JfH9sIzXnUn8IBwkfBzHp+dUrOYtGA+ohc6Xjcj9raA904CT/BXaWupM8lOhwssXXiR98XH6V7NPRHzDeEjyJTCPRzxJcHZ5kzdX77nP+Q6ZHT+VaotBJwAAAABJRU5ErkJggg==", "json_struct": {"a": 4, "b": 5}},
{"id":13,"first_name":"Murdock","last_name":"Jancso","email":"mjancsoc@princeton.edu","gender":"Male","ip_address":"222.112.172.81","has_a_turtle":False,"age":26,"date_of_birth":"1983-04-06","last_seen_epoch":1571577130000,"height_m":1.79,"weight_kg":77.6,"longitude":Decimal(111.622398),"latitude":Decimal(-2.688962),"creditcard_number":"3580344294279777","description":"Dilation of Right Internal Carotid Artery, Bifurcation, with Drug-eluting Intraluminal Device, Percutaneous Endoscopic Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAADMSURBVDjLY/z//z8DJYCJgUKAYUBE+440IHYh1gAWLGIzgXgPFINBVFTU/1+/fjH8/v2bAUSD8N69exlBcozIYQCyHUgZAzGIdl1R6bGHVBeEAjW5Qr1QDnOFj4/Pf5jNMHzmzBlUFwA1hQIpkMZ7QKxErCtYoJqVoDaGATXcg/JBBnQAsYmdnR2GC27duoUZBuQAeBhERkZi2IKOYbEAop8/f05lF3h7e/8nZDsy/vz5M5VdYGtr+//nz59Y/QvDf/78QcbUcQHFuREAOJ3Rs6CmnfsAAAAASUVORK5CYII=", "json_struct": {"a": -6, "b": 10}},
{"id":14,"first_name":"Kori","last_name":"Priestland","email":"kpriestlandd@amazon.de","gender":"Female","ip_address":"91.206.236.187","has_a_turtle":True,"age":25,"date_of_birth":"1981-04-20","last_seen_epoch":1588836185000,"height_m":1.68,"weight_kg":66.3,"longitude":Decimal(-52.5852334),"latitude":Decimal(-26.7416528),"creditcard_number":"3547692823360522","description":"Insertion of Other Device into Right Knee Region, Open Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAHxSURBVDjLpZPLa1NREIe/PKhSNK2g9UEIKqEFRSgooi4sCJqCuHEpduEDwZU7/wGhu27cdKWoG7WQrsSgKAV3SnFTAqZCW22xlAbTpL0xyb0z4+Ke5mHrqgMHfouZ3/lm5pyImbGTiLLDiG+KsTfLTRQ1wwARwMBUUQPRUIvCoxupSIcBwJF9XRiGKRhgaqiBtZmowtxKdSuBmGHmChTMDDNH44xUDVVD/G1aEAkRMUPVFW5q7TTxRbcamGiYaKCqmOIK1FEp6fIzDu36SSwYBO53biFQUDFE1NG0tIqSLE1yOr3KicwdMse+8HW0f7iDQANFXI8tklAnN3KcS03TM3AZ70eORG8KS61lcw+O320aBAGIaLP/dpM+8vSeuopU8+zen6QmSxweGuwOvNJ4y0AEkbiburvdwlWWgz780gLRWJFI1KMrUYRalY3leqyNwBBxxW76Cb/AmdgE3QcEDXyiugbRvZjf4NurQi1fqNxrIzA3g/D2ZH2K/uAdA5eGsMZ7IrLOzNsa5lUxhYW5ys2Rl/PZNgKlsFh1g4OLBz9x8kqGRnGceLyHmY8+r5euU9xzHgNePD+bDffvHsy/Z3Fy+HN9ZdS8wjWbfnxh/cmt9Mh2efH//bJf88UPlfLU0epv78/s7OrD20+/T2yX9xfxXpKxy4ipWwAAAABJRU5ErkJggg==", "json_struct": {"a": 13, "b": 4}},
{"id":15,"first_name":"Mayne","last_name":"Loncaster","email":"mloncastere@purevolume.com","gender":"Male","ip_address":"155.62.145.3","has_a_turtle":True,"age":30,"date_of_birth":"1980-12-31","last_seen_epoch":1586969235000,"height_m":1.8,"weight_kg":68.7,"longitude":Decimal(116.4960979),"latitude":Decimal(-8.6433591),"creditcard_number":"4017953241273039","description":"Removal of Feeding Device from Lower Intestinal Tract, Percutaneous Endoscopic Approach","image_url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABGdBTUEAAK/INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAIHSURBVDjLfZKxa5NBGMZ/35dEamtFMkgGpaAVQcTRLp1LoYubi6MOgqiTZOrQxYKjOIh/gIuim5YqdChKl4KbQ0XESoPYaGq+fHff3fu+DklM2lRfeLg7uOd3z929iZnRL7l9y7hwkbi6isWIiaAhoL2xMj+PvVlj4u1K0veUGaphkw0ZNcbuOkasCMOWA4AQsRhR79EYu6AY/87LhwDSfYAiYN7D1BTiPeo94j3iHMn0NNrJsaL4T4IiLNH+sFhZ2KFy9Qcc8aga0knR3S/oWrO751+A8p332+bSNmfuHmP8PEZKqjlJaKF7HynGHrZ1wW0Pe5L+L8R3p2YwXjL9qGal42hsQvyNSYaJgyQBCbQ36w0puFK7Zhv73sAk1Dl9r2blE1j8BZJj4nrKUP8d04yxczdrYY/6yBU0FrOliUtY3MPU9eR7cph20PCTdPwkIWN25BdUpWppGdMc1PfkBhIH0gYLxILqKEAULIIWg+j902WQCPPE4pA+0ChNCy3MwoHobgBBkaJDyGmOAoJbl90NkqS0P770Rguk6VHyna90WqyPAMSx3N6831C3C6UxzCImvSQoSXmS6BI+PX/VyDOWR/oAYOdxckMCS5OXr9cq1bOQGBYzpGiRfdti6+nrRp6xOPfMnhwKAPj8IJkpWtSjY1Yi1eDBdWi6jHURludedBuoX38Av56vLTwJJBoAAAAASUVORK5CYII=", "json_struct": {"a": 11, "b": 1}}
]
# Explicit schema for the rows in `data_dict`, built field by field with
# StructType.add(name, data_type, nullable).  Only `last_name` and `age` are
# nullable at the top level; the two members of the nested `json_struct`
# keep the default nullable=True.
data_schema = (
    StructType()
    .add("id", ShortType(), nullable=False)
    .add("first_name", StringType(), nullable=False)
    .add("last_name", StringType(), nullable=True)
    .add("email", StringType(), nullable=False)
    .add("gender", StringType(), nullable=False)
    .add("ip_address", StringType(), nullable=False)
    .add("has_a_turtle", BooleanType(), nullable=False)
    .add("age", IntegerType(), nullable=True)
    .add("date_of_birth", StringType(), nullable=False)
    .add("last_seen_epoch", LongType(), nullable=False)
    .add("height_m", FloatType(), nullable=False)
    .add("weight_kg", DoubleType(), nullable=False)
    .add("longitude", DecimalType(10, 7), nullable=False)
    .add("latitude", DecimalType(10, 7), nullable=False)
    .add("creditcard_number", StringType(), nullable=False)
    .add("description", StringType(), nullable=False)
    .add("image_url", StringType(), nullable=False)
    .add(
        "json_struct",
        StructType().add("a", IntegerType()).add("b", IntegerType()),
        nullable=False,
    )
)
# Build the sample DataFrame from `data_dict` / `data_schema`, then derive one
# column per extra Spark type the sample demonstrates (date, binary, map,
# array, byte).
#
# getOrCreate() returns the already-active session when there is one, so this
# works both as a standalone script and inside an existing Spark application.
# (The snippet previously referenced `self.spark`, which is undefined at
# module level.)
spark = SparkSession.builder.getOrCreate()
df_sample = (
    spark.createDataFrame(data=data_dict, schema=data_schema)
    # "yyyy-MM-dd" string -> date
    .withColumn("date_of_birth", F.to_date("date_of_birth"))
    # Epoch milliseconds -> seconds -> formatted timestamp *string* (see the
    # printSchema output below).  The rendering depends on the session time
    # zone; the captured output below corresponds to UTC.
    .withColumn("last_seen_epoch", F.from_unixtime(F.col("last_seen_epoch") / 1000))
    # The values are data URIs ("data:image/png;base64,<payload>").  Drop the
    # header up to the first comma so that only the base64 payload is decoded;
    # decoding the whole URI turns the header characters into garbage bytes.
    .withColumn(
        "image_url",
        F.unbase64(F.regexp_replace(F.col("image_url"), r"^data:[^,]*,", "")),
    )
    # Single-entry map {gender: age}; the value is null where `age` is null.
    .withColumn("genderMap", F.create_map("gender", "age"))
    # Description split on single spaces -> array<string>
    .withColumn("descArray", F.split(F.col("description"), " "))
    .withColumn("ageByte", F.col("age").cast("byte"))
)
df_sample.show()
# +---+----------+----------+--------------------+------+---------------+------------+----+-------------+-------------------+--------+---------+-----------+-----------+-------------------+--------------------+--------------------+-----------+--------------+--------------------+-------+
# | id|first_name| last_name| email|gender| ip_address|has_a_turtle| age|date_of_birth| last_seen_epoch|height_m|weight_kg| longitude| latitude| creditcard_number| description| image_url|json_struct| genderMap| descArray|ageByte|
# +---+----------+----------+--------------------+------+---------------+------------+----+-------------+-------------------+--------+---------+-----------+-----------+-------------------+--------------------+--------------------+-----------+--------------+--------------------+-------+
# | 1| Grace| Balsellie| gbalsellie0@nih.gov| Male| 210.75.33.4| true| 31| 1982-06-13|2020-01-25 12:03:36| 1.85| 81.1| 12.3750266| 4.6740235|5602241598652866450|Supplement Descen...|[89 50 4E 47 0D 0...| [1, -1]| [Male -> 31]|[Supplement, Desc...| 31|
# | 2| Hildy| null| hwoodson1@google.de|Female| 73.175.26.17| false|null| 1989-01-21|2020-05-02 14:47:34| 1.84| 79.4|-98.8824610| 19.5056005|5018264415096666589|Dilation of Infer...|[89 50 4E 47 0D 0...|[11, 10]| [Female ->]|[Dilation, of, In...| null|
# | 3| Sayre| Brolechan|sbrolechan2@wikia...| Male| 236.38.84.92| true| 34| 1981-10-13|2020-01-27 10:29:38| 1.68| 72.9|105.6603149| 21.0877573| 374283537666125|Insertion of Radi...|[89 50 4E 47 0D 0...| [13, 1]| [Male -> 34]|[Insertion, of, R...| 34|
# | 4| Evonne| Hindmore|ehindmore3@sfgate...|Female| 120.154.10.57| true| 34| 1987-07-12|2020-03-03 11:32:26| 1.73| 66.7|111.9451530| 28.0181070| 3552786992911586|Extirpation of Ma...|[89 50 4E 47 0D 0...| [4, -2]|[Female -> 34]|[Extirpation, of,...| 34|
# | 5| Gil| null| ggothup4@wiley.com| Male| 55.255.10.128| true|null| 1982-09-30|2019-09-20 20:45:16| 1.88| 55.1|122.1039229| 40.7840680| 3559054848061301|Replacement of Ge...|[89 50 4E 47 0D 0...| [2, -2]| [Male ->]|[Replacement, of,...| null|
# | 6| Corbie| Pakeman|cpakeman5@webeden...| Male| 10.251.75.196| true| 31| 1982-05-23|2020-07-04 09:25:54| 1.83| 62.9|106.8502879| -6.4185424| 3542124178136997|Repair Right Lacr...|[89 50 4E 47 0D 0...| [8, -1]| [Male -> 31]|[Repair, Right, L...| 31|
# | 7| Normy| Goldsack|ngoldsack6@sbwire...| Male|145.153.190.150| false| 32| 1989-05-18|2019-10-15 21:34:56| 1.72| 82.8| -3.8235525| 37.7801069| 6384709890780926|Drainage of Right...|[89 50 4E 47 0D 0...| [0, 4]| [Male -> 32]|[Drainage, of, Ri...| 32|
# | 8| Reamonn| Freda|rfreda7@moonfruit...| Male| 178.22.190.191| true| 34| 1989-02-13|2020-06-09 03:35:53| 1.66| 62.1| 33.4901963| 46.8117208| 3557354120556693|Magnetic Resonanc...|[89 50 4E 47 0D 0...| [3, 3]| [Male -> 34]|[Magnetic, Resona...| 34|
# | 9| Aylmar| Trusse|atrusse8@hatena.n...| Male| 75.247.13.35| true| 33| 1989-03-11|2020-08-08 13:22:50| 1.85| 55.4|108.0150796| 13.9718356| 379457053382920|Removal of Synthe...|[89 50 4E 47 0D 0...| [-5, 2]| [Male -> 33]|[Removal, of, Syn...| 33|
# | 10| Reina| Gabrieli| rgabrieli9@ftc.gov|Female| 173.93.42.73| false| 27| 1983-07-26|2019-09-01 08:45:25| 1.8| 71.2| 49.9836100| 40.4263900| 3574577575605982|Revision of Exter...|[89 50 4E 47 0D 0...| [7, 1]|[Female -> 27]|[Revision, of, Ex...| 27|
# | 11| Robinette| Bolens| rbolensa@loc.gov|Female| 47.135.33.131| true| 33| 1985-06-11|2020-05-31 07:38:39| 1.69| 75.3| 9.6591939| 11.3460734| 3539436802315010|Supplement Spinal...|[89 50 4E 47 0D 0...| [51, 2]|[Female -> 33]|[Supplement, Spin...| 33|
# | 12| Greta| Devall|gdevallb@webnode.com|Female| 51.114.12.144| true| 34| 1984-06-23|2019-09-20 00:53:32| 1.89| 77.1|116.1713917| -8.4114856| 3529663380095910|Restriction of He...|[89 50 4E 47 0D 0...| [4, 5]|[Female -> 34]|[Restriction, of,...| 34|
# | 13| Murdock| Jancso|mjancsoc@princeto...| Male| 222.112.172.81| false| 26| 1983-04-06|2019-10-20 13:12:10| 1.79| 77.6|111.6223980| -2.6889620| 3580344294279777|Dilation of Right...|[89 50 4E 47 0D 0...|[-6, 10]| [Male -> 26]|[Dilation, of, Ri...| 26|
# | 14| Kori|Priestland|kpriestlandd@amaz...|Female| 91.206.236.187| true| 25| 1981-04-20|2020-05-07 07:23:05| 1.68| 66.3|-52.5852334|-26.7416528| 3547692823360522|Insertion of Othe...|[89 50 4E 47 0D 0...| [13, 4]|[Female -> 25]|[Insertion, of, O...| 25|
# | 15| Mayne| Loncaster|mloncastere@purev...| Male| 155.62.145.3| true| 30| 1980-12-31|2020-04-15 16:47:15| 1.8| 68.7|116.4960979| -8.6433591| 4017953241273039|Removal of Feedin...|[89 50 4E 47 0D 0...| [11, 1]| [Male -> 30]|[Removal, of, Fee...| 30|
# +---+----------+----------+--------------------+------+---------------+------------+----+-------------+-------------------+--------+---------+-----------+-----------+-------------------+--------------------+--------------------+-----------+--------------+--------------------+-------+
df_sample.printSchema()
# root
# |-- id: short (nullable = false)
# |-- first_name: string (nullable = false)
# |-- last_name: string (nullable = true)
# |-- email: string (nullable = false)
# |-- gender: string (nullable = false)
# |-- ip_address: string (nullable = false)
# |-- has_a_turtle: boolean (nullable = false)
# |-- age: integer (nullable = true)
# |-- date_of_birth: date (nullable = true)
# |-- last_seen_epoch: string (nullable = true)
# |-- height_m: float (nullable = false)
# |-- weight_kg: double (nullable = false)
# |-- longitude: decimal(10,7) (nullable = false)
# |-- latitude: decimal(10,7) (nullable = false)
# |-- creditcard_number: string (nullable = false)
# |-- description: string (nullable = false)
# |-- image_url: binary (nullable = false)
# |-- json_struct: struct (nullable = false)
# | |-- a: integer (nullable = true)
# | |-- b: integer (nullable = true)
# |-- genderMap: map (nullable = false)
# | |-- key: string
# | |-- value: integer (valueContainsNull = true)
# |-- descArray: array (nullable = false)
# | |-- element: string (containsNull = true)
# |-- ageByte: byte (nullable = true)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment