A Databricks notebook comes with a Spark session and a few related globals pre-defined:
print(dir()) # sc, spark, sql, sqlContext
print(type(spark)) # <class 'pyspark.sql.session.SparkSession'>
print(type(sc)) # <class 'dbruntime.spark_connection.RemoteContext'>
print(type(sql)) # <class 'method'> (the bound sql method from pyspark.sql.context, per help())
print(type(sqlContext)) # <class 'pyspark.sql.context.SQLContext'>
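spark is the standard entry point; the notebook-provided sql appears to be a convenience binding of the same query interface (per the help text above). A minimal sketch, assuming the default notebook globals; the query itself is only illustrative:
# Both calls return a pyspark.sql.DataFrame
df1 = spark.sql("SELECT 1 AS one")   # standard SparkSession entry point
df2 = sql("SELECT 1 AS one")         # notebook convenience binding (see types above)
display(df1)                         # Databricks display helper (listed below)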
All functions and attributes available in a Python cell:
dir()
Out[1]: ['In',
'Out',
'_',
'__',
'___',
'__builtin__',
'__builtins__',
'__name__',
'_dh',
'_i',
'_i1',
'_ih',
'_ii',
'_iii',
'_oh',
'dbutils',
'display',
'displayHTML',
'exit',
'getArgument',
'get_ipython',
'quit',
'sc',
'spark',
'sql',
'sqlContext',
'table',
'udf']
%python
for f in dir():
    print(f"{f} {type(eval(f))}")
Output (IPython's own bookkeeping names omitted):
dbutils <class 'dbruntime.dbutils.DBUtils'>
display <class 'method'>
displayHTML <class 'method'>
exit <class 'IPython.core.autocall.ExitAutocall'>
getArgument <class 'method'>
get_ipython <class 'method'>
quit <class 'IPython.core.autocall.ExitAutocall'>
sc <class 'dbruntime.spark_connection.RemoteContext'>
spark <class 'pyspark.sql.session.SparkSession'>
sql <class 'method'>
sqlContext <class 'pyspark.sql.context.SQLContext'>
table <class 'method'>
udf <class 'function'>
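An equivalent sketch that avoids eval() by reading globals() directly; assumes the same notebook environment:
# Same listing without eval(); skip IPython's underscore-prefixed bookkeeping names
for name, obj in sorted(globals().items()):
    if not name.startswith("_"):
        print(f"{name} {type(obj)}")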
dir(spark)
Out[10]: ['Builder',
'__class__',
'__delattr__',
'__dict__',
'__dir__',
'__doc__',
'__enter__',
'__eq__',
'__exit__',
'__format__',
'__ge__',
'__getattribute__',
'__gt__',
'__hash__',
'__init__',
'__init_subclass__',
'__le__',
'__lt__',
'__module__',
'__ne__',
'__new__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__sizeof__',
'__str__',
'__subclasshook__',
'__weakref__',
'_activeSession',
'_conf',
'_convert_from_pandas',
'_createFromLocal',
'_createFromLocalTrusted',
'_createFromRDD',
'_create_dataframe',
'_create_from_pandas_with_arrow',
'_create_rdd_from_local_trusted',
'_create_shell_session',
'_get_numpy_record_dtype',
'_get_pandas_num_slices',
'_inferSchema',
'_inferSchemaFromList',
'_instantiatedSession',
'_jsc',
'_jsparkSession',
'_jvm',
'_jwrapped',
'_repr_html_',
'_sc',
'_wrap_data_schema',
'_wrapped',
'_write_to_trusted_path',
'builder',
'catalog',
'conf',
'createDataFrame',
'getActiveSession',
'newSession',
'range',
'read',
'readStream',
'sparkContext',
'sql',
'stop',
'streams',
'table',
'udf',
'version']
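A quick sketch exercising a few of the public SparkSession members listed above; the data and the temp view name are made up for illustration:
spark.range(5).show()                                # ids 0..4 in a single 'id' column
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "letter"])
df.createOrReplaceTempView("letters")                # hypothetical temp view name
spark.table("letters").show()                        # same rows via the table() helper
print(spark.version)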
dir(spark.conf)
Out[11]: ['__class__',
'__delattr__',
'__dict__',
'__dir__',
'__doc__',
'__eq__',
'__format__',
'__ge__',
'__getattribute__',
'__gt__',
'__hash__',
'__init__',
'__init_subclass__',
'__le__',
'__lt__',
'__module__',
'__ne__',
'__new__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__sizeof__',
'__str__',
'__subclasshook__',
'__weakref__',
'_checkType',
'_jconf',
'get',
'isModifiable',
'set',
'unset']
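spark.conf is the runtime SQL configuration. A small get/set/isModifiable/unset sketch; spark.sql.shuffle.partitions is just a commonly tweaked, runtime-modifiable key:
spark.conf.set("spark.sql.shuffle.partitions", "8")
print(spark.conf.get("spark.sql.shuffle.partitions"))           # '8'
print(spark.conf.isModifiable("spark.sql.shuffle.partitions"))  # True for runtime SQL confs
spark.conf.unset("spark.sql.shuffle.partitions")                # revert to the default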
dir(spark.conf._jconf)
Out[14]: ['$anonfun$requireNonStaticConf$1',
'$anonfun$unset$1',
'contains',
'equals',
'get',
'getAll',
'getClass',
'getOption',
'hashCode',
'isModifiable',
'notify',
'notifyAll',
'set',
'sqlConf',
'sqlConf_$eq',
'toString',
'unset',
'wait']
help(spark.conf._jconf.getAll())
Two equivalent ways to dump the full Spark configuration from Python:
sc._conf.getAll()
spark.sparkContext.getConf().getAll()
for item in sorted(sc._conf.getAll()):
    print(item)
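The getAll() pairs convert cleanly into a dict for lookups or filtering; a sketch (the "shuffle" filter is arbitrary):
conf = dict(spark.sparkContext.getConf().getAll())
for key, value in sorted(conf.items()):
    if "shuffle" in key:
        print(key, value)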
SQL can read parquet files in place (e.g. from a %sql cell):
select * from parquet.`s3://....`
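The same query from Python; the bucket path below is a hypothetical placeholder for the elided path above:
df = spark.sql("SELECT * FROM parquet.`s3://example-bucket/path/to/data/`")
# or, without SQL:
df = spark.read.parquet("s3://example-bucket/path/to/data/")
display(df)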