Skip to content

Instantly share code, notes, and snippets.

@jimathyp
Last active August 24, 2022 20:38
Show Gist options
  • Save jimathyp/b1eab3eb30eedde929c12d1eda9dd51a to your computer and use it in GitHub Desktop.
Save jimathyp/b1eab3eb30eedde929c12d1eda9dd51a to your computer and use it in GitHub Desktop.

Spark DataFrame attributes and methods (output of dir(df))

dir(df)

['__class__',
'__delattr__',
'__dict__',
'__dir__',
'__doc__',
'__eq__',
'__format__',
'__ge__',
'__getattr__',
'__getattribute__',
'__getitem__',
'__gt__',
'__hash__',
'__init__',
'__init_subclass__',
'__le__',
'__lt__',
'__module__',
'__ne__',
'__new__',
'__reduce__',
'__reduce_ex__',
'__repr__',
'__setattr__',
'__sizeof__',
'__str__',
'__subclasshook__',
'__weakref__',
'_collect_as_arrow',
'_jcols',
'_jdf',
'_jmap',
'_joinAsOf',
'_jseq',
'_lazy_rdd',
'_repr_html_',
'_sc',
'_schema',
'_sort_cols',
'_support_repr_html',
'_to_corrected_pandas_type',
'agg',
'alias',
'approxQuantile',
'cache',
'checkpoint',
'coalesce',
'colRegex',
'collect',
'columns',
'corr',
'count',
'cov',
'createGlobalTempView',
'createOrReplaceGlobalTempView',
'createOrReplaceTempView',
'createTempView',
'crossJoin',
'crosstab',
'cube',
'describe',
'display',
'distinct',
'drop',
'dropDuplicates',
'drop_duplicates',
'dropna',
'dtypes',
'exceptAll',
'explain',
'fillna',
'filter',
'first',
'foreach',
'foreachPartition',
'freqItems',
'groupBy',
'groupby',
'head',
'hint',
'inputFiles',
'intersect',
'intersectAll',
'isLocal',
'isStreaming',
'is_cached',
'join',
'limit',
'localCheckpoint',
'mapInPandas',
'na',
'orderBy',
'persist',
'printSchema',
'randomSplit',
'rdd',
'registerTempTable',
'repartition',
'repartitionByRange',
'replace',
'rollup',
'sameSemantics',
'sample',
'sampleBy',
'schema',
'select',
'selectExpr',
'semanticHash',
'show',
'sort',
'sortWithinPartitions',
'sql_ctx',
'stat',
'storageLevel',
'subtract',
'summary',
'tail',
'take',
'toDF',
'toJSON',
'toLocalIterator',
'toPandas',
'to_koalas',
'to_pandas_on_spark',
'transform',
'union',
'unionAll',
'unionByName',
'unpersist',
'where',
'withColumn',
'withColumnRenamed',
'withColumns',
'withMetadata',
'withWatermark',
'write',
'writeStream',
'writeTo']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment