Skip to content

Instantly share code, notes, and snippets.

@avcaliani
Last active March 22, 2022 00:56
Show Gist options
  • Save avcaliani/9d54023e0f7c6c1ef981eca4e74c8b7a to your computer and use it in GitHub Desktop.
Save avcaliani/9d54023e0f7c6c1ef981eca4e74c8b7a to your computer and use it in GitHub Desktop.
#apache-spark #pyspark #info
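"""Information script for Apache Spark.

Prints the Python version, CPU count, Spark version and Hadoop version of the
runtime environment, followed by every Spark configuration entry.

How to use?
> spark-submit info.py
"""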
import os
import platform
import sys
from contextlib import contextmanager
from typing import Iterator

from pyspark.sql import SparkSession
@contextmanager
def spark_session() -> Iterator[SparkSession]:
    """Provide a Spark session named "info" for the duration of a ``with`` block.

    The session is obtained through ``SparkSession.builder.getOrCreate()`` and
    is stopped when the ``with`` block exits.

    Yields:
        The Spark session to use inside the ``with`` block.
    """
    spark = SparkSession.builder.appName("info").getOrCreate()
    try:
        yield spark
    finally:
        # Stop the session even when the body of the ``with`` block raises;
        # without ``finally`` an exception would skip the cleanup entirely.
        spark.stop()
def main() -> None:
    """Print an environment summary followed by the Spark configuration.

    The first section reports the Python version, the CPU count, the Spark
    version and the Hadoop version. The second section lists every
    configuration entry of the Spark context as ``key => value``.
    """
    with spark_session() as session:
        context = session.sparkContext

        # --- Environment section -------------------------------------------
        print("\n 🌎 Environment Info")
        print(" ===================")
        python_version = platform.python_version()
        print(f" 🐍 Python => {python_version} ")
        cpu_total = os.cpu_count()
        print(f" 🍺 CPUs => {cpu_total}")
        print(f" 💥 Spark => {session.version}")
        # The Hadoop version is read from the JVM side via the context's
        # ``_jvm`` gateway attribute.
        hadoop_version_info = context._jvm.org.apache.hadoop.util.VersionInfo
        print(f" 🐘 Hadoop => {hadoop_version_info.getVersion()}")

        # --- Configuration section -----------------------------------------
        print("\n 🔧 Spark Config")
        print(" ===============")
        for key, value in context.getConf().getAll():
            print(f" {key} => {value}")
        print("")
# Script entry point: run the report only when this file is executed directly
# (for example through ``spark-submit``), not when it is imported.
if __name__ == "__main__":
    main()
    # Exit explicitly with a success status once the report has been printed.
    sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment