Skip to content

Instantly share code, notes, and snippets.

View bjornjorgensen's full-sized avatar

Bjørn Jørgensen bjornjorgensen

View GitHub Profile
@gabefair
gabefair / to_dataframe.py
Last active November 2, 2020 02:49
MongoDB collection to pandas DataFrame
import pandas as pd
from pymongo import MongoClient
# Load a MongoDB collection into a pandas DataFrame.
# Connect to the MongoDB server addressed by the URI below (localhost, port 27017).
client = MongoClient("mongodb://localhost:27017")
# Select the database by attribute access.
# NOTE(review): "database_Name" looks like a placeholder — replace with the real database name.
db = client.database_Name
# Select the collection by key.
# NOTE(review): 'collection_name' looks like a placeholder — replace with the real collection name.
collection_conn = db['collection_name']
# find() is called with no filter argument; the result is kept as a cursor.
collection_cursor = collection_conn.find()
# list() consumes the whole cursor up front, so every returned document is held
# in memory before the DataFrame is built from the resulting list.
collection_pandas_df = pd.DataFrame(list(collection_cursor))
from pyspark.sql.types import *
from pyspark.sql.functions import *
#Flatten array of structs and structs
def flatten(df):
# compute Complex Fields (Lists and Structs) in Schema
complex_fields = dict([(field.name, field.dataType)
for field in df.schema.fields
if type(field.dataType) == ArrayType or type(field.dataType) == StructType])
while len(complex_fields)!=0:

MongoDB Cheat Sheet

Show All Databases

show dbs

Show Current Database

@fahadsiddiqui
fahadsiddiqui / flatten_df.scala
Last active May 22, 2023 19:42
Flatten a nested JSON Spark DataFrame using Scala, Spark 2.2+ — a custom solution.
def flattenDataFrame(df: DataFrame): DataFrame = {
val fields = df.schema.fields
val fieldNames = fields.map(x => x.name)
for (i <- fields.indices) {
val field = fields(i)
val fieldType = field.dataType
val fieldName = field.name
fieldType match {
case _: ArrayType =>