Bjørn Jørgensen bjornjorgensen

## to_dataframe.py
import pandas as pd
from pymongo import MongoClient

# Open a connection to the MongoDB server on localhost and pick the database.
client = MongoClient("mongodb://localhost:27017")
db = client["database_Name"]

# Run an unfiltered find() on the collection, materialize the cursor into a
# list of documents, and build a pandas DataFrame from that list.
collection_conn = db.collection_name
collection_cursor = collection_conn.find()
collection_pandas_df = pd.DataFrame(list(collection_cursor))

## PySpark Flatten JSON
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Flatten arrays of structs and structs
def flatten(df):
   # compute Complex Fields (Lists and Structs) in Schema
   complex_fields = dict([(field.name, field.dataType)
                             for field in df.schema.fields
                             if type(field.dataType) == ArrayType or  type(field.dataType) == StructType])
   while len(complex_fields)!=0:

## mongodb_cheat_sheet.md

      
              1 file
            
          
              1290 forks
            
          
              226 comments
            
          
              2388 stars
            
          
                bradtraversy
                / mongodb_cheat_sheet.md
            
            
              Last active
              July 24, 2024 17:30
            
              
                MongoDB Cheat Sheet
              
          
    MongoDB Cheat Sheet

Show All Databases

show dbs

Show Current Database


## flatten_df.scala
def flattenDataFrame(df: DataFrame): DataFrame = {
  val fields = df.schema.fields
  val fieldNames = fields.map(x => x.name)

  for (i <- fields.indices) {
    val field = fields(i)
    val fieldType = field.dataType
    val fieldName = field.name
    fieldType match {
      case _: ArrayType =>
	import pandas as pd
	from pymongo import MongoClient

	client = MongoClient("mongodb://localhost:27017")
	db = client.database_Name

	collection_conn = db['collection_name']
	collection_cursor = collection_conn.find()
	collection_pandas_df = pd.DataFrame(list(collection_cursor))
	from pyspark.sql.types import *
	from pyspark.sql.functions import *

	#Flatten array of structs and structs
	def flatten(df):
	# compute Complex Fields (Lists and Structs) in Schema
	complex_fields = dict([(field.name, field.dataType)
	for field in df.schema.fields
	if type(field.dataType) == ArrayType or type(field.dataType) == StructType])
	while len(complex_fields)!=0:
	def flattenDataFrame(df: DataFrame): DataFrame = {
	val fields = df.schema.fields
	val fieldNames = fields.map(x => x.name)

	for (i <- fields.indices) {
	val field = fields(i)
	val fieldType = field.dataType
	val fieldName = field.name
	fieldType match {
	case _: ArrayType =>