Tony Fraser tonythor

## conditionalJoin.scala
// First time I used a transformer... It's pretty cool, so I'm adding this to my notes.

/**
 * Sets every column named in `columns` to the literal value 0 on `df`.
 *
 * The names are applied one at a time, left to right, each through
 * `withColumn(name, lit(0))`.
 *
 * @param df      the starting DataFrame
 * @param columns names of the columns to set to zero
 * @return the DataFrame obtained after all `withColumn` calls; `df` itself
 *         when `columns` is empty
 */
def addColumnsAsZero(df: DataFrame, columns: Seq[String]): DataFrame =
  columns.foldLeft(df) { (accumulated, name) =>
    accumulated.withColumn(name, lit(0))
  }

def conditionalJoin(baseDf:DataFrame, leftDf: DataFrame,
                    doJoin:Boolean, joinCondition: Column,
                    joinType:String = "inner", zeroColumns:Seq[String] ) = {
  if(doJoin) {

## file.py
## the bad chart
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

titanic = pd.read_csv('https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv')
titanic['Survival Status'] = titanic['Survived'].map({0: 'died', 1: 'survived'})
sns.set(style="dark", rc={'axes.facecolor':'#f2f2f2', 'grid.color': 'black'})

g = sns.catplot(x="Sex", hue="Survival Status", col="Pclass",

## load_kaggle_data.r
library(jsonlite)
library(readr)
library(tidyverse)

load_data_kaggle <- function(local_filename, kaggle_dataset) {
  if (!file.exists(local_filename)) {
    # Load Kaggle API credentials only if the file needs to be downloaded

    # To get credentials file, log into kaggle, click on settings, api, you'll see it.
    # save as nogit_kaggle.json

## lookslikethis.png

      
              2 files
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                tonythor
                / lookslikethis.png
            
            
              Last active
              November 3, 2023 01:01
            
              
                Quarto / LaTex Color Header
              
          
## SparkLoadDFIntegationTest.scala
// PS, see this page, and yeah that is my comment!
// https://stackoverflow.com/questions/27360977/how-to-read-files-from-resources-folder-in-scala/55084068#55084068

// put a csv file in [project]/src/it/resources/data/mytestdata.csv
/**
 * Loads the CSV resource `data/<_p>` into a DataFrame.
 *
 * The resource is read line by line with `scala.io.Source.fromResource`,
 * turned into a Dataset of strings, and parsed by Spark's CSV reader with
 * `header = true` and `,` as the delimiter.
 *
 * Relies on a `spark` session being in scope; `.toDS` presumably needs
 * `spark.implicits._` imported by the surrounding code -- TODO confirm.
 *
 * @param _p file name of the resource, relative to the `data/` resource folder
 * @return the parsed DataFrame
 */
def loadDf(_p: String): DataFrame = {
  val source = scala.io.Source.fromResource(s"data/${_p}")
  // toList forces every line to be read before the source is closed;
  // the finally block releases the underlying stream even if reading fails.
  val lines = try source.getLines().toList finally source.close()
  val d = spark.sparkContext.parallelize(lines).toDS
  spark.read.option("header", "true").option("delimiter", ",").csv(d)
}
// Example usage: loads the "mytestdata.csv" resource through loadDf.
val myDf = loadDf(_p = "mytestdata.csv")

## rgdal_sf_mapview.on.osx.MD

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              2 stars
            
          
                tonythor
                / rgdal_sf_mapview.on.osx.MD
            
            
              Last active
              October 14, 2025 11:37
            
              
                install rgdal on osx
              
          
    install rgdal on mac/osx

This problem shows up all over the place, and hardly anybody seems to have gotten it working. Sorry, I just got out of that hell myself and it was brutal. If you find this, you're looking at somebody who actually got it working on a brand new Mac, with that running in developer mode. So yes, it's really possible.
errors for the algorithms


checking PROJ: checking whether PROJ and sqlite3 are available for linking:... no
configure: error: libproj or sqlite3 not found in standard or given locations.
ERROR: configuration failed for package ‘sf’
configure: error: gdal-config not found or not executable.


## dyplyr_and_sqldf_cheatsheet.R
# This is all cut and paste from my ChatGPT window, so
# be sure to double-check everything!

## SQLDF ##########################################
# Load the sqldf package; each sqldf() call below runs one SQL statement.
library(sqldf)
# Basic SQL Operations
# NOTE: df, column1, column2 and value are placeholders -- substitute your own
# data frame, column names and values before running these.
sqldf("SELECT column1, column2 FROM df")              # SELECT
sqldf("SELECT * FROM df WHERE column1 = 'value'")    # WHERE
sqldf("SELECT * FROM df ORDER BY column1 ASC")       # ORDER BY
# NOTE(review): the bare `value` after `>` stands in for a number; left as-is
# it is unquoted, so SQL would presumably read it as an identifier -- confirm.
sqldf("SELECT column1, AVG(column2) FROM df GROUP BY column1 HAVING AVG(column2) > value") # GROUP BY & HAVING # nolint

## ComplexDfDebugger.scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.StringType

object ComplexDFToCsv {
  /**
   * A utility class that stringifies all columns within a dataframe,
   * and then inserts a couple of characters in every null/empty record
   * to make viewing in excel a little easier.
   * Example in: com.nbcuas.add.demandpreprocess.AccessS3

## getType.scala
//Python programmers, yes it sucks. there is no type() method
//in scala. There's this though, I use it all the time.

import scala.reflect.runtime.universe._
/**
 * Returns the reflection `Type` of the type parameter `T` inferred for `a`.
 *
 * Only the `TypeTag` for `T` is consulted; the value `a` itself is never
 * inspected, so the result reflects the static type at the call site.
 *
 * @param a any value; used solely to let the compiler infer `T`
 * @tparam T the static type of `a`, for which a `TypeTag` must be available
 * @return the `Type` carried by the `TypeTag` of `T`
 */
def getType[T: TypeTag](a: T): Type = implicitly[TypeTag[T]].tpe


## PrettyPrint.scala
package com.nbcuas.add.common.general

import java.lang.reflect.Field
import java.sql.Timestamp

/**
## another shameless reprint of this stack post:
## https://stackoverflow.com/questions/15718506/scala-how-to-print-case-classes-like-pretty-printed-tree
## thanks to stack users [@F. P Freely] and [@samthebest]
	// First time I used a transformer... It's pretty cool, so I'm adding this to my notes.

	/**
	 * Sets every column named in `columns` to the literal value 0 on `df`.
	 *
	 * The names are applied one at a time, left to right, each through
	 * `withColumn(name, lit(0))`.
	 *
	 * @param df      the starting DataFrame
	 * @param columns names of the columns to set to zero
	 * @return the DataFrame obtained after all `withColumn` calls; `df` itself
	 *         when `columns` is empty
	 */
	def addColumnsAsZero(df: DataFrame, columns: Seq[String]): DataFrame =
	  columns.foldLeft(df) { (accumulated, name) =>
	    accumulated.withColumn(name, lit(0))
	  }

	def conditionalJoin(baseDf:DataFrame, leftDf: DataFrame,
	doJoin:Boolean, joinCondition: Column,
	joinType:String = "inner", zeroColumns:Seq[String] ) = {
	if(doJoin) {
	## the bad chart
	import seaborn as sns
	import pandas as pd
	import matplotlib.pyplot as plt

	titanic = pd.read_csv('https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv')
	titanic['Survival Status'] = titanic['Survived'].map({0: 'died', 1: 'survived'})
	sns.set(style="dark", rc={'axes.facecolor':'#f2f2f2', 'grid.color': 'black'})

	g = sns.catplot(x="Sex", hue="Survival Status", col="Pclass",
	library(jsonlite)
	library(readr)
	library(tidyverse)

	load_data_kaggle <- function(local_filename, kaggle_dataset) {
	if (!file.exists(local_filename)) {
	# Load Kaggle API credentials only if the file needs to be downloaded

	# To get credentials file, log into kaggle, click on settings, api, you'll see it.
	# save as nogit_kaggle.json
	// PS, see this page, and yeah that is my comment!
	// https://stackoverflow.com/questions/27360977/how-to-read-files-from-resources-folder-in-scala/55084068#55084068

	// put a csv file in [project]/src/it/resources/data/mytestdata.csv
	/**
	 * Loads the CSV resource `data/<_p>` into a DataFrame.
	 *
	 * The resource is read line by line with `scala.io.Source.fromResource`,
	 * turned into a Dataset of strings, and parsed by Spark's CSV reader with
	 * `header = true` and `,` as the delimiter.
	 *
	 * Relies on a `spark` session being in scope; `.toDS` presumably needs
	 * `spark.implicits._` imported by the surrounding code -- TODO confirm.
	 *
	 * @param _p file name of the resource, relative to the `data/` resource folder
	 * @return the parsed DataFrame
	 */
	def loadDf(_p: String): DataFrame = {
	  val source = scala.io.Source.fromResource(s"data/${_p}")
	  // toList forces every line to be read before the source is closed;
	  // the finally block releases the underlying stream even if reading fails.
	  val lines = try source.getLines().toList finally source.close()
	  val d = spark.sparkContext.parallelize(lines).toDS
	  spark.read.option("header", "true").option("delimiter", ",").csv(d)
	}
	// Example usage: loads the "mytestdata.csv" resource through loadDf.
	val myDf = loadDf(_p = "mytestdata.csv")
	# This is all cut and paste from my ChatGPT window, so
	# be sure to double-check everything!

	## SQLDF ##########################################
	# Load the sqldf package; each sqldf() call below runs one SQL statement.
	library(sqldf)
	# Basic SQL Operations
	# NOTE: df, column1, column2 and value are placeholders -- substitute your own
	# data frame, column names and values before running these.
	sqldf("SELECT column1, column2 FROM df") # SELECT
	sqldf("SELECT * FROM df WHERE column1 = 'value'") # WHERE
	sqldf("SELECT * FROM df ORDER BY column1 ASC") # ORDER BY
	# NOTE(review): the bare `value` after `>` stands in for a number; left as-is
	# it is unquoted, so SQL would presumably read it as an identifier -- confirm.
	sqldf("SELECT column1, AVG(column2) FROM df GROUP BY column1 HAVING AVG(column2) > value") # GROUP BY & HAVING # nolint
	import org.apache.spark.sql.DataFrame
	import org.apache.spark.sql.functions.{col, udf}
	import org.apache.spark.sql.types.StringType

	object ComplexDFToCsv {
	/**
	* A utility class that stringifies all columns within a dataframe,
	* and then inserts a couple of characters in every null/empty record
	* to make viewing in excel a little easier.
	* Example in: com.nbcuas.add.demandpreprocess.AccessS3
	//Python programmers, yes it sucks. there is no type() method
	//in scala. There's this though, I use it all the time.

	import scala.reflect.runtime.universe._
	/**
	 * Returns the reflection `Type` of the type parameter `T` inferred for `a`.
	 *
	 * Only the `TypeTag` for `T` is consulted; the value `a` itself is never
	 * inspected, so the result reflects the static type at the call site.
	 *
	 * @param a any value; used solely to let the compiler infer `T`
	 * @tparam T the static type of `a`, for which a `TypeTag` must be available
	 * @return the `Type` carried by the `TypeTag` of `T`
	 */
	def getType[T: TypeTag](a: T): Type = implicitly[TypeTag[T]].tpe
	package com.nbcuas.add.common.general

	import java.lang.reflect.Field
	import java.sql.Timestamp

	/**
	## another shameless reprint of this stack post:
	## https://stackoverflow.com/questions/15718506/scala-how-to-print-case-classes-like-pretty-printed-tree
	## thanks to stack users [@F. P Freely] and [@samthebest]