@tonyfraser
tonyfraser / python_basics_review.py
Last active June 12, 2022 21:37
Python Quick Review
In [157]: colors
Out[157]: ['0red', '1orange', '2pink', '3green', '4blue', '5indigo', '6violet']
# slices use [start:stop:step]; stop is non-inclusive
In [156]: colors[2:4]
Out[156]: ['2pink', '3green']
#start from the end
In [165]: colors[-4::2]
Out[165]: ['3green', '5indigo']
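A few more slice patterns worth keeping next to the session above — a runnable sketch using the same colors list:

```python
# the same seven-item list from the session above
colors = ['0red', '1orange', '2pink', '3green', '4blue', '5indigo', '6violet']

# [start:stop:step] -- stop is non-inclusive
print(colors[2:4])      # ['2pink', '3green']

# negative start counts from the end; step skips elements
print(colors[-4::2])    # ['3green', '5indigo']

# a negative step reverses; [::-1] is the idiomatic full reverse
print(colors[::-1][0])  # 6violet

# slices never raise IndexError -- out-of-range bounds are clamped
print(colors[5:100])    # ['5indigo', '6violet']
```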
@tonyfraser
tonyfraser / _userdata.sh
Last active June 12, 2022 16:58
AWS EC2 user data script that installs the CloudWatch agent and SSM, and configures an instance to send host logs to CloudWatch.
#!/bin/bash
## Launch amilinux2 under an ec2 role that has permissions of:
## 1. Full S3 Access
## 2. Amazon Managed CloudwatchFullAccess
## 3. Amazon Managed AmazonSSMFullAccess
## [be sure to combine these into a single scoped-down policy after the POC works]
## And make sure that role has an ec2 trust policy like:
# {
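The trust policy JSON is truncated in the preview above. As a reference point, a minimal EC2 trust policy typically looks like the document below — here built as a Python dict so it can be checked and serialized; the field names follow the standard IAM policy format:

```python
import json

# a minimal EC2 trust policy: lets the EC2 service assume the role
trust_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "ec2.amazonaws.com"},
            "Action": "sts:AssumeRole",
        }
    ],
}

print(json.dumps(trust_policy, indent=2))
```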
from airflow import DAG
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator, BranchPythonOperator
# from airflow.operators.subdag import SubDagOperator
from airflow.utils.task_group import TaskGroup
from airflow.operators.dummy import DummyOperator
from random import uniform
from datetime import datetime
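The imports above (BranchPythonOperator, random.uniform) suggest a branching DAG. A minimal sketch of the kind of branch callable a BranchPythonOperator would invoke — the task ids and the accuracy threshold here are made-up illustrations, not taken from the original gist:

```python
from random import uniform

# BranchPythonOperator expects a callable that returns the task_id
# (or list of task_ids) to follow; all other branches are skipped.
def choose_best_model(threshold: float = 0.8) -> str:
    accuracy = uniform(0.0, 1.0)  # stand-in for a real model metric
    return 'accurate' if accuracy > threshold else 'inaccurate'

print(choose_best_model())
```

In the DAG this would be wired as `BranchPythonOperator(task_id='choose', python_callable=choose_best_model)`, with downstream tasks named after the returned ids.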
@tonyfraser
tonyfraser / 63568370.md
Last active June 12, 2022 17:03
For a Stack Overflow suggestion...

The end of the line is **here.**
And here is another _line._
The end of the line is **here.**
And here is another _line._
The end of the line is **here.**

And here is another line.

# load random weblog data
columns = ['accept_language', 'domain', 'geo_city', 'geo_country','post_mobiledevice', 'post_mobileosversion']
df = s3.load(full_path='{bucket}/tfraser/{weblog}/{folder}/',
             file_type='csv',
             file_filter=".csv"
             )[columns].dropna(how='any').copy()
# data looks like this.
# accept_language domain geo_city geo_country post_mobiledevice post_mobileosversion
#0 en-us rr.com austin usa iPad4,2 iOS 11.1.2
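The `s3.load` helper above is custom, but the column-select / dropna pattern can be reproduced on an in-memory frame. A sketch with fabricated rows shaped like the preview:

```python
import pandas as pd

columns = ['accept_language', 'domain', 'geo_city', 'geo_country',
           'post_mobiledevice', 'post_mobileosversion']

# fabricated rows shaped like the weblog preview above
raw = pd.DataFrame([
    ['en-us', 'rr.com', 'austin', 'usa', 'iPad4,2', 'iOS 11.1.2'],
    ['en-gb', 'bt.com', 'london', 'gbr', None, 'iOS 12.0'],
], columns=columns)

# keep only the wanted columns, drop rows with any missing value,
# and .copy() so later mutations don't operate on a view
df = raw[columns].dropna(how='any').copy()
print(len(df))  # the row with the missing device is dropped
```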
@tonyfraser
tonyfraser / seaborn_on_ipython.py
Last active August 5, 2020 16:04
Seaborn on the IPython shell (without Jupyter), two ways
# this assumes pip3 and ipython; you should have these installed and be able to run:
# thunder:~ user$ pip3 install seaborn ipython matplotlib
################### Using matplotlib ################
# thunder:~ user$ ipython
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline # <- don't do this, your terminal can't render this. You need the popups.
titanic = pd.read_csv('https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv')
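The preview cuts off before any plot is drawn. A hedged sketch of the likely next step — a countplot plus the explicit `plt.show()` a terminal (non-inline) session needs; the tiny stand-in frame avoids the network fetch, and the Agg backend is only there so the sketch also runs headless:

```python
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # headless backend; drop this in a real terminal session
import matplotlib.pyplot as plt
import seaborn as sns

# tiny stand-in for the titanic frame loaded above
titanic = pd.DataFrame({'Survived': [0, 1, 1, 0, 0]})

sns.countplot(x='Survived', data=titanic)
plt.show()  # in a real terminal session this pops the plot window
```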
@tonyfraser
tonyfraser / simple_indexes.py
Created May 26, 2020 13:02
Pandas simple indexes, filtering off multiple columns, renaming, adding, etc.
import pandas as pd
from numpy.random import randn
rows = ['a','b','c','d','e']
cols = ['w','x','y','z']
df = pd.DataFrame(randn(5,4), rows, cols)
# w x y z
# a 2.706850 0.628133 0.907969 0.503826
# b 0.651118 -0.319318 -0.848077 0.605965
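The description promises filtering off multiple columns, renaming, and adding — a sketch of those three operations, seeded so the frame is reproducible (seed 101 yields the values shown in the preview above):

```python
import pandas as pd
import numpy as np
from numpy.random import randn

np.random.seed(101)  # reproducible values
rows = ['a', 'b', 'c', 'd', 'e']
cols = ['w', 'x', 'y', 'z']
df = pd.DataFrame(randn(5, 4), rows, cols)

# filter on multiple columns at once -- combine masks with & and parentheses
filtered = df[(df['w'] > 0) & (df['y'] > 0)]

# add a derived column
df['total'] = df['w'] + df['x']

# rename without mutating in place
renamed = df.rename(columns={'w': 'west'})
print(renamed.columns.tolist())  # ['west', 'x', 'y', 'z', 'total']
```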
@tonyfraser
tonyfraser / list_map_lambda_filter_easy.py
Last active June 19, 2020 22:14
python list comprehension / map / lambda / filter functions explained with six lines of simple code
# List Comprehension / Map / Lambda Functions Explained SUPER EASY
# say you have a list of files and want to work with the extensions.
files = ['tony.txt', 'fraser.csv', 'ex.xls']
# it could be a function, you could loop through it.
def get_suffix(file: str):
    return file.split('.')[1]
# for file in files: print(get_suffix(file))
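The loop above can be collapsed into one-liners — a sketch of the comprehension, map, and filter equivalents the gist title promises:

```python
files = ['tony.txt', 'fraser.csv', 'ex.xls']

# 1. list comprehension
suffixes = [f.split('.')[1] for f in files]

# 2. map + lambda -- map returns an iterator, so wrap it in list()
suffixes_map = list(map(lambda f: f.split('.')[1], files))

# 3. filter -- keep only the csv files
csvs = list(filter(lambda f: f.endswith('.csv'), files))

print(suffixes)  # ['txt', 'csv', 'xls']
print(csvs)      # ['fraser.csv']
```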
@tonyfraser
tonyfraser / ZeppelinService.scala
Created February 10, 2020 22:20
A scala service that calls zeppelin notebooks
package com.gimmesome.zeppelin
import com.softwaremill.sttp._
import scala.util.parsing.json.JSON
// case class ZeppelinConfig (instance: String, baseUrl: String, authLoginUrl: String, authUid: String, authPass: String)
// Usage:
// import something.ZeppelinService
// val notebook = "2E6T7JZX1"
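For readers who don't run Scala, the same call pattern can be sketched in Python. The endpoint paths below follow Zeppelin's documented REST API (`/api/login`, `/api/notebook/job/{noteId}`), but treat the exact URLs as assumptions to verify against your Zeppelin version — no network calls are made here, only URL construction:

```python
# sketch: build the Zeppelin REST URLs the Scala service above would hit
def login_url(base_url: str) -> str:
    # POST uid/password here to obtain a session cookie
    return f"{base_url}/api/login"

def run_notebook_url(base_url: str, note_id: str) -> str:
    # POST to this endpoint runs all paragraphs in the notebook
    return f"{base_url}/api/notebook/job/{note_id}"

print(run_notebook_url("http://zeppelin:8080", "2E6T7JZX1"))
```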
@tonyfraser
tonyfraser / UseDariaToMakeExcelSafeCSV.scala
Last active January 24, 2020 23:49
A Scala example that leverages Spark Daria's multiRegexpReplace and bulkRegexpReplace to transform DataFrame string columns into something that doesn't break Excel
import com.github.mrpowers.spark.daria.sql.transformations
import scala.annotation.tailrec
// import other stuff related to spark
val DefaultReplacements = Map(
"'" -> "\\'",
"\"" -> "\\'",
"," -> "\\,")
// if you wanted to pass in a list of columns, say all columns in a DF, you could replace like so.
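For comparison, a Python analogue of the replacement map above, applied per entry with plain `str.replace`. The escaping mirrors the Scala `DefaultReplacements` map; which escapes Excel actually needs is worth testing against your own data:

```python
# same idea as the Scala DefaultReplacements map above:
# old substring -> escaped replacement, applied to every string value
default_replacements = {
    "'": "\\'",
    '"': "\\'",
    ",": "\\,",
}

def excel_safe(value: str) -> str:
    # apply each replacement in turn, like bulkRegexpReplace over a column
    for old, new in default_replacements.items():
        value = value.replace(old, new)
    return value

print(excel_safe("hello, it's"))  # hello\, it\'s
```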