GitHub Gists by Siaterlis Konstantinos (siakon89)
import os
from decimal import Decimal  # DynamoDB numbers must be Decimal, not float

import boto3

# The table name is injected through the Lambda environment
DYNAMODB_TABLE = os.environ.get("DYNAMODB_TABLE")
dynamodb = boto3.resource("dynamodb")
dynamodb_table = dynamodb.Table(DYNAMODB_TABLE)
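# A minimal put_item sketch showing why Decimal is imported above: DynamoDB
# rejects Python floats, so numeric attributes are wrapped in Decimal.
# The key and attribute names here are assumptions, not from the original gist.
dynamodb_table.put_item(
    Item={
        "id": "image-123",              # assumed partition key
        "confidence": Decimal("0.97"),  # numbers go in as Decimal
    }
)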
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: >
  My Image App - Label images

# Global definitions
# In this example the globals apply only to the Lambda functions
Globals:
  Function:
    Runtime: python3.6
# Filesystem schemes and URIs
|------------|--------------------|
| Filesystem | URI Structure      |
|------------|--------------------|
| Local FS   | file:///path       |
| HDFS       | hdfs://hdfs_path   |
| S3         | s3://bucket/object |
|------------|--------------------|
# Loading a file into an RDD (swap the scheme per the table above)
rdd = sc.textFile("file:///path/to/file.txt")
# GroupBy a column and count
df.groupby("column").count().show()
# GroupBy a column and sum
df.groupby("column1").sum("column2").show()
# GroupBy with multiple columns
df.groupby("column1", "column2").count().show()
# GroupBy with multiple columns and sum multiple columns
df.groupby("column1", "column2").sum("column3", "column4").show()
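# A mixed-aggregation sketch via the standard agg() API; the column names and
# the sum/max pairing are placeholders, not from the original snippet
df.groupby("column1").agg({"column2": "sum", "column3": "max"}).show()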
# Filter your DataFrame and select a column
df.filter(df.column1 > 20).select("column2").show()
# Filter your DataFrame with AND
df.filter((df.column1 > 20) & (df.column2 < 10)).select("column2").show()
# Filter your DataFrame with OR
df.filter((df.column1 > 20) | (df.column2 < 10)).select("column2").show()
# Select one column from your DataFrame
df.select("column").show()
# Select multiple columns from your DataFrame
df.select("column1", "column2").show()
# Select a column and add 1 to every entry
df.select(df.column + 1).show()
# Reading a CSV
df = spark.read.csv("filename.csv")
# Reading a CSV with header
df = spark.read.csv("filename.csv", header=True)
# Reading a CSV using the load method
df = spark.read.format("csv").load("filename.csv")
# Reading a CSV using the load method with header
df = spark.read.format("csv").option("header", "true").load("filename.csv")
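# A common companion option (standard Spark API): ask Spark to infer column
# types instead of reading every field as a string
df = spark.read.csv("filename.csv", header=True, inferSchema=True)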
import boto3

# SQL query to run
query = "SELECT * FROM table LIMIT 10"
# Create the client
athena = boto3.client("athena")
# Submit the query - the response carries a QueryExecutionId;
# the database name and output location below are placeholders
response = athena.start_query_execution(
    QueryString=query,
    QueryExecutionContext={"Database": "my_database"},
    ResultConfiguration={"OutputLocation": "s3://my-bucket/athena-results/"},
)
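# A follow-up sketch using the standard boto3 Athena calls: poll the execution
# until it finishes, then fetch the result rows
import time

query_id = response["QueryExecutionId"]
while True:
    status = athena.get_query_execution(QueryExecutionId=query_id)
    state = status["QueryExecution"]["Status"]["State"]
    if state in ("SUCCEEDED", "FAILED", "CANCELLED"):
        break
    time.sleep(1)

if state == "SUCCEEDED":
    rows = athena.get_query_results(QueryExecutionId=query_id)["ResultSet"]["Rows"]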
# List all buckets
aws s3 ls
# Use a different profile
aws s3 ls --profile <profile name>
# List objects in a bucket
aws s3 ls s3://<my bucket>
# Create a bucket
aws s3 mb s3://<my bucket>