- Create new EBS Volume in the correct Availability Zone
- Attach new EBS Volume to EC2 Instance
- Check whether the device already has a filesystem (a brand-new volume shows only `data`; an existing one shows e.g. ext4)
sudo file -s /dev/xvd*
- Find new disk
sudo fdisk -l
- Create filesystem
sudo mkfs -t ext4 /dev/xvdf
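- Alternative: the create/attach steps can also be scripted with boto3. A minimal sketch, assuming placeholder values for the Availability Zone, size, instance id, and device name:

import boto3

ec2 = boto3.client('ec2')

# Create the volume in the same AZ as the target instance (placeholder values).
volume = ec2.create_volume(AvailabilityZone='us-east-1a', Size=100, VolumeType='gp3')
ec2.get_waiter('volume_available').wait(VolumeIds=[volume['VolumeId']])

# Attach it under the device name used in the commands above (/dev/xvdf).
ec2.attach_volume(
    VolumeId=volume['VolumeId'],
    InstanceId='i-0123456789abcdef0',  # placeholder instance id
    Device='/dev/xvdf',
)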
-- show running queries (pre 9.2)
SELECT procpid, age(clock_timestamp(), query_start), usename, current_query
FROM pg_stat_activity
WHERE current_query != '<IDLE>' AND current_query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start desc;
-- show running queries (9.2+; current_query was renamed to query, and idle
-- sessions are now identified by the state column rather than '<IDLE>')
SELECT pid, age(clock_timestamp(), query_start), usename, query
FROM pg_stat_activity
WHERE state <> 'idle' AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start desc;
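To run the 9.2+ query from a script instead of psql, a minimal psycopg2 sketch (the connection parameters below are placeholders) could look like this:

import psycopg2

# Placeholder connection parameters; substitute real ones.
conn = psycopg2.connect(host="localhost", dbname="mydb", user="postgres", password="...")

running_queries_sql = """
    SELECT pid, age(clock_timestamp(), query_start), usename, query
    FROM pg_stat_activity
    WHERE state <> 'idle' AND query NOT ILIKE '%pg_stat_activity%'
    ORDER BY query_start DESC
"""

with conn, conn.cursor() as cur:
    # No parameters are passed to execute(), so the % wildcards need no escaping.
    cur.execute(running_queries_sql)
    for pid, running_for, usename, sql in cur.fetchall():
        print(pid, running_for, usename, sql[:80])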
import java.io.InputStream
import java.util.Properties
import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils
import org.apache.spark.sql.{ DataFrame, Row }
import org.postgresql.copy.CopyManager
import org.postgresql.core.BaseConnection

val jdbcUrl = s"jdbc:postgresql://..." // db credentials elided
val connectionProperties = {
  // Minimal completion: only the JDBC driver class is set here; user/password elided.
  val props = new Properties()
  props.setProperty("driver", "org.postgresql.Driver")
  props
}
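// Sketch only, not the original snippet's full code: stream each partition into
// Postgres via COPY. Assumes the target table already exists and that row values
// are CSV-safe; quoting/escaping is ignored here for brevity.
def copyIn(df: DataFrame, table: String): Unit = {
  df.rdd.foreachPartition { rows: Iterator[Row] =>
    val conn = java.sql.DriverManager.getConnection(jdbcUrl, connectionProperties)
    try {
      val copyManager = new CopyManager(conn.asInstanceOf[BaseConnection])
      val csv = rows.map(_.mkString(",")).mkString("\n")
      val in: InputStream = new java.io.ByteArrayInputStream(csv.getBytes("UTF-8"))
      copyManager.copyIn(s"COPY $table FROM STDIN WITH (FORMAT csv)", in)
    } finally {
      conn.close()
    }
  }
}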
# Function to convert Python objects to Java objects
from pyspark.serializers import AutoBatchedSerializer, PickleSerializer

def _to_java_object_rdd(rdd):
    """ Return a JavaRDD of Object by unpickling.
    It will convert each Python object into a Java object via Pyrolite, whether or
    not the RDD is serialized in batch.
    """
    rdd = rdd._reserialize(AutoBatchedSerializer(PickleSerializer()))
    return rdd.ctx._jvm.org.apache.spark.mllib.api.python.SerDe.pythonToJava(rdd._jrdd, True)

# Convert DataFrame to an RDD
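A commonly seen use of this helper (a sketch, assuming an existing SparkContext `sc` and DataFrame `df`) is to hand the unpickled JavaRDD to Spark's SizeEstimator for a rough in-memory size estimate; treat the number as a heuristic, not an exact measurement:

# Rough size estimate of a DataFrame's data via SizeEstimator (heuristic only).
java_rdd = _to_java_object_rdd(df.rdd)
size_bytes = sc._jvm.org.apache.spark.util.SizeEstimator.estimate(java_rdd)
print("estimated size: %.1f MiB" % (size_bytes / (1024.0 ** 2)))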
import boto3
import json
import traceback

def handler(event, context):
    # A named profile is handy for local testing; inside Lambda the execution
    # role's credentials are picked up automatically.
    session = boto3.Session(profile_name="sandbox")
    client = session.client('ec2')
    response = client.describe_images(
        # Owners=[event['ResourceProperties']['Owner']],
        Filters=[
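            # Hypothetical filter entries for illustration only; the real values are
            # truncated in this excerpt. Each uses boto3's {'Name': ..., 'Values': [...]} shape.
            {'Name': 'name', 'Values': ['my-ami-*']},
            {'Name': 'state', 'Values': ['available']},
        ],
    )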