Skip to content

Instantly share code, notes, and snippets.

View taiwotman's full-sized avatar
🎯
Focusing

Taiwo O. Adetiloye taiwotman

🎯
Focusing
View GitHub Profile
Sqoop is a tool designed to transfer data between Hadoop and relational database servers — for example, between MySQL or Oracle
and HDFS or HBase.
Clients access HBase through a Java API, a REST interface, a Thrift gateway, or the HBase shell command-line interface.
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by taiwoadetiloye on 2016-04-01.
*/
object kmeans extends App{
@taiwotman
taiwotman / __init__.py
Created April 30, 2020 16:07
Flasklogin-Neo4j key files
"""Initialize app."""
from flask import Flask
from .flask_py2neo import Py2Neo
from flask_login import LoginManager
from . import routes
from . import auth
# Module-level Py2Neo driver instance shared across the package.
# NOTE(review): created at import time; presumably bound to the Flask app
# later via an init_app()-style call inside the app factory — confirm.
db = Py2Neo()
# Flask-Login session manager singleton; presumably initialized against
# the app in the factory as well — confirm.
login_manager = LoginManager()
"""Database models"""
from . import db
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import UserMixin
from py2neo import Node
from py2neo.ogm import GraphObject, Property
from datetime import datetime
class User(UserMixin, GraphObject):
"""Model for user accounts."""
@taiwotman
taiwotman / __init__.py
Last active April 30, 2020 16:24
Flasklogin-Neo4j key files
"""Initialize app."""
from flask import Flask
from .flask_py2neo import Py2Neo
from flask_login import LoginManager
from . import routes
from . import auth
# Module-level Py2Neo driver instance shared across the package.
# NOTE(review): created at import time; presumably bound to the Flask app
# later via an init_app()-style call inside the app factory — confirm.
db = Py2Neo()
# Flask-Login session manager singleton; presumably initialized against
# the app in the factory as well — confirm.
login_manager = LoginManager()
"""App entry point."""
from application import create_app
# Build the Flask application through the application factory.
app = create_app()
# Run the built-in development server when executed directly.
# NOTE(review): debug=True must never ship to production, and host
# '0.0.0.0' binds on all interfaces — acceptable for a container/dev
# box only; use a real WSGI server (gunicorn/uwsgi) in production.
if __name__ == "__main__":
    app.run(host='0.0.0.0', debug=True)
@taiwotman
taiwotman / rds_sql1.sql
Last active December 17, 2020 22:35
AWS REDSHIFT SQL
-- Raw landing table for the Seattle Real-Time Fire 911 Calls feed.
-- Every column is deliberately VARCHAR: the CSV is loaded as-is and the
-- numeric casting (latitude/longitude) happens downstream in Spark.
CREATE TABLE seattle_emergence_calls (
    incident_number  VARCHAR,
    address          VARCHAR,
    type             VARCHAR,
    call_date        VARCHAR,
    latitude         VARCHAR,
    longitude        VARCHAR,
    report_location  VARCHAR
);
from pyspark.sql import SparkSession
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import col
# Path to the Redshift JDBC driver jar shipped alongside this script.
jar_path = 'rds_jar_driver.jar'

# Build (or reuse) a SparkSession with the JDBC driver on both the
# executor ("spark.jars") and the driver classpath.
# FIX(review): the original builder chain ended in a dangling '\' followed
# by a comment line (a syntax error) and never called .getOrCreate(), so
# `spark` was never created.
spark = SparkSession \
    .builder \
    .appName("AWS REDSHIFT PYSPARK APP") \
    .config("spark.jars", jar_path) \
    .config('spark.driver.extraClassPath', jar_path) \
    .getOrCreate()

# NOTE(review): `df` is referenced below but never created in this gist —
# presumably loaded from the Redshift table over JDBC (spark.read.jdbc or
# spark.read.format("jdbc")); restore the load step before running.

# Transformation: the landing table stores everything as VARCHAR, so cast
# the coordinate columns to doubles for numeric work.
df = df.withColumn("latitude", col("latitude").cast(DoubleType())) \
    .withColumn("longitude", col("longitude").cast(DoubleType()))

df.printSchema()
df.show(10)

# More Insight
@taiwotman
taiwotman / rds_sql2.sql
Last active December 18, 2020 01:26
AWS REDSHIFT SQL
-- Bulk-load the raw 911-calls CSV from S3 into the landing table.
-- IGNOREHEADER 1 skips the CSV header row.
-- FIX(review): replaced the deprecated CREDENTIALS 'aws_iam_role=...'
-- authorization string with the current IAM_ROLE clause; supply the
-- full role ARN (arn:aws:iam::<account>:role/<name>).
COPY seattle_emergence_calls
FROM 's3://seattle-realtime-emergence-fire-call/Seattle_Real_Time_Fire_911_Calls.csv'
FORMAT CSV
IGNOREHEADER 1
IAM_ROLE 'your_iam_role'
REGION 'us-east-1';