Skip to content

Instantly share code, notes, and snippets.

View anjijava16's full-sized avatar
💭
Awesome

Anjaiah Methuku anjijava16

💭
Awesome
View GitHub Profile
# Read the "Customer" container from Azure Cosmos DB (cosmos.oltp format)
# via the Synapse linked service, then preview the first 10 rows.
dfCustomer = (
    spark.read.format("cosmos.oltp")
    .option("spark.synapse.linkedService", "AdventureWorksSQL")
    .option("spark.cosmos.container", "Customer")
    .load()
)
display(dfCustomer.limit(10))
# Write data
# Spark Streaming with Cassandra (Azure Cosmos DB)
1. https://github.com/mspnp/azure-databricks-streaming-analytics
1. http://www.teradatahelp.com/2010/09/how-can-you-view-data-distribution-in.html
1. http://rajgopal-gurrapushala.blogspot.com/2012/11/types-of-hash-functions-used-in-teradata_734.html
Create Table emp
(
ID BIGINT NOT NULL GENERATED BY DEFAULT AS IDENTITY
(START WITH 1
INCREMENT BY 1
MINVALUE -999999999999999999
{
"_id" : "F0FF0BA8-CEF4-454C-9173-FA7C546F7179",
"customerId" : "A852CB99-DAA1-4348-A86F-622D4478A7D0",
"orderDate" : "2013-07-11T00:00:00",
"shipDate" : "2013-07-18T00:00:00",
"details" : [
{
"sku" : "LJ-0192-M",
"name" : "Long-Sleeve Logo Jersey, M",
"price" : 49.99,
package com.iwinner.runanysql;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.Reader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
{
insertId: "wr3zn3e373bw"
logName: "projects/iwinner-data-318822/logs/cloudaudit.googleapis.com%2Factivity"
operation: {
first: true
id: "operation-1627046240424-5c7ca39319163-07a56cfa-f16ec9a5"
producer: "compute.googleapis.com"
}
protoPayload: {
@type: "type.googleapis.com/google.cloud.audit.AuditLog"
https://morioh.com/p/4d9c2c2cb9ae?f=5c224490c513a556c9042463&fbclid=IwAR0EA40-1MS3YNSJRkdnGIjAe6of7e3kIRGVRXq1t14aT44eIx4N6ykJ4L8
https://medium.com/@somanathsankaran/window-functions-deep-dive-4aeffebf81e1
https://medium.com/analytics-vidhya/better-spark-logging-and-monitoring-with-spark-listener-c2c5fe86547a
https://medium.com/@somanathsankaran/window-functions-deep-dive-4aeffebf81e1
https://medium.com/analytics-vidhya/better-spark-logging-and-monitoring-with-spark-listener-c2c5fe86547a
Handling small files in Spark
https://github.com/capt-blackdron/pyspark_custom_repartitioner/blob/main/custom_repartitioner.py
SCD Type 2
https://github.com/capt-blackdron/pyspark-examples/blob/main/SCD_Type_2_Implementation_in_PySpark.ipynb
Spark program
https://stackoverflow.com/questions/59617081/cast-a-column-to-binary-datatype-in-spark
https://stackoverflow.com/questions/53075020/why-does-spark-infer-a-binary-instead-of-an-arraybyte-when-creating-a-datafram
https://stackoverflow.com/questions/41149522/extract-byte-from-spark-binarytype
https://stackoverflow.com/questions/47902691/filtering-by-binary-type-in-spark-sql
https://stackoverflow.com/questions/60627680/spark-binarytype-to-scala-java
SELECT
COUNT(*) TOTAL_QUERIES,
SUM(total_slot_ms/TIMESTAMP_DIFF(end_time,creation_time,MILLISECOND)) AVG_SLOT_USAGE,
SUM(TIMESTAMP_DIFF(end_time,creation_time,SECOND)) TOTAL_DURATION_IN_SECONDS,
AVG(TIMESTAMP_DIFF(end_time,creation_time,SECOND)) AVG_DURATION_IN_SECONDS,
SUM(total_bytes_processed*10e-12) TOTAL_PROCESSED_TB,
EXTRACT (DATE FROM creation_time) AS EXECUTION_DATE,
user_email as USER
FROM `iwinner-data-318822.region-us.INFORMATION_SCHEMA.JOBS_BY_PROJECT`
WHERE state='DONE'