{
  "conf": {
    "spark.sql.catalog.glue_catalog1": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.glue_catalog1.warehouse": "s3://YOUR-BUCKET-NAME/iceberg/glue_catalog1/tables/",
    "spark.sql.catalog.glue_catalog1.catalog-impl": "org.apache.iceberg.aws.glue.GlueCatalog",
    "spark.sql.catalog.glue_catalog1.io-impl": "org.apache.iceberg.aws.s3.S3FileIO",
    "spark.sql.catalog.glue_catalog1.lock-impl": "org.apache.iceberg.aws.glue.DynamoLockManager",
    "spark.sql.catalog.glue_catalog1.lock.table": "myGlueLockTable",
    "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
  }
}
spark.sql(""" CREATE TABLE IF NOT EXISTS demo.db.iceberg_data_2(id INT, first_name String,
last_name String, address String, pincocde INT, net_income INT, source_of_income String,
state String, email_id String, description String, population INT, population_1 String,
population_2 String, population_3 String, population_4 String, population_5 String, population_6 String,
population_7 String, date INT)
USING iceberg
TBLPROPERTIES ('format'='parquet', 'format-version' = '2')
PARTITIONED BY (`date`)
location 's3a://abhishek-test-01012023/iceberg_v2/db/iceberg_data_2'""")
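Once the table exists, it can be exercised through the same catalog. A small sketch with invented values (one per column of the schema above), followed by a peek at Iceberg's snapshots metadata table:

# Illustrative row only; the values are made up to match the schema above.
spark.sql("""
    INSERT INTO demo.db.iceberg_data_2
    VALUES (1, 'Jane', 'Doe', '42 Main St', 411001, 50000, 'salary', 'MH',
            'jane@example.com', 'demo row', 1000, 'a', 'b', 'c', 'd', 'e',
            'f', 'g', 20230101)""")

# Iceberg exposes metadata tables (snapshots, files, history) next to the data.
spark.sql("SELECT snapshot_id, operation FROM demo.db.iceberg_data_2.snapshots").show()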
from pyspark.sql import SparkSession

AWS_ACCESS_KEY_ID = "XXXXXXXXXXX"
AWS_SECRET_ACCESS_KEY = "XXXXXXXXXXX+XXXXXXXXXXX"
spark_jars_packages = "com.amazonaws:aws-java-sdk:1.12.246,org.apache.hadoop:hadoop-aws:3.2.2,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0"

spark = (
    SparkSession.builder
    .config("spark.jars.packages", spark_jars_packages)
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.demo.type", "hadoop")  # assumed: path-based catalog, since only a warehouse path is set
    .config("spark.sql.catalog.demo.warehouse", "s3a://abhishek-test-01012023/iceberg-sample-data/")
    .config("spark.hadoop.fs.s3a.access.key", AWS_ACCESS_KEY_ID)  # wire credentials into S3A
    .config("spark.hadoop.fs.s3a.secret.key", AWS_SECRET_ACCESS_KEY)
    .getOrCreate()
)
docker exec -it <container-id> bash
docker cp input-data <container-id>:/home/iceberg/data
version: "3"
services:
spark-iceberg:
image: tabulario/spark-iceberg
container_name: spark-iceberg
build: spark/
depends_on:
- rest
- minio
def outer_function():
    x = 10

    def inner_function():
        nonlocal x  # rebind x from the enclosing scope instead of creating a local
        x += 5
        print("Inner:", x)

    inner_function()
    print("Outer:", x)

outer_function()  # prints "Inner: 15" then "Outer: 15"
#include <iostream>
#include <memory>
#include <vector>

int main() {
    std::vector<std::unique_ptr<int>> numbers;
    auto ptr1 = std::make_unique<int>(20);
    auto ptr2 = std::make_unique<int>(10);
    // A unique_ptr cannot be copied into the vector; ownership must be moved.
    numbers.push_back(std::move(ptr1));
    numbers.push_back(std::move(ptr2));
    for (const auto& ptr : numbers)
        std::cout << *ptr << '\n';
    return 0;
}
#include <iostream>
#include <memory>
#include <vector>

class Numbers {
private:
    std::vector<std::unique_ptr<int>> values;  // renamed so it doesn't shadow the class name

public:
    void addNumber(int value)
    {
        auto ptr = std::make_unique<int>(value);
        values.push_back(std::move(ptr));  // transfer ownership into the vector
    }
    void printNumbers() const
    {
        for (const auto& ptr : values)
            std::cout << *ptr << '\n';
    }
};