Skip to content

Instantly share code, notes, and snippets.

@Saraf-SPb
Saraf-SPb / generateSmallFiles.scala
Last active October 21, 2025 11:14
Generates many small Parquet files up to ~targetMb total
// Works in spark-shell (Scala 2.13, Spark 3.5.x) without extra deps.
import org.apache.spark.sql.{SaveMode, SparkSession}
import java.time.Instant
import java.nio.file.{Files, Paths}
import scala.jdk.CollectionConverters._
object GistGenerateSmallFiles {
def generateSmallFiles(spark: SparkSession, outPath: String, targetMb: Int): Unit = {
import spark.implicits._
@Saraf-SPb
Saraf-SPb / HIveQL.sql
Last active August 15, 2022 11:47
HiveQL.OTUS
-- Flights delayed by more than 60 minutes that are associated with the Denver airport.
-- NOTE(review): the lines visible here only join flights to airports on the origin
-- airport id and filter by city; no delay condition appears in this excerpt --
-- confirm it exists in the remainder of the query.
select
f.*
from
flights f
join airports a on
f.originairportid = a.airport_id
where
-- (1 = 1) is always true; presumably a placeholder so every real filter can start with "and".
(1 = 1)
and city = 'Denver'
@Saraf-SPb
Saraf-SPb / PostgresDatasource.scala
Last active August 12, 2022 02:40
OTUS. Spark. Connectors.
package org.example.datasource.postgres
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.catalog._
import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.connector.read._
import org.apache.spark.sql.connector.write._
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@Saraf-SPb
Saraf-SPb / OTUS. Spark. DataFrame
Last active May 25, 2022 15:41
Домашнее задание №3. RDD/DataFrame/DataSet
// JDBC connection settings: PostgreSQL driver class and a URL pointing at the
// "otus" database on localhost:5432.
val driver = "org.postgresql.Driver"
val url = "jdbc:postgresql://localhost:5432/otus"
// NOTE(review): credentials are hard-coded in source; consider reading them from
// configuration or the environment if this is used outside a local sandbox.
val user = "docker"
val password = "docker"
// Project-relative paths under src/main/resources/data.
// presumably the yellow-taxi trip data set for 2018-01-25 — confirm the on-disk format with the reader code
val path_yellow_taxi_jan_25_2018 = "src/main/resources/data/yellow_taxi_jan_25_2018"
val path_taxi_zones = "src/main/resources/data/taxi_zones.csv"
// Relative path of a text file under out/rdd; presumably where the RDD result is written — verify against the caller.
val path_rdd_export_txt = "out/rdd/result.txt"
// Path to a SQL file named ddl.taxi_trip.sql.
// NOTE(review): the identifier says "dll" while the file name says "ddl" — likely a typo in the
// name; left unchanged because other code may reference it.
val path_to_sql_dll_taxi_trip = "src/main/scala/homework2/sql/ddl.taxi_trip.sql"