Created
February 27, 2020 01:12
-
-
Save FernandoBontorin/37296bc670a808a38a022f8a43264e1f to your computer and use it in GitHub Desktop.
Nulleable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.sql.functions.{col, max}
import org.apache.spark.sql.{Column, DataFrame, Row}
import scala.reflect.ClassTag
/** Helpers for detecting null values in the columns of a Spark DataFrame. */
object SparkUtils {

  /**
    * Tells whether any of the inspected columns contains at least one null.
    *
    * @param df   DataFrame to inspect
    * @param cols names of the columns to check; when empty, every column of `df` is checked
    * @return `true` if at least one inspected column holds a null value
    */
  def hasNull(df: DataFrame, cols: String*): Boolean =
    hasNull(asArrayOf[Boolean](isNull(df, cols: _*).collect()))

  /**
    * Tells whether any flag in `b` is set.
    *
    * @param b per-column "contains a null" flags
    * @return `true` if at least one element of `b` is `true`; `false` for an empty array
    */
  def hasNull(b: Array[Boolean]): Boolean = b.contains(true)

  /**
    * Lists the inspected columns that contain at least one null.
    *
    * Names are returned in the order of the columns of the frame built by [[isNull]]
    * (i.e. the order of `cols`, or of `df.columns` when `cols` is empty), without duplicates.
    *
    * @param df   DataFrame to inspect
    * @param cols names of the columns to check; when empty, every column of `df` is checked
    * @return names of the columns whose null flag is `true`
    */
  def nullColumnNames(df: DataFrame, cols: String*): Array[String] = {
    val flags = isNull(df, cols: _*)
    flags.columns
      .zip(asArrayOf[Boolean](flags.collect()))
      .collect { case (name, true) => name }
      .distinct
  }

  /**
    * Converts the first row of `rows` into an array of `A`.
    *
    * The cells are cast to `A` without a runtime check, so the caller must know the
    * element type of the row. Only the first row is used; any further rows are ignored.
    *
    * @param rows collected rows
    * @tparam A element type every cell of the first row is expected to have
    * @return the cells of the first row, or an empty array when `rows` is empty
    */
  def asArrayOf[A: ClassTag](rows: Array[Row]): Array[A] =
    rows.headOption.fold(Array.empty[A])(_.toSeq.asInstanceOf[Seq[A]].toArray)

  /**
    * Builds an aggregated frame with one boolean column per inspected column, each
    * computed as `max(column.isNull)` and aliased with the inspected column's name.
    *
    * @param df   DataFrame to inspect
    * @param cols names of the columns to check; when empty, every column of `df` is checked
    * @return the aggregated DataFrame of null flags
    */
  def isNull(df: DataFrame, cols: String*): DataFrame = {
    val flags: Seq[Column] =
      if (cols.isEmpty) {
        // Names taken from df.columns are literal top-level names, so they are quoted:
        // an unquoted name such as "a.b" would be parsed as field `b` of column `a`.
        df.columns.toSeq.map(name => nullFlag(quoted(name), name))
      } else {
        // Caller-supplied names are passed through unchanged, exactly as before.
        cols.map(name => nullFlag(name, name))
      }
    df.select(flags: _*)
  }

  /** Aggregate expression flagging whether `reference` holds any null, named `alias`. */
  private def nullFlag(reference: String, alias: String): Column =
    max(col(reference).isNull).alias(alias)

  /** Wraps a literal column name in backticks, doubling any backtick it contains. */
  private def quoted(name: String): String =
    "`" + name.replace("`", "``") + "`"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment