Skip to content

Instantly share code, notes, and snippets.

View mykidong's full-sized avatar

Kidong Lee mykidong

View GitHub Profile
Dataset<Row> hiveDf = spark.read().format("hive-with-jdbc")
.option("dbTable", "mc.crawl_youtube")
.option("conditionClause", "where year = '2020' and month = '02' and day = '19'")
.option("hiveJdbcUrl", "jdbc:hive2://mc-d01.mykidong.io:10000")
.option("hiveJdbcUser", "xxxx")
.option("hiveJdbcPassword", "xxxx")
.option("hiveMetastoreUrl", "jdbc:mysql://mc-d01.mykidong.io:3306/hive")
.option("hiveMetastoreUser", "xxxx")
.option("hiveMetastorePassword", "xxxx")
.option("defaultFs", "")
private void buildSchema()
{
String dbTable = parametersAsJava.get(JdbcHiveOptions.dbTable);
String hiveJdbcUrl = parametersAsJava.get(JdbcHiveOptions.hiveJdbcUrl);
String hiveJdbcUser = parametersAsJava.get(JdbcHiveOptions.hiveJdbcUser);
String hiveJdbcPassword = parametersAsJava.get(JdbcHiveOptions.hiveJdbcPassword);
String hiveMetastoreUrl = parametersAsJava.get(JdbcHiveOptions.hiveMetastoreUrl);
String hiveMetastoreUser = parametersAsJava.get(JdbcHiveOptions.hiveMetastoreUser);
String hiveMetastorePassword = parametersAsJava.get(JdbcHiveOptions.hiveMetastorePassword);
Dataset<Row> jdbcHiveDf = spark.read().format("jdbc-hive")
.option("dbTable", "mc.crawl_youtube")
.option("conditionClause", "where year = '2020' and month = '02' and day = '19'")
.option("hiveJdbcUrl", "jdbc:hive2://mc-d01.mykidong.io:10000")
.option("hiveJdbcUser", "xxxx")
.option("hiveJdbcPassword", "xxxx")
.option("hiveMetastoreUrl", "jdbc:mysql://mc-d01.mykidong.io:3306/hive")
.option("hiveMetastoreUser", "xxxx")
.option("hiveMetastorePassword", "xxxx")
.option("fetchsize", "10")
@mykidong
mykidong / hive-metastore.sql
Created March 4, 2020 00:36
hive-metastore.sql
SELECT
a.DB_TABLE AS DB_TABLE,
a.COLUMN_NAME AS COLUMN_NAME,
a.COLUMN_TYPE AS COLUMN_TYPE
FROM
(SELECT
CONCAT(DBS.NAME, '.', TBLS.TBL_NAME) AS DB_TABLE,
COLUMNS_V2.COLUMN_NAME AS COLUMN_NAME,
COLUMNS_V2.TYPE_NAME AS COLUMN_TYPE
FROM
@mykidong
mykidong / HiveRelation.class
Last active April 24, 2020 06:39
HiveRelation.class
public class HiveRelation extends BaseRelation implements Serializable, TableScan {
private SQLContext sqlContext;
private StructType schema;
private java.util.Map<String, String> parametersAsJava;
private Dataset<Row> df;
public HiveRelation(SQLContext sqlContext, Map<String, String> parameters)
{
@mykidong
mykidong / HiveRelationProvider.class
Created March 4, 2020 00:29
HiveRelationProvider.class
package mykidong.connector.hive;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.sources.BaseRelation;
import org.apache.spark.sql.sources.DataSourceRegister;
import org.apache.spark.sql.sources.RelationProvider;
import scala.collection.immutable.Map;
public class HiveRelationProvider implements RelationProvider, DataSourceRegister {
@mykidong
mykidong / JdbcHiveRelation.class
Last active April 24, 2020 06:33
JdbcHiveRelation.class
public class JdbcHiveRelation extends BaseRelation implements Serializable, TableScan {
private SQLContext sqlContext;
private StructType schema;
private java.util.Map<String, String> parametersAsJava;
private java.util.Map<String, HiveMetaResolver.HiveMetadata> hiveMetadataMap;
public JdbcHiveRelation(SQLContext sqlContext, Map<String, String> parameters)
{
@mykidong
mykidong / JdbcHiveRelationProvider.class
Created March 4, 2020 00:27
JdbcHiveRelationProvider.class
package mykidong.datasources.jdbc.hive;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.sources.BaseRelation;
import org.apache.spark.sql.sources.DataSourceRegister;
import org.apache.spark.sql.sources.RelationProvider;
import scala.collection.immutable.Map;
public class JdbcHiveRelationProvider implements RelationProvider, DataSourceRegister {
@mykidong
mykidong / HiveMetaResolver.class
Last active April 24, 2020 06:40
HiveMetaResolver.class
public class HiveMetaResolver {
private String dbTable;
private String hiveJdbcUrl;
private String hiveJdbcUser;
private String hiveJdbcPassword;
private String hiveMetastoreUrl;
private String hiveMetastoreUser;
private String hiveMetastorePassword;
// Looks up an Avro schema by key through AvroSchemaLoader and logs the key together with the schema text.
// NOTE(review): AvroSchemaLoader, Schema and log are declared outside this fragment — confirm their exact semantics.

// Directory handed to the loader; presumably a classpath location that holds the Avro schema files — TODO confirm.
String pathDir = "/META-INF/avro";
// Obtain the loader for that directory through its singleton(...) factory method.
AvroSchemaLoader avroSchemaLoader = AvroSchemaLoader.singleton(pathDir);
// Key used for the lookup; it looks like a fully qualified Avro record name — verify against the loader.
String schemaKey = "io.shunters.coda.avro.api.ProduceRequest";
// NOTE(review): if getSchema can return null for an unknown key, the toString call below would throw — confirm.
Schema schema = avroSchemaLoader.getSchema(schemaKey);
// Log the key, a newline, then the schema's string form; toString(true) presumably pretty-prints — confirm.
log.info("schema key: [" + schemaKey + "]\n" + schema.toString(true));