Skip to content

Instantly share code, notes, and snippets.

View milindjagre's full-sized avatar
💭
❤️ DATA ❤️

Milind Jagre milindjagre

💭
❤️ DATA ❤️
View GitHub Profile
@milindjagre
milindjagre / pom.xml
Created April 13, 2016 12:17
This pom.xml file declares the third-party JAR dependencies needed for writing PDF files using the Java API
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.milind</groupId>
<artifactId>word-to-pdf</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
@milindjagre
milindjagre / ExcelDriver.java
Created April 16, 2016 02:19
This is the driver class for reading an Excel file using MapReduce
/* * To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor. */
package com.milind.mr.excel;
/**
* * @author milind
*/
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
# Assumes the hduser account is used for Hadoop operations.
cd /home/hduser
# Load .bashrc so the Hive installation path (HIVE_HOME, used below) is available.
source .bashrc
# Remove any previously downloaded CSV SerDe JAR; -f avoids an error when it is absent.
rm -f csv-serde-1.1.2-0.11.0-all.jar
# Download the CSV SerDe JAR at run time and copy it into the Hive lib directory
# only if the download succeeded. ${HIVE_HOME:?} aborts the copy when HIVE_HOME
# is unset or empty, instead of silently targeting /lib.
wget https://drone.io/github.com/ogrodnek/csv-serde/files/target/csv-serde-1.1.2-0.11.0-all.jar \
  && cp csv-serde-1.1.2-0.11.0-all.jar "${HIVE_HOME:?HIVE_HOME is not set}/lib"
# Assumes the hduser account is used for Hadoop operations.
cd /home/hduser
# Load .bashrc so the Hive installation path (HIVE_HOME, used below) is available.
source .bashrc
# Remove any previously downloaded CSV SerDe JAR; -f avoids an error when it is absent.
rm -f csv-serde-1.1.2-0.11.0-all.jar
# Download the CSV SerDe JAR at run time and copy it into the Hive lib directory
# only if the download succeeded. ${HIVE_HOME:?} aborts the copy when HIVE_HOME
# is unset or empty, instead of silently targeting /lib.
wget https://drone.io/github.com/ogrodnek/csv-serde/files/target/csv-serde-1.1.2-0.11.0-all.jar \
  && cp csv-serde-1.1.2-0.11.0-all.jar "${HIVE_HOME:?HIVE_HOME is not set}/lib"
STEP 1 : CREATING THE INPUT XML FILE WHICH WE WILL LOAD INTO THE HIVE TABLE
nano student.xml
<student> <id>1</id> <name>Milind</name> <age>25</age> </student>
<student> <id>2</id> <name>Ramesh</name> <age>Testing</age> </student>
STEP 2 : LOG IN TO HIVE
hive
STEP 3 : CREATING HIVE TABLE
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.milind.mr.excel;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.milind.mr.excel;
/**
*
* @author milind
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.milind.mr.excel;
/**
*
* @author milind
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package com.milind.mr.excel;
/**
*
* @author milind
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.milind</groupId>
<artifactId>mr-excel</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>