Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save tmusabbir/7018842 to your computer and use it in GitHub Desktop.
Save tmusabbir/7018842 to your computer and use it in GitHub Desktop.
Chunk data import / Incremental Import in Sqoop
Chunk data import / Incremental Import in Sqoop
[ambari_qa@ip-10-0-0-91 ~]$ hadoop fs -lsr /user/ambari_qa/sqoop
-rw-rw-rw- 3 ambari_qa hdfs 39 2013-05-16 19:34 /user/ambari_qa/sqoop/import.properties
-rw-rw-rw- 3 ambari_qa hdfs 344 2013-05-16 19:34 /user/ambari_qa/sqoop/option.par
-rw------- 3 ambari_qa hdfs 1183 2013-05-10 21:45 /user/ambari_qa/sqoop/script.sh
-rw------- 3 ambari_qa hdfs 7181 2013-05-14 04:35 /user/ambari_qa/sqoop/workflow.xml
startIndex=21
chunkSize=20
nameNode=hdfs://ip-10-0-0-92:8020
jobTracker=ip-10-0-0-93:50300
queueName=default
script=script.sh
propertyFile=import.properties
optionFile=option.par
oozie.use.system.libpath=true
oozie.libpath=/user/oozie/share/lib
oozie.wf.application.path=${nameNode}/user/${user.name}/sqoop
--connect
jdbc:oracle:thin:@//10.0.0.24:1521/testdb
--username
test_user
--password
password1
--table
TEST_USER.TABLE1
--fields-terminated-by
\t
--class-name
com.test.TABLE1
--verbose
--split-by
ID
--where
ID >= 1 and ID <= 20
--target-dir
/user/ambari_qa/test/input
-m
8
#!/bin/sh
# Advance the chunk window for the next incremental Sqoop import:
# read the current startIndex/chunkSize, rewrite the local properties
# and option files with the next window, and republish them to HDFS
# so the next Oozie workflow run picks them up.

importFile=import.properties
optionFile=option.par

# Source startIndex and chunkSize. The './' prefix matters: a bare
# '. file' searches $PATH in POSIX sh rather than the working directory.
. ./"$importFile"

# The next window starts one chunk past the current start.
newStartIndex=$((startIndex + chunkSize))

newStartIndexStr="startIndex=$newStartIndex"
oldStartIndexStr="startIndex=$startIndex"

# The option file carries the where clause on a single line
# (e.g. "ID >= 1 and ID <= 20"); grep pins that exact line so sed
# can replace it wholesale below.
oldWhereClause=$(grep "ID >" "$optionFile")
newWhereClause="ID >= $startIndex and ID <= $newStartIndex"

echo "Removing old properties and option file"
hadoop fs -rm "/user/ambari_qa/sqoop/$importFile"
hadoop fs -rm "/user/ambari_qa/sqoop/$optionFile"

# Rewrite the local copies in place. '|' is used as the sed delimiter
# because the replaced text never contains it.
sed -i "s|$oldStartIndexStr|$newStartIndexStr|g" "$importFile"
sed -i "s|$oldWhereClause|$newWhereClause|g" "$optionFile"

echo "Creating updated properties and option file"
hadoop fs -put "$importFile" /user/ambari_qa/sqoop
hadoop fs -put "$optionFile" /user/ambari_qa/sqoop

echo "Giving read/write permission to other users"
hadoop fs -chmod 777 "/user/ambari_qa/sqoop/$importFile"
hadoop fs -chmod 777 "/user/ambari_qa/sqoop/$optionFile"
<workflow-app name="temp-coordinator" xmlns="uri:oozie:workflow:0.2">

    <!-- Entry point: run the chunked Sqoop import first.
         (Control-flow nodes start/ok/error/end are empty elements and
         must be self-closed for the workflow to be well-formed XML.) -->
    <start to="sqoop-import"/>

    <action name="sqoop-import">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <!-- Clear the target directory so the import can re-create it. -->
                <delete path="${nameNode}/user/${wf:user()}/test/input"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- Connection details and the chunk where-clause live in the
                 options file shipped alongside the workflow. -->
            <arg>import</arg>
            <arg>--options-file</arg>
            <arg>${optionFile}</arg>
            <file>${optionFile}</file>
        </sqoop>
        <ok to="shell-updatebatch"/>
        <error to="sqoop-import-fail"/>
    </action>

    <kill name="sqoop-import-fail">
        <message>Sqoop failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- On a successful import, run the script that advances the chunk
         window (startIndex/where clause) ready for the next run. -->
    <action name="shell-updatebatch">
        <shell xmlns="uri:oozie:shell-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <exec>${script}</exec>
            <file>${script}</file>
            <file>${propertyFile}</file>
            <file>${optionFile}</file>
        </shell>
        <ok to="end"/>
        <error to="shell-fail"/>
    </action>

    <kill name="shell-fail">
        <message>Script failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <end name="end"/>
</workflow-app>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment