Last active
August 9, 2021 12:49
-
-
Save jeongho/773dca7180939a33cf46 to your computer and use it in GitHub Desktop.
Create an empty Avro file from an Avro schema (Pig doesn't like an empty input directory), then convert it to an empty Parquet file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build an Avro container file that holds a schema but zero records, then
# convert it to an equally empty Parquet dataset with Pig.
#
# Requires on PATH: avro-tools, pig, parquet-tools.
# Writes into the current directory: example.avsc, empty, empty_example.avro
# Writes into /tmp:                  empty_example.avro, empty_example_parquet/

# 1. Create a sample Avro schema.
#    The delimiter is quoted ('EOF') so the shell copies the JSON verbatim
#    instead of performing parameter/command expansion on it.
cat > example.avsc << 'EOF'
{"namespace": "example.avro",
 "type": "record",
 "name": "User",
 "fields": [
     {"name": "name", "type": "string"},
     {"name": "favorite_number", "type": ["int", "null"]},
     {"name": "favorite_color", "type": ["string", "null"]}
 ]
}
EOF

# 2. Create an empty file; it serves as the (record-less) JSON input below.
touch empty

# 3. Create an empty Avro file from the schema and the empty JSON input.
avro-tools fromjson --schema-file example.avsc empty > empty_example.avro

# 4. Check the embedded schema and confirm the file contains no records
#    (tojson should print nothing).
avro-tools getschema empty_example.avro
avro-tools tojson empty_example.avro

# 5. Convert to an empty Parquet dataset using Pig in local mode.
#    The Pig script is fed on stdin; the quoted delimiter keeps it literal.
cp empty_example.avro /tmp/
cat << 'EOF' | pig -x local
empty_example = load 'file:///tmp/empty_example.avro' using AvroStorage();
SET parquet.compression gzip;
store empty_example into 'file:///tmp/empty_example_parquet' using parquet.pig.ParquetStorer;
quit
EOF

# 6. Verify the result.
ls -l /tmp/empty_example_parquet
# Example output from the original run:
#   -rw-r--r-- 1 hdfs hdfs 231 May 25 08:58 _common_metadata
#   -rw-r--r-- 1 hdfs hdfs 231 May 25 08:58 _metadata
#   -rw-r--r-- 1 hdfs hdfs 231 May 25 08:58 part-m-00000.gz.parquet
#   -rw-r--r-- 1 hdfs hdfs 0 May 25 08:58 _SUCCESS

parquet-tools schema file:///tmp/empty_example_parquet/part-m-00000.gz.parquet
# Example output from the original run:
#   message pig_schema {
#     optional binary name (UTF8);
#     optional int32 favorite_number;
#     optional binary favorite_color (UTF8);
#   }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment