@jlcanela
Last active August 29, 2015 14:01
How to generate an Avro file

Avro Maven

Some simple examples of how to use the Avro Maven plugin to generate Avro sources given an Avro schema, protocol or IDL file.

License

Apache License, version 2.0 (for more details, see LICENSE).

Usage

Download the sources:

$ git clone git://github.com/alexholmes/avro-maven.git

Run Avro's code generation against the Avro files contained in src/main/avro (weather.avsc, weather.avpr and weather.avdl) using Maven:

$ cd avro-maven/
$ mvn clean compile

Examine the code-generated sources:

$ find . -type f -name "*.java"
./src/main/java/com/alexholmes/avro/Weather.java
./target/generated-sources/avro/com/alexholmes/avro/Weather.java
./target/generated-sources/avro/com/alexholmes/avro/weatherstation1/Station.java
./target/generated-sources/avro/com/alexholmes/avro/weatherstation1/WeatherStation.java
./target/generated-sources/avro/com/alexholmes/avro/weatherstation2/Simple.java
./target/generated-sources/avro/com/alexholmes/avro/weatherstation2/WeatherStation.java
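The idl-protocol goal is what turns a .avdl file into several generated classes, as seen in the weatherstation1 and weatherstation2 packages above. For readers unfamiliar with Avro IDL, here is a minimal sketch of the shape such a file takes (illustrative only; not the actual contents of the repo's weather.avdl):

```
@namespace("com.alexholmes.avro.weatherstation1")
protocol WeatherStation {
  record Station {
    string id;
    string name;
  }
}
```

Each record declared inside the protocol compiles to its own Java class (here, Station.java), and the protocol itself compiles to an interface (WeatherStation.java).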

The Magic

The complete Maven file can be viewed in pom.xml.

The key is in adding the following plugin to your pom.xml file:

<plugin>
  <groupId>org.apache.avro</groupId>
  <artifactId>avro-maven-plugin</artifactId>
  <version>${avro.version}</version>
  <executions>
    <execution>
      <phase>generate-sources</phase>
      <goals>
        <goal>schema</goal>
        <goal>protocol</goal>
        <goal>idl-protocol</goal>
      </goals>
    </execution>
  </executions>
</plugin>

You'll also need to include Avro as a dependency:

<properties>
    <avro.version>1.7.4</avro.version>
    ...
</properties>

...

<dependencies>
    <dependency>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro</artifactId>
        <version>${avro.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro-maven-plugin</artifactId>
        <version>${avro.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro-compiler</artifactId>
        <version>${avro.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-ipc</artifactId>
      <version>${avro.version}</version>
    </dependency>
</dependencies>

Customizing the plugin

If you want to customize the location of the source or destination files, or other settings, take a look at pom-schema-fulldefs.xml, as well as my blog post with more details on the subject at http://grepalex.com/2013/05/24/avro-maven/.
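The schema goal accepts configuration elements for the input and output directories. A sketch of the shape such a customization takes (parameter names are from the avro-maven-plugin; verify them against the plugin documentation for your version):

```xml
<plugin>
  <groupId>org.apache.avro</groupId>
  <artifactId>avro-maven-plugin</artifactId>
  <version>${avro.version}</version>
  <executions>
    <execution>
      <phase>generate-sources</phase>
      <goals>
        <goal>schema</goal>
      </goals>
      <configuration>
        <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
        <outputDirectory>${project.basedir}/src/main/altjava/</outputDirectory>
        <testSourceDirectory>${project.basedir}/src/test/avro/</testSourceDirectory>
        <testOutputDirectory>${project.basedir}/src/test/altjava/</testOutputDirectory>
      </configuration>
    </execution>
  </executions>
</plugin>
```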

To see the customized POM in action, run Maven against pom-schema-fulldefs.xml:

$ mvn clean compile -f pom-schema-fulldefs.xml

The generated output files can be seen with the find command:

$ find . -type f -name "*.java"
./src/main/altjava/com/alexholmes/avro/Weather.java
./src/test/altjava/com/alexholmes/avro/Test.java
pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.alexholmes.avro.maven</groupId>
  <artifactId>avro-maven</artifactId>
  <version>0.0.1</version>
  <packaging>jar</packaging>
  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>
  <name>Avro Maven Example</name>
  <url>https://github.com/alexholmes/avro-maven</url>
  <properties>
    <jdkLevel>1.6</jdkLevel>
    <requiredMavenVersion>[2.1,)</requiredMavenVersion>
    <main.basedir>${project.basedir}</main.basedir>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.build.outputEncoding>UTF-8</project.build.outputEncoding>
    <maven.compiler>2.0.2</maven.compiler>
    <avro.version>1.7.5</avro.version>
    <hadoop.version>2.0.0-cdh4.1.2</hadoop.version>
    <hadoopmr1.version>2.0.0-mr1-cdh4.1.2</hadoopmr1.version>
  </properties>
  <description>
    A simple example of how Avro's Maven plugin can be used to compile Avro schema files into Java.
  </description>
  <developers>
    <developer>
      <id>aholmes</id>
      <name>Alex Holmes</name>
      <email>grep.alex@gmail.com</email>
      <url>http://grepalex.com</url>
    </developer>
  </developers>
  <build>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.1</version>
        <configuration>
          <mainClass>Sample</mainClass>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <source>${jdkLevel}</source>
          <target>${jdkLevel}</target>
          <showDeprecation>true</showDeprecation>
          <showWarnings>true</showWarnings>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro-maven-plugin</artifactId>
        <version>${avro.version}</version>
        <executions>
          <execution>
            <phase>generate-sources</phase>
            <goals>
              <goal>schema</goal>
              <goal>protocol</goal>
              <goal>idl-protocol</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
  <dependencies>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>${avro.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-maven-plugin</artifactId>
      <version>${avro.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-compiler</artifactId>
      <version>${avro.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-ipc</artifactId>
      <version>${avro.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoopmr1.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>${hadoopmr1.version}</version>
    </dependency>
  </dependencies>
</project>
sample.avsc

{
  "type": "record",
  "name": "MarketPlaceMailMember",
  "namespace": "com.viadeo.data.bean",
  "fields": [
    {"name": "memberId", "type": "int"},
    {"name": "firstname", "type": "string"},
    {"name": "lastname", "type": "string"},
    {"name": "templateId", "type": "string"},
    {"name": "mtrck", "type": "string"},
    {"name": "jobs",
     "type": {
       "type": "array",
       "items": {
         "type": "record",
         "name": "Job",
         "fields": [
           {"name": "jobId", "type": {"type": "int"}},
           {"name": "jobName", "type": ["string", "null"]}
         ]
       }
     }
    }
  ]
}
Sample.java

import java.io.*;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import com.viadeo.data.bean.Job;
import com.viadeo.data.bean.MarketPlaceMailMember;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;

class Sample {

    public static void main(String[] args) {
        System.out.println("Generating avro file");
        Sample s = new Sample();
        try {
            s.write("src/main/avro/sample.avsc", new FileOutputStream(new File("test.avro")));
            //s.writeComplexSchema("complex.avro", false);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    // Writes a single MarketPlaceMailMember record (a generated specific record)
    // to an Avro container file on the given output stream.
    public void write(String schemaFile, OutputStream out) throws IOException {
        InputStream schemaAsJsonStream = new FileInputStream(new File(schemaFile)); // or getClass().getResourceAsStream(schemaFile)
        Schema schema = new Schema.Parser().parse(schemaAsJsonStream);

        DataFileWriter<Object> writer = new DataFileWriter<Object>(new GenericDatumWriter<Object>());
        CodecFactory codec = CodecFactory.nullCodec(); // or CodecFactory.deflateCodec(Deflater.DEFAULT_COMPRESSION)
        writer.setCodec(codec);
        writer.create(schema, out);

        List<Job> jobs = new ArrayList<Job>();
        Job job = new Job();
        jobs.add(job);
        MarketPlaceMailMember m = new MarketPlaceMailMember(1, "firstname", "lastname", "templateId", "mtrck", jobs);

        writer.append(m);
        writer.flush();
        writer.close();
    }

    // Writes a GenericRecord either to the local filesystem or to HDFS.
    // Note: the fields used here (id, name, groups) must exist in the schema
    // being parsed, otherwise this method fails at runtime.
    public void writeComplexSchema(String fileName, boolean isHdfs) throws IOException {
        Schema.Parser parser = new Schema.Parser();
        Schema schema = parser.parse(getClass().getResourceAsStream("sample.avsc"));

        Collection<String> coll = new ArrayList<String>();
        coll.add("javatute");
        coll.add("hadoop");

        GenericRecord datum = new GenericData.Record(schema);
        datum.put("id", "1");
        datum.put("name", "Ashish");
        datum.put("groups", coll);
        System.out.println("Writing: \n" + datum);

        File file = new File(fileName);
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        DataFileWriter<GenericRecord> dataWriter = new DataFileWriter<GenericRecord>(writer);

        if (isHdfs) {
            String dest = fileName;
            Path path = new Path(dest);
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(URI.create(dest), conf);
            FSDataOutputStream out = fs.create(path, new Progressable() {
                public void progress() {
                    System.out.print(". ");
                    try { Thread.sleep(1000); } catch (InterruptedException iex) {}
                }
            });
            dataWriter.create(schema, out);
        } else {
            dataWriter.create(schema, file);
        }
        dataWriter.append(datum);
        dataWriter.close();
    }
}
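To sanity-check the generated file, it can be read back with Avro's generic API. A minimal sketch (assumes test.avro was produced by the Sample class above; the ReadSample class name is illustrative):

```java
import java.io.File;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

class ReadSample {
    public static void main(String[] args) throws Exception {
        // GenericDatumReader picks up the writer's schema from the file
        // header, so no .avsc file is needed to read the data back.
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
        DataFileReader<GenericRecord> fileReader =
                new DataFileReader<GenericRecord>(new File("test.avro"), datumReader);
        try {
            System.out.println("Schema: " + fileReader.getSchema());
            for (GenericRecord record : fileReader) {
                System.out.println(record);
            }
        } finally {
            fileReader.close();
        }
    }
}
```

Reading through the generic API rather than the generated MarketPlaceMailMember class keeps the check independent of the code-generation step.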