Skip to content

Instantly share code, notes, and snippets.

@ccsevers
Last active December 11, 2015 06:19
Show Gist options
  • Save ccsevers/4558790 to your computer and use it in GitHub Desktop.
Save ccsevers/4558790 to your computer and use it in GitHub Desktop.
Scoobi Avro Example
// sbt build definition for the Scoobi + Avro compiler-plugin example.
// NOTE(review): sbt releases before 0.13.7 require a blank line between settings in a .sbt file;
// the settings below appear back-to-back here — confirm against the sbt version in use.
name := "Scoobi Avro Example"
version := "1.0"
scalaVersion := "2.9.2"
// Extra scalac flags: dependent method types and deprecation warnings.
scalacOptions ++= Seq("-Ydependent-method-types", "-deprecation")
// Additional resolver (plain HTTP) used alongside the default ones.
// NOTE(review): presumably this is where the avro-plugin snapshot is published — confirm.
resolvers += "Radlab Repository" at "http://scads.knowsql.org/nexus/content/groups/public/"
// Library dependencies: Scoobi and the avro-plugin artifact (`%%` appends the Scala version to the artifact name).
libraryDependencies ++= Seq("com.nicta" %% "scoobi" % "0.6.1-cdh3",
"edu.berkeley.cs" %% "avro-plugin" % "2.1.4-SNAPSHOT")
// Append one "-Xplugin:<absolute path>" scalac option for every artifact the update report
// lists under the compiler-plugin configuration.
scalacOptions <++= update map { report =>
val pluginClasspath = report matching configurationFilter(Configurations.CompilerPlugin.name)
pluginClasspath.map("-Xplugin:" + _.getAbsolutePath).toSeq
}
// Register the avro-plugin artifact as a compiler plugin.
addCompilerPlugin("edu.berkeley.cs" %% "avro-plugin" % "2.1.4-SNAPSHOT" % "plugin")
package com.ebay.scoobitest
import edu.berkeley.cs.avro.marker._
import edu.berkeley.cs.avro.runtime._
import com.nicta.scoobi.Scoobi._
/**
 * Avro record with a single `Long` field `f1`; ScoobiMain writes these to "longs.avro"
 * and reads them back as the job input.
 *
 * NOTE(review): the field is a `var`, presumably because the avro-plugin's AvroRecord
 * support needs mutable fields — confirm before changing it to a `val`.
 */
case class LongRec(var f1: Long) extends AvroRecord
/**
 * Avro record with a single `Long` field `firstSearchTime`; ScoobiMain.run builds one
 * from each input LongRec and persists the results as Avro.
 *
 * NOTE(review): the field is a `var`, presumably because the avro-plugin's AvroRecord
 * support needs mutable fields — confirm before changing it to a `val`.
 */
case class Cluster(var firstSearchTime: Long) extends AvroRecord
/**
 * Example Scoobi application that round-trips Avro records.
 *
 * The object's initializer body writes 1024 `LongRec` records (f1 = 1 to 1024) to a local
 * Avro file. `run()` then reads that file, turns each `LongRec` into a `Cluster` carrying
 * the same value, and persists the result as Avro under "ScoobiClusters".
 */
object ScoobiMain extends ScoobiApp {
  // Single definition of the seed file path, shared by the writer below and the reader in
  // run(), so the two cannot drift apart. A `final val` with a literal right-hand side is a
  // compile-time constant, so it does not depend on initialization order.
  private final val LongsAvroPath = "longs.avro"

  // Seed data: append LongRec(1) .. LongRec(1024) to the Avro file.
  val outfile = AvroOutFile[LongRec](new java.io.File(LongsAvroPath))
  try {
    (1 to 1024).foreach(i => outfile.append(LongRec(i)))
  } finally {
    // Close the writer even if an append fails, so the file handle is not leaked.
    outfile.close
  }

  // Explicit WireFormat instances for the two record types, built from each case class's
  // apply/unapply pair.
  implicit val clusterFmt: WireFormat[Cluster] = mkCaseWireFormat(Cluster, Cluster.unapply(_))
  implicit val longRecFmt: WireFormat[LongRec] = mkCaseWireFormat(LongRec, LongRec.unapply(_))

  /**
   * Job body: load the seeded `LongRec` records, map each one to a `Cluster` whose
   * `firstSearchTime` is the record's `f1`, and persist the result as Avro to
   * "ScoobiClusters", overwriting any existing output.
   */
  def run(): Unit = {
    val searches = fromAvroFile[LongRec](LongsAvroPath)
    val groups = searches.map(s => Cluster(s.f1))
    persist(toAvroFile(groups, "ScoobiClusters", overwrite = true))
  }
}
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <!--
    Maven build for the Scoobi + Avro compiler-plugin example.
    Compiles the Scala sources with scala-maven-plugin (with the avro-plugin scalac plugin enabled),
    copies non-provided dependencies into classes/lib, and builds a jar whose manifest
    Class-Path points at lib/.
  -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.ebay.maven-plugin-test</groupId>
  <artifactId>sample</artifactId>
  <version>1.0-SNAPSHOT</version>
  <description>Test for maven scalac plugin with dependencies</description>
  <inceptionYear>2013</inceptionYear>

  <properties>
    <encoding>UTF-8</encoding>
  </properties>

  <!-- Needed because the scala-maven-plugin version pinned below is a SNAPSHOT. -->
  <pluginRepositories>
    <pluginRepository>
      <id>sonatype.snapshots</id>
      <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
    </pluginRepository>
  </pluginRepositories>

  <repositories>
    <repository>
      <id>SCADS</id>
      <!-- NOTE(review): plain-HTTP repository; Maven 3.8.1 and later block external HTTP
           repositories by default. Switch to https if this host supports it (confirm). -->
      <url>http://scads.knowsql.org/nexus/content/groups/public/</url>
    </repository>
    <!-- Maven Central, declared once and over HTTPS: Central no longer serves plain HTTP,
         and the previous second entry (id "maven-central") pointed at the same URL. -->
    <repository>
      <id>Maven-Central</id>
      <url>https://repo1.maven.org/maven2/</url>
    </repository>
    <repository>
      <id>Sonatype-Snapshots</id>
      <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency>
      <groupId>com.nicta</groupId>
      <artifactId>scoobi_2.9.2</artifactId>
      <version>0.6.1-cdh3</version>
      <!-- Scoobi's transitive hadoop-core is excluded; hadoop-core is declared explicitly
           below with provided scope. -->
      <exclusions>
        <exclusion>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-core</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>1.7.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>1.0.3</version>
      <scope>provided</scope>
    </dependency>
    <!-- Also a regular dependency: the sources import edu.berkeley.cs.avro.marker and
         edu.berkeley.cs.avro.runtime. NOTE(review): presumably those packages ship in this
         artifact; confirm. -->
    <dependency>
      <groupId>edu.berkeley.cs</groupId>
      <artifactId>avro-plugin_2.9.2</artifactId>
      <version>2.1.4-SNAPSHOT</version>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>

    <pluginManagement>
      <plugins>
        <plugin>
          <groupId>net.alchim31.maven</groupId>
          <artifactId>scala-maven-plugin</artifactId>
          <!-- NOTE(review): SNAPSHOT plugin version makes builds non-reproducible; consider
               pinning a released version once one with the needed behaviour is confirmed. -->
          <version>3.1.2-SNAPSHOT</version>
        </plugin>
      </plugins>
    </pluginManagement>

    <plugins>
      <!-- Scala compilation with the avro-plugin scalac plugin enabled. -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <configuration>
          <compilerPlugins>
            <compilerPlugin>
              <groupId>edu.berkeley.cs</groupId>
              <artifactId>avro-plugin_2.9.2</artifactId>
              <version>2.1.4-SNAPSHOT</version>
            </compilerPlugin>
          </compilerPlugins>
          <!-- for now, have to explicitly specify compiler plugin dependencies below -->
          <dependencies>
            <dependency>
              <groupId>org.apache.avro</groupId>
              <artifactId>avro</artifactId>
              <version>1.7.2</version>
            </dependency>
            <dependency>
              <groupId>org.codehaus.jackson</groupId>
              <artifactId>jackson-mapper-asl</artifactId>
              <version>1.8.2</version>
            </dependency>
          </dependencies>
          <!-- Launcher for the example's main object. -->
          <launchers>
            <launcher>
              <id>myApp</id>
              <mainClass>com.ebay.scoobitest.ScoobiMain</mainClass>
            </launcher>
          </launchers>
        </configuration>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

      <!-- Copy all non-provided dependencies into target/classes/lib so they end up inside the jar. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <version>2.6</version>
        <executions>
          <execution>
            <id>copy-dependencies</id>
            <phase>prepare-package</phase>
            <goals>
              <goal>copy-dependencies</goal>
            </goals>
            <configuration>
              <excludeScope>provided</excludeScope>
              <outputDirectory>${project.build.directory}/classes/lib</outputDirectory>
              <overWriteReleases>false</overWriteReleases>
              <overWriteSnapshots>false</overWriteSnapshots>
              <overWriteIfNewer>true</overWriteIfNewer>
            </configuration>
          </execution>
        </executions>
      </plugin>

      <!-- Add a manifest Class-Path whose entries are prefixed with lib/, matching the copy above. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.4</version>
        <configuration>
          <archive>
            <manifest>
              <addClasspath>true</addClasspath>
              <classpathPrefix>lib/</classpathPrefix>
            </manifest>
          </archive>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment