Created
April 3, 2012 17:29
-
-
Save danoyoung/2293909 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
grunt> set io.sort.mb 150; | |
grunt> /* | |
grunt> set mapred.reduce.tasks 1; | |
grunt> gets all the people for a franchise. | |
grunt> rm avro/franchise_people; | |
grunt> */ | |
grunt> franchise_people = LOAD 'hdfs://127.0.0.1:9000/user/hadoop/indexer/avro/franchise_people' using org.apache.pig.piggybank.storage.avro.AvroStorage(); | |
grunt> | |
grunt> a = FILTER franchise_people BY (role_type == 'cast') OR (role_type == 'crew'); | |
grunt> b = GROUP a BY (franchise_id); | |
grunt> | |
grunt> /* Per franchise_id: c needs to be stored in Avro with franchise_id as one field and another | |
grunt> field that holds the array of names. */ | |
grunt> c = FOREACH b {GENERATE group AS franchise_id, a.full_name AS cast_and_crew;}; | |
grunt> illustrate c; | |
2012-04-03 11:24:53,536 [main] INFO org.apache.pig.backend.hadoop.executionengine.HExecutionEngine - Connecting to hadoop file system at: hdfs://127.0.0.1:9000 | |
..... | |
..... | |
..... | |
----------------------------------------------------------------------------------------------------------------------- | |
| franchise_people | franchise_id:int | full_name:chararray | role_type:chararray | role:chararray | | |
----------------------------------------------------------------------------------------------------------------------- | |
| | 189820 | William Peacock | cast | Narrator | | |
| | 189820 | Pierre-Dominique Gaisseau | crew | Director | | |
| | 189820 | William Peacock | 0 | Narrator | | |
----------------------------------------------------------------------------------------------------------------------- | |
-------------------------------------------------------------------------------------------------------- | |
| a | franchise_id:int | full_name:chararray | role_type:chararray | role:chararray | | |
-------------------------------------------------------------------------------------------------------- | |
| | 189820 | William Peacock | cast | Narrator | | |
| | 189820 | Pierre-Dominique Gaisseau | crew | Director | | |
-------------------------------------------------------------------------------------------------------- | |
------------------------------------------------------------------------------------------------------------------------------------ | |
| b | group:int | a:bag{:tuple(franchise_id:int,full_name:chararray,role_type:chararray,role:chararray)} | | |
------------------------------------------------------------------------------------------------------------------------------------ | |
| | 189820 | {(189820, ..., Narrator), (189820, ..., Director)} | | |
------------------------------------------------------------------------------------------------------------------------------------ | |
------------------------------------------------------------------------------------------- | |
| c | franchise_id:int | cast_and_crew:bag{:tuple(full_name:chararray)} | | |
------------------------------------------------------------------------------------------- | |
| | 189820 | {(William Peacock), (Pierre-Dominique Gaisseau)} | | |
------------------------------------------------------------------------------------------- | |
Without Schema: | |
grunt> STORE c INTO 'hdfs://127.0.0.1:9000/user/hadoop/indexer/avro/franchise_cast_and_crew' using org.apache.pig.piggybank.storage.avro.AvroStorage(); | |
2012-04-03 11:25:08,756 [main] INFO org.apache.pig.tools.pigstats.ScriptState - Pig features used in the script: GROUP_BY,FILTER | |
2012-04-03 11:25:08,810 [main] ERROR org.apache.pig.tools.grunt.Grunt - ERROR 1002: Unable to store alias c | |
Details at logfile: /Users/dan.young/pig_1333473876375.log | |
Stack Trace: | |
Pig Stack Trace | |
--------------- | |
ERROR 1002: Unable to store alias c | |
org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1002: Unable to store alias c | |
at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1552) | |
at org.apache.pig.PigServer.registerQuery(PigServer.java:540) | |
at org.apache.pig.tools.grunt.GruntParser.processPig(GruntParser.java:945) | |
at org.apache.pig.tools.pigscript.parser.PigScriptParser.parse(PigScriptParser.java:392) | |
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:190) | |
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:166) | |
at org.apache.pig.tools.grunt.Grunt.run(Grunt.java:69) | |
at org.apache.pig.Main.run(Main.java:535) | |
at org.apache.pig.Main.main(Main.java:153) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) | |
at java.lang.reflect.Method.invoke(Method.java:597) | |
at org.apache.hadoop.util.RunJar.main(RunJar.java:156) | |
Caused by: java.lang.NullPointerException | |
at org.apache.pig.piggybank.storage.avro.AvroStorageUtils.isTupleWrapper(AvroStorageUtils.java:327) | |
at org.apache.pig.piggybank.storage.avro.PigSchema2Avro.convert(PigSchema2Avro.java:82) | |
at org.apache.pig.piggybank.storage.avro.PigSchema2Avro.convert(PigSchema2Avro.java:105) | |
at org.apache.pig.piggybank.storage.avro.PigSchema2Avro.convertRecord(PigSchema2Avro.java:151) | |
at org.apache.pig.piggybank.storage.avro.PigSchema2Avro.convert(PigSchema2Avro.java:62) | |
at org.apache.pig.piggybank.storage.avro.AvroStorage.checkSchema(AvroStorage.java:533) | |
at org.apache.pig.newplan.logical.rules.InputOutputFileValidator$InputOutputFileVisitor.visit(InputOutputFileValidator.java:65) | |
at org.apache.pig.newplan.logical.relational.LOStore.accept(LOStore.java:77) | |
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:64) | |
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66) | |
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66) | |
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66) | |
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66) | |
at org.apache.pig.newplan.DepthFirstWalker.walk(DepthFirstWalker.java:53) | |
at org.apache.pig.newplan.PlanVisitor.visit(PlanVisitor.java:50) | |
at org.apache.pig.newplan.logical.rules.InputOutputFileValidator.validate(InputOutputFileValidator.java:45) | |
at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.compile(HExecutionEngine.java:298) | |
at org.apache.pig.PigServer.compilePp(PigServer.java:1316) | |
at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1253) | |
at org.apache.pig.PigServer.execute(PigServer.java:1245) | |
at org.apache.pig.PigServer.access$400(PigServer.java:127) | |
at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1547) | |
... 13 more | |
================================================================================ | |
With schema: | |
grunt> STORE c INTO 'hdfs://127.0.0.1:9000/user/hadoop/indexer/avro/franchise_cast_and_crew' using org.apache.pig.piggybank.storage.avro.AvroStorage('{"index":1,"schema":{"type":"record","name":"franchise_cast_and_crew","fields":[{"name":"franchise_id","type":"int"},{"name":"cast_and_crew","type":{"type":"array","items":"string"}}]}'); | |
2012-04-03 11:27:49,074 [main] ERROR org.apache.pig.tools.grunt.Grunt - ERROR 1200: could not instantiate 'org.apache.pig.piggybank.storage.avro.AvroStorage' with arguments '[{"index":1,"schema":{"type":"record","name":"franchise_cast_and_crew","fields":[{"name":"franchise_id","type":"int"},{"name":"cast_and_crew","type":{"type":"array","items":"string"}}]}]' | |
Details at logfile: /Users/dan.young/pig_1333473876375.log | |
grunt> | |
Stack Trace: | |
Pig Stack Trace | |
--------------- | |
ERROR 1200: could not instantiate 'org.apache.pig.piggybank.storage.avro.AvroStorage' with arguments '[{"index":1,"schema":{"type":"record","name":"franchise_cast_and_crew","fields":[{"name":"franchise_id","type":"int"},{"name":"cast_and_crew","type":{"type":"array","items":"string"}}]}]' | |
Failed to parse: could not instantiate 'org.apache.pig.piggybank.storage.avro.AvroStorage' with arguments '[{"index":1,"schema":{"type":"record","name":"franchise_cast_and_crew","fields":[{"name":"franchise_id","type":"int"},{"name":"cast_and_crew","type":{"type":"array","items":"string"}}]}]' | |
at org.apache.pig.parser.QueryParserDriver.parse(QueryParserDriver.java:184) | |
at org.apache.pig.PigServer$Graph.validateQuery(PigServer.java:1565) | |
at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1538) | |
at org.apache.pig.PigServer.registerQuery(PigServer.java:540) | |
at org.apache.pig.tools.grunt.GruntParser.processPig(GruntParser.java:945) | |
at org.apache.pig.tools.pigscript.parser.PigScriptParser.parse(PigScriptParser.java:392) | |
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:190) | |
at org.apache.pig.tools.grunt.GruntParser.parseStopOnError(GruntParser.java:166) | |
at org.apache.pig.tools.grunt.Grunt.run(Grunt.java:69) | |
at org.apache.pig.Main.run(Main.java:535) | |
at org.apache.pig.Main.main(Main.java:153) | |
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) | |
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) | |
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) | |
at java.lang.reflect.Method.invoke(Method.java:597) | |
at org.apache.hadoop.util.RunJar.main(RunJar.java:156) | |
Caused by: java.lang.RuntimeException: could not instantiate 'org.apache.pig.piggybank.storage.avro.AvroStorage' with arguments '[{"index":1,"schema":{"type":"record","name":"franchise_cast_and_crew","fields":[{"name":"franchise_id","type":"int"},{"name":"cast_and_crew","type":{"type":"array","items":"string"}}]}]' | |
at org.apache.pig.impl.PigContext.instantiateFuncFromSpec(PigContext.java:546) | |
at org.apache.pig.parser.LogicalPlanBuilder.validateFuncSpec(LogicalPlanBuilder.java:791) | |
at org.apache.pig.parser.LogicalPlanBuilder.buildFuncSpec(LogicalPlanBuilder.java:780) | |
at org.apache.pig.parser.LogicalPlanGenerator.func_clause(LogicalPlanGenerator.java:4670) | |
at org.apache.pig.parser.LogicalPlanGenerator.store_clause(LogicalPlanGenerator.java:6312) | |
at org.apache.pig.parser.LogicalPlanGenerator.op_clause(LogicalPlanGenerator.java:1337) | |
at org.apache.pig.parser.LogicalPlanGenerator.general_statement(LogicalPlanGenerator.java:791) | |
at org.apache.pig.parser.LogicalPlanGenerator.statement(LogicalPlanGenerator.java:509) | |
at org.apache.pig.parser.LogicalPlanGenerator.query(LogicalPlanGenerator.java:384) | |
at org.apache.pig.parser.QueryParserDriver.parse(QueryParserDriver.java:175) | |
... 15 more | |
Caused by: java.lang.reflect.InvocationTargetException | |
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) | |
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39) | |
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27) | |
at java.lang.reflect.Constructor.newInstance(Constructor.java:513) | |
at org.apache.pig.impl.PigContext.instantiateFuncFromSpec(PigContext.java:536) | |
... 24 more | |
Caused by: Unexpected token END OF FILE at position 184. | |
at org.json.simple.parser.JSONParser.parse(Unknown Source) | |
at org.json.simple.parser.JSONParser.parse(Unknown Source) | |
at org.json.simple.parser.JSONParser.parse(Unknown Source) | |
at org.apache.pig.piggybank.storage.avro.AvroStorage.parseJsonString(AvroStorage.java:335) | |
at org.apache.pig.piggybank.storage.avro.AvroStorage.<init>(AvroStorage.java:118) | |
... 29 more | |
================================================================================ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment