Skip to content

Instantly share code, notes, and snippets.

@egonw
Forked from lindenb/Makefile
Last active August 29, 2015 14:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save egonw/102ef35d0df168fa85ca to your computer and use it in GitHub Desktop.
Save egonw/102ef35d0df168fa85ca to your computer and use it in GitHub Desktop.
# Default target: once the parser jar is built, download the ChEMBL 15 MySQL
# dump with curl, let tar write the .sql member to stdout (the 'O' flag) and
# pipe it into the SQLToRDF parser, whose main() reads from standard input.
all: sql2rdf.jar
curl -s "http://ebi.edu.au/ftp/databases/chembl/ChEMBLdb/releases/chembl_15/chembl_15_mysql.tar.gz" |\
tar xOvzf - chembl_15_mysql/chembl_15.mysqldump.sql |\
java -cp sql2rdf.jar SQLToRDF
# Generate the parser sources from the JavaCC grammar, compile them and
# package every resulting .class file into the jar named by the target ($@).
sql2rdf.jar : SQL2RDF.jj
javacc SQL2RDF.jj ## tested with javacc 5.0
javac SQLToRDF.java
jar cvf $@ *.class
/* JavaCC generator options for the SQLToRDF parser. */
options {
// Generate instance (non-static) parser members; main() creates the parser with 'new'.
STATIC=false;
// Token matching ignores letter case, so keywords match in any capitalisation.
IGNORE_CASE=true;
// Both tracing facilities of the generated code are switched off.
DEBUG_PARSER=false;
DEBUG_TOKEN_MANAGER=false;
}
PARSER_BEGIN(SQLToRDF)
/*
The MIT License (MIT)
Copyright (c) 2015 Pierre Lindenbaum
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Motivation: https://twitter.com/egonwillighagen/status/624853921673146368
*/
import java.util.*;
import java.io.*;
import java.math.*;
/**
 * Parser for a MySQL dump read from standard input.
 * The CREATE TABLE statements populate {@link #name2table}; every tuple of an
 * INSERT statement is then dispatched, value by value, to
 * {@link #insert(String,String,Object)}.
 */
public class SQLToRDF
{
/** Base of the named schema elements. */
private class TypeDef
{
String name;
public String getName()
{
return name;
}
}
/** A table: a named element owning an ordered list of columns. */
private class ClassDef extends TypeDef
{
List<ColumnDef> columns=new ArrayList<ColumnDef>();
}
/** Tables seen so far, keyed by the name used in CREATE TABLE. */
Map<String,ClassDef> name2table = new HashMap<String,ClassDef>();
/** A column: a named element with a type label and a nullability flag. */
private class ColumnDef extends TypeDef
{
String type;
boolean nil;
}
/**
 * Receives one cell of an inserted row. Currently a placeholder that does nothing.
 *
 * @param tableName name of the table the row belongs to
 * @param column    name of the column the value belongs to
 * @param value     parsed value; null for an SQL NULL
 */
private void insert(String tableName,String column, Object value)
{
//TODO
}
/**
 * Dispatches the values of one inserted row to the per-cell insert method,
 * pairing the i-th value with the i-th declared column of the table.
 *
 * @param tableName name of the target table
 * @param L         values of the row, in column order
 * @throws IllegalStateException if the table was never declared, or if the
 *         row carries more values than the table has columns
 */
private void insert(String tableName, List<Object> L)
{
ClassDef table = name2table.get(tableName);
if(table==null)
{
// Fail with a readable message instead of a bare NullPointerException.
throw new IllegalStateException("INSERT INTO undeclared table \""+tableName+"\"");
}
if(L.size()>table.columns.size())
{
// Extra values used to surface as an IndexOutOfBoundsException below.
throw new IllegalStateException("INSERT INTO \""+tableName+"\": "+L.size()+
" values for "+table.columns.size()+" columns");
}
for(int i=0;i< L.size();++i)
{
ColumnDef col = table.columns.get(i);
Object value = L.get(i);
insert(tableName,col.name,value);
}
}
/**
 * Parses the SQL dump available on standard input.
 * Prints "Done." on stderr on success; on failure prints the stack trace and
 * exits with status 1 so that a calling shell or make can detect the error.
 *
 * @param args ignored
 */
public static void main(String[] args)
{
try
{
SQLToRDF parser = new SQLToRDF(System.in);
parser.input();
System.err.println("Done.");
}
catch(Throwable err)
{
err.printStackTrace();
System.exit(1);
}
}
}
PARSER_END(SQLToRDF)
/*
 * Input discarded by the token manager: blanks, tabs, carriage returns and
 * line feeds, '--' comments up to the end of the line, and any
 * slash-star ... star-slash comment that opens and closes on one line.
 * NOTE(review): the latter also swallows single-line MySQL conditional
 * statements of the slash-star-bang form - presumably intended, confirm.
 */
SKIP:{
" "
|"\t"
|"\r"
|"\n"
| <"--" (~["\n"])* "\n">
| <"/*" (~["\n"])* "*/">
}
/*
 * SQL keywords and column-type names. Because the options set IGNORE_CASE=true,
 * each literal matches in any letter case. These tokens are declared before
 * IDENTIFIER1: for matches of equal length JavaCC picks the token declared
 * first, so these words are tokenised as keywords rather than identifiers.
 * NOTE(review): MEDIUMBLOG matches "mediumblob"; the name looks like a typo
 * of MEDIUMBLOB but colType() references it under this spelling.
 */
TOKEN:
{
<DROP: "DROP">
| <CREATE: "CREATE">
| <USE: "USE">
| <TABLE: "TABLE">
| <TABLES: "TABLES">
| <TEMPORARY: "TEMPORARY">
| <DATABASE: "DATABASE">
| <IF: "IF">
| <CONSTRAINT:"CONSTRAINT">
| <REFERENCES:"REFERENCES">
| <DELETE:"DELETE">
| <UPDATE:"UPDATE">
| <CASCADE:"CASCADE">
| <FOREIGN:"FOREIGN">
| <LOCK:"LOCK">
| <UNLOCK:"UNLOCK">
| <WRITE:"WRITE">
| <INSERT:"INSERT">
| <INTO:"INTO">
| <VALUES:"VALUES">
| <ON:"ON">
| <NOT: "NOT">
| <NULL: "NULL">
| <EXISTS: "EXISTS">
| <SET: ("SET"|"set")>
| <ENUM: ("ENUM"|"enum")>
| <KEY: "KEY">
| <VARCHAR: "varchar">
| <CHAR: "char">
| <DEFAULTVALUE: ("default"|"DEFAULT")>
| <LONGBLOB:"longblob">
| <BLOB:"blob">
| <INTEGER:"int">
| <SMALLINT:"smallint">
| <DATETIME:"datetime">
| <DOUBLE:"double">
| <FLOAT:"float">
| <MEDIUMBLOG:"mediumblob">
| <TINYINT:"tinyint">
| <TEXT:("longtext"|"tinytext"|"text"|"mediumtext")>
| <UNSIGNED:"unsigned">
| <SIGNED:"signed">
| <UNIQUE:"UNIQUE">
| <PRIMARY:"PRIMARY">
| <CHARACTER:"character">
| <COLLATE:"collate">
}
/*
 * Numbers, identifiers, punctuation and string literals.
 * Entries prefixed with '#' are private helpers usable only inside other
 * token definitions. IDENTIFIER2 is a back-quoted IDENTIFIER1; VARIABLE is an
 * identifier prefixed with one or two '@'.
 */
TOKEN:
{
<#LETTER:(["a"-"z"]|["A"-"Z"]|"_")>
| <#DIGIT:["0"-"9"]>
| <#SIGN: ["-","+"]>
| <#EXPONENT: ("E"|"e") (<SIGN>)? (<DIGIT>)+ >
| <FLOATING: (<SIGN>)? (<DIGIT>)* "." (<DIGIT>)* (<EXPONENT>)?| (<DIGIT>)+ (<EXPONENT>) >
| <INT: (<SIGN>)?<DIGIT>(<DIGIT>)* >
| <IDENTIFIER1: <LETTER> (<DIGIT>|<LETTER>)* >
| <IDENTIFIER2: "`"(<IDENTIFIER1>) "`" >
| <VARIABLE: "@"("@")?<IDENTIFIER1> >
| <EQ:"=">
| <SEMICOLON:";">
| <LPAR:"(">
| <RPAR:")">
| <COMMA:",">
| <#ESCAPE_CHAR: "\\" ["n","t","b","r","f","\\","'","\""] >
| <SIMPLE_QUOTE_LITERAL: "\'"( (~["\'","\\","\n","\r"])| <ESCAPE_CHAR>)* "\'">
// The body must exclude the delimiter itself ('"'), not the single quote:
// a bare "'" is legal inside a double-quoted literal, a bare '"' ends it.
| <DOUBLE_QUOTE_LITERAL: "\""( (~["\"","\\","\n","\r"])| <ESCAPE_CHAR>)* "\"">
}
/**
 * Start symbol: any sequence of the supported statements (DROP, SET, CREATE,
 * USE, INSERT, LOCK TABLES ... WRITE, UNLOCK TABLES) or stray semicolons,
 * up to the end of the input.
 */
private void input():{}
{
(
drop()
| set()
|create()
|use()
| insert()
| <LOCK> <TABLES> identifier() <WRITE> <SEMICOLON>
| <UNLOCK> <TABLES> <SEMICOLON>
| <SEMICOLON>
)* <EOF>
}
/**
 * INSERT INTO table VALUES (...),(...),...;
 * Each parenthesised tuple is handed to insert(String,List) as soon as it has
 * been parsed.
 */
private void insert():
    {
    String tableName;
    List<Object> row;
    }
{
    <INSERT> <INTO> tableName=identifier() <VALUES>
    row=insert_columns() { insert(tableName,row); }
    (
        <COMMA> row=insert_columns() { insert(tableName,row); }
    )*
    <SEMICOLON>
}
/**
 * One parenthesised, comma-separated tuple of values.
 *
 * @return the parsed values in the order they were written
 */
private List<Object> insert_columns():{Object o;List<Object> cols=new ArrayList<Object>();}
{
<LPAR> o=insert_column2() {cols.add(o);}
// Each further value must be assigned to 'o' before it is added; otherwise
// the first value of the tuple would be appended again for every column.
(<COMMA> o=insert_column2() {cols.add(o);} )* <RPAR>
{
return cols;
}
}
/**
 * A single inserted value: a quoted string, an integer, a floating point
 * number, or the NULL keyword.
 *
 * @return the parsed value, or null for the NULL keyword
 */
private Object insert_column2():{Object value=null;}
{
    value=quoted()   { return value; }
|   value=integer()  { return value; }
|   value=floating() { return value; }
|   <NULL>           { return value; }
}
/** USE database; - parsed and ignored. */
private void use():{}
{
    <USE>
    identifier()
    <SEMICOLON>
}
/** CREATE statement: delegates to the DATABASE or the TABLE variant. */
private void create():{}
{
    <CREATE>
    (
        createDatabase()
    |   createTable()
    )
    <SEMICOLON>
}
/** DROP TABLE [IF EXISTS] name; - parsed and ignored. */
private void drop():{}
{
    <DROP> <TABLE>
    [ <IF> <EXISTS> ]
    identifier()
    <SEMICOLON>
}
/**
 * SET lhs = rhs; where each side is an identifier or an '@' variable.
 * The statement is parsed and ignored.
 */
private void set():{}
{
    <SET>
    ( identifier() | <VARIABLE> )
    <EQ>
    ( identifier() | <VARIABLE> )
    <SEMICOLON>
}
/** DATABASE name - tail of a CREATE DATABASE statement; the name is discarded. */
private void createDatabase():{}
{
<DATABASE> identifier()
}
/**
 * [TEMPORARY] TABLE [IF NOT EXISTS] name ( component, ... ) table-options
 * Tail of a CREATE TABLE statement. The columns found among the components
 * are collected, in declaration order, into a ClassDef that is registered in
 * name2table under the table name. Key and constraint components yield no
 * column and are skipped.
 */
private void createTable():{ClassDef clazz=new ClassDef(); String tableName; ColumnDef col=null;}
{
(<TEMPORARY>)? <TABLE>
(<IF> <NOT> <EXISTS> )?
// Record the name on the definition itself so that getName() works.
tableName=identifier() { clazz.name=tableName; }
<LPAR>
col=component() { if(col!=null) clazz.columns.add(col);}
(<COMMA> col=component() { if(col!=null) clazz.columns.add(col);} )*
<RPAR>
// Trailing table options are skipped token by token. Integers, quoted strings
// and COLLATE are accepted so that options such as AUTO_INCREMENT=42,
// COMMENT='...' or COLLATE=name do not abort the parse.
( identifier() | <EQ> | <DEFAULTVALUE> | <COLLATE> | integer() | quoted() )*
{
name2table.put(tableName,clazz);
}
}
/**
 * One entry of a CREATE TABLE body: a key, a constraint or a column.
 *
 * @return the column definition, or null when the entry is a key or a constraint
 */
private ColumnDef component():{ColumnDef parsed=null;}
{
    (
        key()
    |   constraint()
    |   parsed=column()
    )
    { return parsed; }
}
/**
 * CONSTRAINT name FOREIGN KEY (col) REFERENCES table (col)
 * followed by any number of ON DELETE CASCADE / ON UPDATE CASCADE clauses.
 * Parsed and ignored.
 */
private void constraint():{}
{
    <CONSTRAINT> identifier()
    <FOREIGN> <KEY> <LPAR> identifier() <RPAR>
    <REFERENCES> identifier() <LPAR> identifier() <RPAR>
    (
        <ON> ( <DELETE> | <UPDATE> ) <CASCADE>
    )*
}
/**
 * [UNIQUE|PRIMARY] KEY [name] ( col[(length)], ... )
 * Index declaration inside CREATE TABLE; parsed and ignored.
 */
private void key():{}
{
    [ <UNIQUE> | <PRIMARY> ]
    <KEY> [ identifier() ]
    <LPAR>
        identifier() [ <LPAR> integer() <RPAR> ]
        (
            <COMMA> identifier() [ <LPAR> integer() <RPAR> ]
        )*
    <RPAR>
}
/**
 * name type [[NOT] NULL] [DEFAULT (NULL | quoted | integer)]
 * Column declaration inside CREATE TABLE.
 *
 * @return a ColumnDef carrying only the column name; the type and the
 *         nullability that were parsed are not stored on it
 */
private ColumnDef column():{String columnName;}
{
    columnName=identifier()
    colType()
    [ [ <NOT> ] <NULL> ]
    [ <DEFAULTVALUE> ( <NULL> | quoted() | integer() ) ]
    {
    ColumnDef result=new ColumnDef();
    result.name=columnName;
    return result;
    }
}
/**
 * Column type of a column declaration: character types with optional length
 * and charset, blob/text types, integer types with optional length and sign,
 * datetime, enum and set with their value lists, float and double.
 * The type is recognised but not recorded.
 */
private void colType():{}
{
    ( <VARCHAR> | <CHAR> ) [ dimension() ] [ charset() ]
|   ( <LONGBLOB> | <MEDIUMBLOG> | <BLOB> | <TEXT> )
|   <INTEGER> [ dimension() ] [ <SIGNED> | <UNSIGNED> ]
|   ( <SMALLINT> | <TINYINT> ) [ dimension() ] [ <SIGNED> | <UNSIGNED> ]
|   <DATETIME>
|   <ENUM> stringset() [ charset() ]
|   <SET> stringset()
|   <FLOAT>
|   <DOUBLE>
}
/** ( 'a', 'b', ... ) - non-empty list of quoted strings, as used by ENUM and SET. */
private void stringset():{}
{
    <LPAR>
        quoted()
        ( <COMMA> quoted() )*
    <RPAR>
}
/** CHARACTER SET name [COLLATE name] - each name is an identifier or a quoted string. */
private void charset():{}
{
    <CHARACTER> <SET>
    ( identifier() | quoted() )
    [ <COLLATE> ( identifier() | quoted() ) ]
}
/** ( integer ) - parenthesised length following a column type. */
private void dimension():{}
{
    <LPAR>
    integer()
    <RPAR>
}
/**
 * A bare or back-quoted identifier.
 *
 * @return the identifier text, with the surrounding back-quotes removed when present
 */
private String identifier():{Token tok;}
{
    tok=<IDENTIFIER1>
    { return tok.image; }
|   tok=<IDENTIFIER2>
    { return tok.image.substring(1,tok.image.length()-1); }
}
/**
 * A FLOATING token.
 *
 * @return the token text converted with the BigDecimal(String) constructor
 */
private BigDecimal floating():{Token tok;}
{
    tok=<FLOATING>
    { return new BigDecimal(tok.image); }
}
/**
 * An INT token.
 *
 * @return the token text converted with the BigInteger(String) constructor
 */
private BigInteger integer():{Token tok;}
{
    tok=<INT>
    { return new BigInteger(tok.image); }
}
/**
 * A single- or double-quoted string literal.
 *
 * @return the raw token text, still including the enclosing quotes and any
 *         backslash escapes
 */
private String quoted():{Token literal;}
{
    ( literal=<SIMPLE_QUOTE_LITERAL> | literal=<DOUBLE_QUOTE_LITERAL> )
    { return literal.image; }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment