Skip to content

Instantly share code, notes, and snippets.

@ernesto-jimenez
Created November 23, 2008 15:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ernesto-jimenez/28140 to your computer and use it in GitHub Desktop.
Save ernesto-jimenez/28140 to your computer and use it in GitHub Desktop.
import java_cup.runtime.*;
import java.io.*;
import java.util.Hashtable;
import java_cup.runtime.Symbol;
parser code {:
/**
 * Command-line entry point: parses a single BibTeX file and prints a summary.
 *
 * Expects exactly one argument, the path of a file whose name ends in ".bib"
 * (case-insensitive). The file is tokenised by BibtexScan and parsed by the
 * generated parser; the record and error counters kept by the scanner are
 * then reported on standard output.
 *
 * @param argv command-line arguments; argv[0] is the input file name
 * @throws Exception if the file cannot be opened or the parser aborts
 */
public static void main(String argv[]) throws Exception {
    // Validate the argument count before touching argv[0]; reading it first
    // would throw ArrayIndexOutOfBoundsException when no argument is given.
    if (argv.length != 1) {
        System.out.println ("Usage:");
        System.out.println ("\tjava BibtexScan filename.bib");
        return;
    }

    String fInName = argv[0];
    if (!fInName.toUpperCase().endsWith(".BIB")) {
        System.out.println ("\nExtension .bib is mandatory in source files ");
        System.out.println ("Please rename " + fInName + " file and try again");
        return;
    }

    int n = 0, nErrors = 0;
    // Create lexical analyzer and run the parser over it.
    FileInputStream input = new FileInputStream( fInName );
    try {
        BibtexScan scanner = new BibtexScan(input);
        new parser(scanner).parse();
        // The counters are static members of the scanner class.
        n = BibtexScan.n;
        nErrors = BibtexScan.nErrors;
    } finally {
        // Release the file handle even when parse() throws.
        input.close();
    }

    if (n == 0) {
        System.out.println("Error: file " + fInName + " is empty.");
    } else {
        System.out.println("\nScanning process completed. Number of BibTex records analysed: " + n);
        if (nErrors > 0) {
            System.out.println(nErrors + " errors found.");
        } else {
            System.out.println("No errors found.");
        }
    }
}
:}
// Punctuation tokens: braces, '=', ',' and the "and" separator between names.
terminal OPEN, CLOSE, EQUALS, COMMA, AND;
// Field-name tokens (one per recognised BibTeX field keyword).
terminal OAUTHOR, OTITLE, OPUBLISHER, OYEAR, OEDITOR, OISBN, OJOURNAL;
terminal OVOLUME, ONUMBER, OEDITION, OMONTH, OBOOKTITLE, ONOTE;
// Record-type tokens (@BOOK, @ARTICLE, @PROCEEDINGS, @INPROCEEDINGS).
terminal BOOKTYPE, ARTICLETYPE, PROCEEDINGSTYPE, INPROCEEDINGSTYPE;
// Value-carrying tokens.
terminal Integer YEAR;
terminal Integer NUMBER;
// An ISBN is an identifier, not a quantity: ten-digit values can exceed the
// int range and may start with zeros, so it is typed as text.
terminal String ISBN;
terminal String WORD;
terminal String KEY;
// Field-level non-terminals.
non terminal string, names, author, title, publisher, year, editor, note;
non terminal isbn, journal, volume, number, edition, month, booktitle;
// Per-record-type non-terminals: each chain lists the fields of one record
// type in the order the grammar requires them.
non terminal book, bauthor, btitle, bpublisher, byear, bedition, bisbn;
non terminal article, aauthor, atitle, ajournal, ayear, avolume, anumber;
non terminal proceedings, ptitle, pyear, peditor, pisbn;
non terminal inproceedings, iauthor, iyear, ititle;
non terminal ibooktitle, imonth, ieditor, inote;
// Top level: a file is a sequence of records.
non terminal record, records;
// Grammar
start with records;
// Free text: one or more WORD tokens. Written left-recursively so the LALR
// parser reduces after every WORD instead of stacking the whole sequence.
string ::= string WORD | WORD;
// One or more names separated by AND (left-recursive for the same reason).
names ::= names AND string | string;
// Field productions: <field keyword> = { <value> }
author ::= OAUTHOR EQUALS OPEN names CLOSE;
title ::= OTITLE EQUALS OPEN string CLOSE;
publisher ::= OPUBLISHER EQUALS OPEN string CLOSE;
year ::= OYEAR EQUALS OPEN YEAR CLOSE;
editor ::= OEDITOR EQUALS OPEN names CLOSE;
isbn ::= OISBN EQUALS OPEN ISBN CLOSE;
journal ::= OJOURNAL EQUALS OPEN string CLOSE;
volume ::= OVOLUME EQUALS OPEN NUMBER CLOSE;
number ::= ONUMBER EQUALS OPEN NUMBER CLOSE;
edition ::= OEDITION EQUALS OPEN NUMBER CLOSE;
month ::= OMONTH EQUALS OPEN string CLOSE;
booktitle ::= OBOOKTITLE EQUALS OPEN string CLOSE;
note ::= ONOTE EQUALS OPEN string CLOSE;
// Record bodies. Each body is "{ KEY , field , field ... }". The fields are
// chained through helper non-terminals so that they must appear in a fixed
// order; a helper with an empty alternative ("|;") makes that field and
// everything after it optional.

// Book: author, title, publisher, year, then optionally edition and,
// only after an edition, optionally isbn.
// NOTE(review): as written an isbn cannot follow year directly; confirm
// against the assignment specification.
book ::= OPEN KEY bauthor CLOSE;
bauthor ::= COMMA author btitle;
btitle ::= COMMA title bpublisher;
bpublisher ::= COMMA publisher byear;
byear ::= COMMA year bedition;
bedition ::= COMMA edition bisbn |;
bisbn ::= COMMA isbn |;
// Article: author, title, journal, year, then optionally volume and,
// only after a volume, optionally number.
article ::= OPEN KEY aauthor CLOSE;
aauthor ::= COMMA author atitle;
atitle ::= COMMA title ajournal;
ajournal ::= COMMA journal ayear;
ayear ::= COMMA year avolume;
avolume ::= COMMA volume anumber |;
anumber ::= COMMA number |;
// Proceedings: title, year, then optionally editor and, only after an
// editor, optionally isbn.
proceedings ::= OPEN KEY ptitle CLOSE;
ptitle ::= COMMA title pyear;
pyear ::= COMMA year peditor;
peditor ::= COMMA editor pisbn |;
pisbn ::= COMMA isbn |;
// Inproceedings: author, year, title, booktitle, then the optional chain
// month -> editor -> note (each one only reachable through the previous).
inproceedings ::= OPEN KEY iauthor CLOSE;
iauthor ::= COMMA author iyear;
iyear ::= COMMA year ititle;
ititle ::= COMMA title ibooktitle;
ibooktitle ::= COMMA booktitle imonth;
imonth ::= COMMA month ieditor |;
ieditor ::= COMMA editor inote |;
inote ::= COMMA note |;
// A record is a record-type token followed by the matching body.
record ::= BOOKTYPE book
| ARTICLETYPE article
| PROCEEDINGSTYPE proceedings
| INPROCEEDINGSTYPE inproceedings;
// A file is zero or more records. Left-recursive so each record is reduced
// as soon as it is complete; the right-recursive form keeps every record on
// the parse stack until the end of the file is reached.
records ::= records record |;
import java.io.*;
import java.util.Hashtable;
import java_cup.runtime.*;
%%
%{
// Shared scanner state. Everything is static, so the counters are global to
// the generated class rather than per scanner instance.
static int n = 0; // Number of Bibtex records, used to control file emptiness.
static int nErrors = 0; // Number of errors found according to specifications
static String fInName ; // Input filename
// Record keys seen so far; CheckKey uses it to detect duplicates. Only the
// keys matter: the stored value is always 0.
static Hashtable<String,Integer> TableOfKeys = new Hashtable<String,Integer>();
/**
 * Validates a record key and registers it in the given table.
 *
 * A key longer than 10 characters, or one already present in the table, is
 * reported on standard output and counted in nErrors. Any other key is
 * stored in the table and echoed.
 *
 * @param table keys registered so far; a newly accepted key is added to it
 * @param key   the record key text to validate
 */
public static void CheckKey(Hashtable<String,Integer> table, String key){
    // Over-long keys are rejected outright and never registered.
    if (key.length() > 10) {
        nErrors++;
        System.out.println( "Error: keyword too long (" + key + ")" );
        return;
    }

    // A key that is already registered is a duplicate.
    if (table.containsKey(key)) {
        nErrors++;
        System.out.println("Error: duplicate record key (" + key + ")");
        return;
    }

    // First occurrence: remember it (the stored value is irrelevant).
    table.put(key, 0);
    System.out.println("Key: " + key);
}
/**
 * Checks that a year lies in the accepted range 1900..2009 (inclusive).
 * An out-of-range year is reported on standard output and counted in nErrors.
 *
 * @param yyyy the year as a string of decimal digits
 * @throws NumberFormatException if yyyy does not parse as an int
 */
public static void CheckYear(String yyyy){
    final int value = Integer.parseInt(yyyy);
    // The bounds are hard-coded.
    final boolean inRange = value >= 1900 && value <= 2009;
    if (!inRange) {
        nErrors++;
        System.out.println("Error in year data: " + yyyy);
    }
}
/**
 * Checks that an ISBN has exactly 10 characters.
 * Any other length is reported on standard output and counted in nErrors.
 *
 * @param isbn the ISBN text to check
 */
public static void CheckISBN(String isbn){
    // Only the length is verified; the content is not inspected here.
    if (isbn.length() == 10) {
        return;
    }
    nErrors++;
    System.out.println("Error in ISBN data: " + isbn);
}
/**
 * Checks a page range of the form "first--last": the first page must not be
 * greater than the last. A reversed range is reported on standard output and
 * counted in nErrors.
 *
 * @param pp the page range text; must contain the "--" separator with a
 *           decimal number on each side
 */
public static void CheckPages(String pp){
    final String separator = "--";
    final int cut = pp.indexOf(separator);
    // Text before the separator is the first page, text after it the last.
    final int firstPage = Integer.parseInt(pp.substring(0, cut));
    final int lastPage = Integer.parseInt(pp.substring(cut + 2));
    if (firstPage > lastPage) {
        nErrors++;
        System.out.println("Error in pages: " + pp);
    }
}
/**
 * Stand-alone entry point for the scanner: tokenises one BibTeX file
 * (discarding the tokens) and prints the record and error counters.
 *
 * Expects exactly one argument, the path of a file whose name ends in ".bib"
 * (case-insensitive).
 *
 * @param argv command-line arguments; argv[0] is the input file name
 * @throws java.io.IOException if the file cannot be opened or read
 */
public static void main (String argv []) throws java.io.IOException {
    if (argv.length != 1) {
        System.out.println ("Usage:");
        System.out.println ("\tjava BibtexScan filename.bib");
        return;
    }

    // Store the name in the static field. Declaring a local with the same
    // name here would leave the field null, and the "file is empty" message
    // below would print "null" instead of the file name.
    fInName = argv[0];
    if (!fInName.toUpperCase().endsWith(".BIB")) {
        System.out.println ("\nExtension .bib is mandatory in source files ");
        System.out.println ("Please rename " + fInName + " file and try again");
        return;
    }

    // Create lexical analyzer
    FileInputStream input = new FileInputStream( fInName );
    try {
        BibtexScan yy = new BibtexScan( input );
        System.out.println("\nProcessing file " + fInName + "...");
        // Read all tokens. In %cup mode the scanner signals end of input
        // with a Symbol whose id is sym.EOF rather than with null, so the
        // loop has to stop on that symbol; testing only for null never ends.
        Symbol symbol;
        do {
            symbol = yy.next_token();
        } while (symbol != null && symbol.sym != sym.EOF);
    } finally {
        // Release the file handle even if scanning fails.
        input.close();
    }

    if (n == 0) {
        System.out.println("Error: file " + fInName + " is empty.");
    } else {
        System.out.println("\nScanning process completed. Number of BibTex records analysed: " + n);
        if (nErrors > 0) {
            System.out.println(nErrors + " errors found.");
        } else {
            System.out.println("No errors found.");
        }
    }
} //End main
%}
// Name of the generated scanner class.
%class BibtexScan
// Generate a scanner usable by a CUP parser (next_token() returning Symbol).
%cup
// Enable line counting; the rules below use yyline in their error messages.
%line
%unicode
// Lexical states. PROCESSING_RECORD is active between a record-type keyword
// and the record's closing brace; PROCESSING_KEY reads the record key; each
// *_ENTRY state reads the braced value of one kind of field.
%state PROCESSING_RECORD, PROCESSING_KEY
%state PROCESSING_GENERAL_ENTRY, PROCESSING_YEAR_ENTRY, PROCESSING_NUM_ENTRY
%state PROCESSING_ISSUE_ENTRY, PROCESSING_ISBN_ENTRY, PROCESSING_PAGE_ENTRY
%state PROCESSING_NAMES_ENTRY
// One or more decimal digits (used for year, volume/edition and isbn values).
intData=[0-9]+
// Record key: a letter followed by letters, digits, '_' or '-'.
recordKey=[a-zA-Z][a-zA-Z0-9_-]*
// One text unit: a letter, digit, '-', '(' or ')' optionally followed by one
// character from the second bracket class. The rules apply it with '*'.
text=([a-zA-Z0-9\-\(\)][".":,]?)
// A number from 1 to 12 (used for the "number" field).
numData=[1-9]|1[0-2]
// Page range: digits, "--", digits.
pagesRange=[0-9]+\-\-[0-9]+
%%
// Rules without a state prefix. Blanks and line breaks are skipped; ',' and
// '=' are returned as tokens.
[ \t]+ { /* do nothing */ }
[\r\n]+ { /* do nothing */ }
"," { return new Symbol(sym.COMMA); }
"=" { return new Symbol(sym.EQUALS); }
// Outside any record: a record-type keyword (upper case only) is echoed,
// switches to PROCESSING_RECORD and is returned as the matching type token.
<YYINITIAL>"@ARTICLE" { yybegin(PROCESSING_RECORD); System.out.println("\n" + yytext()); return new Symbol(sym.ARTICLETYPE); }
<YYINITIAL>"@BOOK" { yybegin(PROCESSING_RECORD); System.out.println("\n" + yytext()); return new Symbol(sym.BOOKTYPE); }
<YYINITIAL>"@PROCEEDINGS" { yybegin(PROCESSING_RECORD); System.out.println("\n" + yytext()); return new Symbol(sym.PROCEEDINGSTYPE); }
<YYINITIAL>"@INPROCEEDINGS" { yybegin(PROCESSING_RECORD); System.out.println("\n" + yytext()); return new Symbol(sym.INPROCEEDINGSTYPE); }
// Fallback: report the line of any other character. Unlike the fallbacks of
// the other states, this one does not increment nErrors.
// NOTE(review): confirm that not counting these is intentional.
<YYINITIAL>. { System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Inside a record. The opening brace leads to the record key; the closing
// brace ends the record, counts it in n and returns to YYINITIAL.
<PROCESSING_RECORD>"{" { yybegin(PROCESSING_KEY); return new Symbol(sym.OPEN); }
<PROCESSING_RECORD>"}" { n++; yybegin(YYINITIAL); return new Symbol(sym.CLOSE);}
// Field keywords (lower case only). Each one selects the state that reads
// its braced value and is returned as the corresponding field token.
// Free-text fields:
<PROCESSING_RECORD>"title" { yybegin(PROCESSING_GENERAL_ENTRY); return new Symbol(sym.OTITLE); }
<PROCESSING_RECORD>"journal" { yybegin(PROCESSING_GENERAL_ENTRY); return new Symbol(sym.OJOURNAL); }
<PROCESSING_RECORD>"publisher" { yybegin(PROCESSING_GENERAL_ENTRY); return new Symbol(sym.OPUBLISHER); }
<PROCESSING_RECORD>"month" { yybegin(PROCESSING_GENERAL_ENTRY); return new Symbol(sym.OMONTH); }
<PROCESSING_RECORD>"note" { yybegin(PROCESSING_GENERAL_ENTRY); return new Symbol(sym.ONOTE); }
<PROCESSING_RECORD>"booktitle" { yybegin(PROCESSING_GENERAL_ENTRY); return new Symbol(sym.OBOOKTITLE); }
// Name-list fields (values may contain the "and" separator):
<PROCESSING_RECORD>"author" { yybegin(PROCESSING_NAMES_ENTRY); return new Symbol(sym.OAUTHOR); }
<PROCESSING_RECORD>"editor" { yybegin(PROCESSING_NAMES_ENTRY); return new Symbol(sym.OEDITOR); }
// Numeric fields:
<PROCESSING_RECORD>"year" { yybegin(PROCESSING_YEAR_ENTRY); return new Symbol(sym.OYEAR); }
<PROCESSING_RECORD>"volume" { yybegin(PROCESSING_NUM_ENTRY); return new Symbol(sym.OVOLUME); }
<PROCESSING_RECORD>"edition" { yybegin(PROCESSING_NUM_ENTRY); return new Symbol(sym.OEDITION); }
<PROCESSING_RECORD>"number" { yybegin(PROCESSING_ISSUE_ENTRY); return new Symbol(sym.ONUMBER); }
<PROCESSING_RECORD>"isbn" { yybegin(PROCESSING_ISBN_ENTRY); return new Symbol(sym.OISBN); }
// "pages" switches state but returns no token to the parser.
<PROCESSING_RECORD>"pages" { yybegin(PROCESSING_PAGE_ENTRY); /* This is not used according to PAC2 */ }
// Fallback: any other character is counted as an error and reported.
<PROCESSING_RECORD>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Record key right after the opening brace: validated by CheckKey (length
// and uniqueness), then returned as KEY with its text; the scanner goes back
// to PROCESSING_RECORD for the fields.
<PROCESSING_KEY>{recordKey} { CheckKey(TableOfKeys,yytext()); yybegin(PROCESSING_RECORD); return new Symbol(sym.KEY, yytext()); }
// Fallback: any other character is counted as an error and reported.
<PROCESSING_KEY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Value of a free-text field: "{", a run of text units returned as WORD
// tokens, then "}" which returns to PROCESSING_RECORD.
<PROCESSING_GENERAL_ENTRY>"{" { return new Symbol(sym.OPEN); }
<PROCESSING_GENERAL_ENTRY>{text}* { return new Symbol(sym.WORD, yytext()); }
<PROCESSING_GENERAL_ENTRY>"}" { yybegin(PROCESSING_RECORD); return new Symbol(sym.CLOSE); }
<PROCESSING_GENERAL_ENTRY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Value of a name-list field: same as above, plus the literal "and", which
// is returned as AND. The "and" rule is listed before the text rule so that
// it wins when both match the same length.
<PROCESSING_NAMES_ENTRY>"{" { return new Symbol(sym.OPEN); }
<PROCESSING_NAMES_ENTRY>"and" { return new Symbol(sym.AND); }
<PROCESSING_NAMES_ENTRY>{text}* { return new Symbol(sym.WORD, yytext()); }
<PROCESSING_NAMES_ENTRY>"}" { yybegin(PROCESSING_RECORD); return new Symbol(sym.CLOSE); }
<PROCESSING_NAMES_ENTRY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Value of "volume" / "edition": any run of digits, returned as NUMBER.
<PROCESSING_NUM_ENTRY>"{" { return new Symbol(sym.OPEN); }
<PROCESSING_NUM_ENTRY>{intData} { return new Symbol(sym.NUMBER, new Integer(yytext())); }
<PROCESSING_NUM_ENTRY>"}" { yybegin(PROCESSING_RECORD); return new Symbol(sym.CLOSE); }
<PROCESSING_NUM_ENTRY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Value of "year": a run of digits, range-checked by CheckYear and returned
// as YEAR.
<PROCESSING_YEAR_ENTRY>"{" { return new Symbol(sym.OPEN); }
<PROCESSING_YEAR_ENTRY>{intData} { CheckYear(yytext()); return new Symbol(sym.YEAR, new Integer(yytext())); }
<PROCESSING_YEAR_ENTRY>"}" { yybegin(PROCESSING_RECORD); return new Symbol(sym.CLOSE); }
<PROCESSING_YEAR_ENTRY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Value of "number": restricted to what numData matches (1 to 12), returned
// as NUMBER.
<PROCESSING_ISSUE_ENTRY>"{" { return new Symbol(sym.OPEN); }
<PROCESSING_ISSUE_ENTRY>{numData} { return new Symbol(sym.NUMBER, new Integer(yytext())); }
<PROCESSING_ISSUE_ENTRY>"}" { yybegin(PROCESSING_RECORD); return new Symbol(sym.CLOSE); }
<PROCESSING_ISSUE_ENTRY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Value of "isbn": a run of digits whose length is checked by CheckISBN.
// The token carries the matched text, not an Integer: a ten-digit value
// above 2147483647 does not fit in an int, so converting it would throw
// NumberFormatException for ISBNs that CheckISBN accepts as valid.
<PROCESSING_ISBN_ENTRY>"{" { return new Symbol(sym.OPEN); }
<PROCESSING_ISBN_ENTRY>{intData} { CheckISBN(yytext()); return new Symbol(sym.ISBN, yytext()); }
<PROCESSING_ISBN_ENTRY>"}" { yybegin(PROCESSING_RECORD); return new Symbol(sym.CLOSE); }
// Fallback: any other character is counted as an error and reported.
<PROCESSING_ISBN_ENTRY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
// Value of "pages": the range is validated by CheckPages but no token is
// returned for it, while the surrounding braces are still returned.
// NOTE(review): the grammar has no production for a pages field, so a file
// containing one would presumably fail to parse - confirm that such input is
// out of scope.
<PROCESSING_PAGE_ENTRY>"{" { return new Symbol(sym.OPEN); }
<PROCESSING_PAGE_ENTRY>{pagesRange} { CheckPages(yytext()); /* This is not used according to PAC2 */ }
<PROCESSING_PAGE_ENTRY>"}" { yybegin(PROCESSING_RECORD); return new Symbol(sym.CLOSE); }
// Fallback: any other character is counted as an error and reported.
<PROCESSING_PAGE_ENTRY>. { nErrors++; System.out.println( "Unexpected character in line: " + (yyline + 1) ); }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment