Skip to content

Instantly share code, notes, and snippets.

@OneRaynyDay
Created November 11, 2014 01:41
Show Gist options
  • Save OneRaynyDay/2d8d8fa7e23c3203547a to your computer and use it in GitHub Desktop.
Save OneRaynyDay/2d8d8fa7e23c3203547a to your computer and use it in GitHub Desktop.
package cs_1c;
//class EBookEntry -----------------------------------------------------
public class EBookEntry implements Comparable<EBookEntry>
{
private String title, creator, subject;
private int eTextNum;
public static final int MIN_STRING = 1;
public static final int MAX_STRING = 300;
public static final int MAX_ENTRY_LENGTH = 10000;
public static final int MAX_ID = 100000;
// comparable tools
public static final int SORT_BY_TITLE = 0;
public static final int SORT_BY_CREATOR = 1;
public static final int SORT_BY_SUBJECT = 2;
public static final int SORT_BY_ID = 3;
private static int sortKey = SORT_BY_CREATOR;
// default constructor
EBookEntry()
{
title = "";
creator = "";
subject = "";
eTextNum = 0;
}
// accessors
public String getTitle() { return title; }
public String getCreator() { return creator; }
public String getSubject() { return subject; }
public int getETextNum() { return eTextNum; }
// mutators
public boolean setTitle(String strArg)
{
if ( !validString(strArg) )
return false;
title = strArg;
return true;
}
public boolean setCreator(String strArg)
{
if ( !validString(strArg) )
return false;
creator = strArg;
return true;
}
public boolean setSubject(String strArg)
{
if ( !validString(strArg) )
return false;
subject = strArg;
return true;
}
public boolean setEtextNum(int intArg)
{
if (intArg < 1 || intArg > MAX_ID)
return false;
eTextNum = intArg;
return true;
}
public static boolean setSortType( int whichType )
{
switch (whichType)
{
case SORT_BY_TITLE:
case SORT_BY_CREATOR:
case SORT_BY_SUBJECT:
case SORT_BY_ID:
sortKey = whichType;
return true;
default:
return false;
}
}
public int compareTo(EBookEntry other)
{
switch (sortKey)
{
case SORT_BY_TITLE:
return (title.compareToIgnoreCase(other.title));
case SORT_BY_CREATOR:
return (creator.compareToIgnoreCase(other.creator));
case SORT_BY_SUBJECT:
return (subject.compareToIgnoreCase(other.subject));
case SORT_BY_ID:
return (eTextNum - other.eTextNum);
default:
return 0;
}
}
public String toString()
{
return " # " + eTextNum + " ---------------\n"
+ " \"" + title + "\"\n"
+ " by " + creator + "\n"
+ " re: " + subject + "\n\n";
}
// utility for testing all String mutator args
private static boolean validString(String strArg)
{
if (strArg == null)
return false;
if (strArg.length() < MIN_STRING || strArg.length() > MAX_STRING)
return false;
return true;
}
}
package cs_1c;
import java.io.*;
import java.util.*;
//class EBookEntryReader -----------------------------------------------------
public class EBookEntryReader
{
ArrayList<EBookEntry> books = new ArrayList<EBookEntry>();
private int numBooks;
private boolean fileOpenError;
private String bookFile;
// constructor
public EBookEntryReader(String fileName)
{
EBookEntry book;
BufferedReader inFile;
String line, entryString;
numBooks = 0;
fileOpenError = false;
bookFile = "NO FILE NAME PROVIDED";
if (fileName.length() == 0)
{
fileOpenError = true;
return;
}
bookFile = fileName;
// open file for reading
try
{
// ------- open and read the file
inFile = new BufferedReader(
new FileReader(fileName) );
while ( inFile.ready() )
{
line = inFile.readLine();
if (isDataLine(line))
{
entryString = readOneEntry(inFile, line); // expands line to entry
if (entryString == null)
{
fileOpenError = true;
break;
}
// if not an English entry, we'll have prob with chars
if ( !entryString.contains( ">en<" ) )
continue;
book = readOneBook(entryString);
books.add(book);
numBooks++;
}
}
inFile.close();
}
catch( FileNotFoundException e)
{
fileOpenError = true;
}
catch( IOException e)
{
fileOpenError = true;
}
}
// accessors
public EBookEntry getBook(int k)
{
if (k < 0 || k >= numBooks)
return new EBookEntry(); // dummy return
return books.get(k);
}
public String getFileName() { return bookFile; }
public boolean readError() { return fileOpenError; }
public int getNumBooks() { return numBooks; }
// helpers
// reads lines from the input stream, concatenating until it finds
// the terminating tag, "</pgterms:etext>". Result is a single long
// line containing entire record wrapped in
// "<pgterms:etext> ... </pgterms:etext>" which it returns to client.
// assumes first line containing <pgterms:etext> is already in
// line parameter - required for call
private String readOneEntry(BufferedReader infile, String line)
{
String fullEntry = line;
String nextLine = "";
try
{
while ( infile.ready()
&& fullEntry.length() < EBookEntry.MAX_ENTRY_LENGTH - 20 )
{
nextLine = infile.readLine();
fullEntry += nextLine;
if ( nextLine.equals("</pgterms:etext>") )
break;
}
}
catch (IOException e)
{
return null;
}
// if we have an unterminated entry, force it to be terminated.
if ( !nextLine.equals("</pgterms:etext>") )
fullEntry += "</pgterms:etext>";
return fullEntry;
}
// reads one string in the above record (all lines of the record
// are assumed to be concatenated into a single line prior to
// this call surrounded by <pgterms:etext> ... </pgterms:etext> )
// and leaves data in an EBookEntry object for return to client
private EBookEntry readOneBook(String entryString)
{
EBookEntry book = new EBookEntry();
book.setEtextNum(readIDFromLine(entryString));
book.setCreator(readStringFromEntry(entryString, "<dc:creator"));
book.setTitle(readStringFromEntry(entryString, "<dc:title"));
book.setSubject(readStringFromEntry(entryString, "<dc:subject"));
return book;
}
// helpers
private boolean isDataLine(String line)
{
if (line.length() < 15)
return false;
// check for a line of the form "<pgterms:etext --- "
if ( line.substring(0,14).equals("<pgterms:etext") )
return true;
return false;
}
int readIDFromLine(String line)
{
int startNumPos;
int numLength;
startNumPos = line.indexOf("ID=\"etext") + 9;
numLength = line.substring(startNumPos).indexOf( "\"");
if (startNumPos < 0 || startNumPos > EBookEntry.MAX_STRING
|| numLength < 0 || numLength > 20 )
return 0;
else
return Integer.valueOf(line.substring(startNumPos,
startNumPos + numLength));
}
String readStringFromEntry (String entryString, String delimiter)
{
int start, stop, k;
String stringAfterDelimiter;
if (delimiter.length() < 1 || entryString.length() < delimiter.length())
return "(no data found)";
// first find "<dc:title", e.g.
start = entryString.indexOf(delimiter);
if (start < 0)
return "(no data found)";
stringAfterDelimiter = entryString.substring(start+delimiter.length());
// we're looking for something like ">A ..." rather than ">< ... "
for (k = 0; k < stringAfterDelimiter.length() - 1; k++)
{
// rather than using isLetter() we check manually to avoid foreign
if (stringAfterDelimiter.charAt(k) == '>'
&&
// home-made isLetter()
(
(stringAfterDelimiter.charAt(k+1) >='a'
&& stringAfterDelimiter.charAt(k+1) <= 'z')
||
(stringAfterDelimiter.charAt(k+1) >='A'
&& stringAfterDelimiter.charAt(k+1) <= 'Z')
)
)
break;
}
if (k == stringAfterDelimiter.length() - 1)
return "(no data found)";
// we've got the starting position for the raw data
start = k + 1;
stringAfterDelimiter = stringAfterDelimiter.substring(start); // cut tags
stop = stringAfterDelimiter.indexOf("<"); // by above condition, cannot be 0
return stringAfterDelimiter.substring(0, stop);
}
}
package cs_1c;
import java.util.*;
// FHhashQP class --------------------------------------------
public class FHhashQP<E>
{
protected static final int ACTIVE = 0;
protected static final int EMPTY = 1;
protected static final int DELETED = 2;
static final int INIT_TABLE_SIZE = 97;
static final double INIT_MAX_LAMBDA = 0.49;
protected HashEntry<E>[] mArray;
protected int mSize;
protected int mLoadSize;
protected int mTableSize;
protected double mMaxLambda;
// public methods ---------------------------------
public FHhashQP(int tableSize)
{
mLoadSize = mSize = 0;
if (tableSize < INIT_TABLE_SIZE)
mTableSize = INIT_TABLE_SIZE;
else
mTableSize = nextPrime(tableSize);
allocateArray(); // uses mTableSize;
mMaxLambda = INIT_MAX_LAMBDA;
}
public FHhashQP()
{
this(INIT_TABLE_SIZE);
}
public boolean insert( E x)
{
int bucket = findPos(x);
if ( mArray[bucket].state == ACTIVE )
return false;
mArray[bucket].data = x;
mArray[bucket].state = ACTIVE;
mSize++;
// check load factor
if( ++mLoadSize > mMaxLambda * mTableSize )
rehash();
return true;
}
public boolean remove( E x )
{
int bucket = findPos(x);
if ( mArray[bucket].state != ACTIVE )
return false;
mArray[bucket].state = DELETED;
mSize--; // mLoadSize not dec'd because it counts any non-EMP location
return true;
}
public boolean contains(E x )
{
return mArray[findPos(x)].state == ACTIVE;
}
public int size() { return mSize; }
void makeEmpty()
{
int k, size = mArray.length;
for(k = 0; k < size; k++)
mArray[k].state = EMPTY;
mSize = mLoadSize = 0;
}
public boolean setMaxLambda( double lam )
{
if (lam < .1 || lam > INIT_MAX_LAMBDA )
return false;
mMaxLambda = lam;
return true;
}
// protected methods of class ----------------------
int findPos( E x )
{
int kthOddNum = 1;
int index = myHash(x);
while ( mArray[index].state != EMPTY
&& !mArray[index].data.equals(x) )
{
index += kthOddNum; // k squared = (k-1) squared + kth odd #
kthOddNum += 2; // compute next odd #
if ( index >= mTableSize )
index -= mTableSize;
}
return index;
}
protected void rehash()
{
// we save old list and size then we can reallocate freely
HashEntry<E>[] oldArray = mArray;
int k, oldTableSize = mTableSize;;
mTableSize = nextPrime(2*oldTableSize);
// allocate a larger, empty array
allocateArray(); // uses mTableSize;
// use the insert() algorithm to re-enter old data
mSize = mLoadSize = 0;
for(k = 0; k < oldTableSize; k++)
if (oldArray[k].state == ACTIVE)
insert( oldArray[k].data );
}
protected int myHash(E x)
{
int hashVal;
hashVal = x.hashCode() % mTableSize;
if(hashVal < 0)
hashVal += mTableSize;
return hashVal;
}
protected static int nextPrime(int n)
{
int k, candidate, loopLim;
// loop doesn't work for 2 or 3
if (n <= 2 )
return 2;
else if (n == 3)
return 3;
for (candidate = (n%2 == 0)? n+1 : n ; true ; candidate += 2)
{
// all primes > 3 are of the form 6k +/- 1
loopLim = (int)( (Math.sqrt((double)candidate) + 1)/6 );
// we know it is odd. check for divisibility by 3
if (candidate%3 == 0)
continue;
// now we can check for divisibility of 6k +/- 1 up to sqrt
for (k = 1; k <= loopLim; k++)
{
if (candidate % (6*k - 1) == 0)
break;
if (candidate % (6*k + 1) == 0)
break;
}
if (k > loopLim)
return candidate;
}
}
void allocateArray()
{
int k;
mArray = new HashEntry[mTableSize];
for (k = 0; k < mTableSize; k++)
mArray[k] = new HashEntry<E>();
}
}
import java.util.NoSuchElementException;
import cs_1c.*;
public class FHhashQPwFind<KeyType, E extends Comparable<KeyType>> extends
FHhashQP<E> {
public E find(KeyType key) throws NoSuchElementException {
E queriedObj = mArray[findPosKey(key)].data;
if (queriedObj != null) {
return queriedObj;
} else
throw new NoSuchElementException();
}
int myHashKey(KeyType key) {
int hashVal;
hashVal = key.hashCode() % mTableSize;
if (hashVal < 0)
hashVal += mTableSize;
return hashVal;
}
int findPosKey(KeyType key) {
int kthOddNum = 1;
int index = myHashKey(key);
while (mArray[index].state != EMPTY
&& mArray[index].data.compareTo(key) != 0) {
index += kthOddNum; // k squared = (k-1) squared + kth odd #
kthOddNum += 2; // compute next odd #
if (index >= mTableSize)
index -= mTableSize;
}
return index;
}
}
import java.util.NoSuchElementException;
import cs_1c.EBookEntry;
import cs_1c.EBookEntryReader;
import java.io.IOException;
import java.util.Random;
// ----------- wrapper classes -------------
class EBookCompInt implements Comparable<Integer> {
EBookEntry data;
public EBookCompInt(EBookEntry eBook) {
data = eBook;
}
public int compareTo(Integer textNum) {
return data.getETextNum() - textNum;
}
public boolean equals(EBookCompInt eBook) {
return data.equals(eBook.data);
}
public int hashCode() {
return data.getETextNum();
}
}
class EBookCompString implements Comparable<String> {
public static final int HASH_BY_TITLE = 0;
public static final int HASH_BY_CREATOR = 1;
public static final int HASH_BY_SUBJECT = 2;
int hashState = HASH_BY_TITLE;
EBookEntry data;
public EBookCompString(EBookEntry eBook) {
data = eBook;
}
public int compareTo(String textString) {
switch (hashState) {
case HASH_BY_TITLE:
return data.getTitle().compareTo(textString);
case HASH_BY_CREATOR:
return data.getCreator().compareTo(textString);
case HASH_BY_SUBJECT:
return data.getSubject().compareTo(textString);
}
// this should not happen
return -1;
}
public void setState(int state) {
hashState = state;
}
public boolean equals(EBookCompInt eBook) {
return data.equals(eBook.data);
}
public int hashCode() {
switch (hashState) {
case HASH_BY_TITLE:
return data.getTitle().hashCode();
case HASH_BY_CREATOR:
return data.getCreator().hashCode();
case HASH_BY_SUBJECT:
return data.getSubject().hashCode();
}
// should not happen
return -1;
}
}
// ------------------------------------------------------
public class Main {
public static final int NUM_RANDOM_INDICES = 25;
// ------- main --------------
public static void main(String[] args) throws Exception {
// create a QP hash table of EBooks ..
FHhashQPwFind<Integer, EBookCompInt> hashTableInt = new FHhashQPwFind<Integer, EBookCompInt>();
FHhashQPwFind<String, EBookCompString> hashTableTitle = new FHhashQPwFind<String, EBookCompString>();
FHhashQPwFind<String, EBookCompString> hashTableCreator = new FHhashQPwFind<String, EBookCompString>();
FHhashQPwFind<String, EBookCompString> hashTableSubject = new FHhashQPwFind<String, EBookCompString>();
// get book array
EBookEntry[] bookEntries = getDataFromFile("catalog-short4.txt",
hashTableInt, hashTableTitle, hashTableCreator, hashTableSubject);
Random rand = new Random();
// display NUM_RANDOM_INDICES books from array, should find the book
// successfully
System.out.println("Display NUM_RANDOM_INDICES books from array");
System.out.println();
for (int k = 0; k < NUM_RANDOM_INDICES; k++) {
int randomBookNumber = rand.nextInt(bookEntries.length);
EBookEntry book = bookEntries[randomBookNumber];
System.out.println(book);
// find book by text number
System.out.println("------FINDING BY TEXT NUMBER------");
int number = book.getETextNum();
try {
EBookCompInt eCompInt = hashTableInt.find(number);
if (eCompInt.data.getETextNum() == number)
System.out.println("Successfully find book text number#: "
+ number);
else
System.out.println("Not found book text number#: " + number);
} catch (NoSuchElementException e) {
System.out.println("Not found book text number#: " + number);
}
System.out.println();
// find book by title
System.out
.println("------FINDING BY TEXT TITLE, CREATOR AND SUBJECT------");
String title = book.getTitle();
try {
EBookCompString eCompStr = hashTableTitle.find(title);
if (eCompStr.data.getTitle().equals(title))
System.out.println("Successfully find book title: " + title);
else
System.out.println("Not found book title: " + number);
} catch (NoSuchElementException e) {
System.out.println("Not found book title: " + title);
}
System.out.println();
// find book by creator
String creator = book.getCreator();
try {
EBookCompString eCompStr = hashTableCreator.find(creator);
if (eCompStr.data.getCreator().equals(creator))
System.out.println("Successfully find book creator: " + creator);
else
System.out.println("Not found book creator: " + creator);
} catch (NoSuchElementException e) {
System.out.println("Not found book creator: " + creator);
}
System.out.println();
// find book by subject
String subject = book.getSubject();
try {
EBookCompString eCompStr = hashTableSubject.find(subject);
if (eCompStr.data.getSubject().equals(subject))
System.out.println("Successfully find book subject: " + subject);
else
System.out.println("Not found book subject: " + subject);
} catch (NoSuchElementException e) {
System.out.println("Not found book subject: " + subject);
}
System.out.println();
}
// test known failures exceptions:
System.out.println("------ERROR TESTING-------");
try {
hashTableInt.find(-3);
} catch (NoSuchElementException e) {
System.out.println("Not found book text number# -3");
}
try {
hashTableTitle.find("Jack Kerouac");
} catch (NoSuchElementException e) {
System.out.println("Not found book title Jack Kerouac");
}
try {
hashTableCreator.find("Jack creator");
} catch (NoSuchElementException e) {
System.out.println("Not found book creator Jack creator");
}
try {
hashTableSubject.find("Jack subject");
} catch (NoSuchElementException e) {
System.out.println("Not found book subject Jack subject");
}
}
private static EBookEntry[] getDataFromFile(String filename,
FHhashQPwFind<Integer, EBookCompInt> hashTableInt,
FHhashQPwFind<String, EBookCompString> hashTableTitle,
FHhashQPwFind<String, EBookCompString> hashTableCreator,
FHhashQPwFind<String, EBookCompString> hashTableSubject)
throws IOException {
EBookEntryReader bookInput = new EBookEntryReader(filename);
int arraySize;
// how we test the success of the read:
if (bookInput.readError()) {
System.out.println("couldn't open " + bookInput.getFileName()
+ " for input.");
throw new IOException();
}
System.out.println("Successfully read from " + bookInput.getFileName());
// create an array of objects for our own use:
arraySize = bookInput.getNumBooks();
EBookEntry[] bookEntries = new EBookEntry[arraySize];
for (int k = 0; k < arraySize; k++) {
EBookEntry ebook = bookInput.getBook(k);
bookEntries[k] = ebook;
EBookCompInt eCompInt = new EBookCompInt(ebook);
hashTableInt.insert(eCompInt);
EBookCompString eCompString = new EBookCompString(ebook);
eCompString.setState(EBookCompString.HASH_BY_TITLE);
hashTableTitle.insert(eCompString);
eCompString = new EBookCompString(ebook);
eCompString.setState(EBookCompString.HASH_BY_CREATOR);
hashTableCreator.insert(eCompString);
eCompString = new EBookCompString(ebook);
eCompString.setState(EBookCompString.HASH_BY_SUBJECT);
hashTableSubject.insert(eCompString);
}
return bookEntries;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment