Skip to content

Instantly share code, notes, and snippets.

@jrochkind
Created September 23, 2010 16:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jrochkind/593897 to your computer and use it in GitHub Desktop.
Save jrochkind/593897 to your computer and use it in GitHub Desktop.
import org.marc4j.marc.Record;
import org.marc4j.marc.DataField;
import org.marc4j.marc.Subfield;
import org.solrmarc.tools.Utils;
// Handle to the base-level SolrMarc indexer, declared here so that its helper
// methods (getFieldList, getFirstFieldVal, ...) can be called from this script.
// It is declared as null; the SolrIndexer code sets this value before any of
// the script methods below are called.
org.solrmarc.index.SolrIndexer indexer = null;
/** Lookup tables that translate MARC leader / 007 codes into format facet labels. **/
static HashMap leader_6_7 = null;
/**
 * Returns the lookup table keyed by the two-character string made of leader
 * bytes 6 and 7, mapping to a format label. The table is built on first use
 * and the same instance is handed back on every later call.
 */
public static leader_6_7_map() {
    // Already built: reuse the cached table.
    if (leader_6_7 != null) {
        return leader_6_7;
    }

    HashMap table = new HashMap();
    table.put("aa", "Book");   // May be chapter level.
    table.put("ab", "Serial"); // serial 'component' may be article
    table.put("am", "Book");
    table.put("as", "Serial");
    table.put("ta", "Book");
    table.put("tm", "Book");

    leader_6_7 = table;
    return leader_6_7;
}
static HashMap leader_6 = null;
/**
 * Returns the lookup table keyed by the one-character string holding leader
 * byte 6, mapping to a format label. The table is built on first use and the
 * same instance is handed back on every later call.
 */
public static leader_6_map() {
    // Already built: reuse the cached table.
    if (leader_6 != null) {
        return leader_6;
    }

    HashMap table = new HashMap();
    table.put("c", "Musical Score");
    table.put("d", "Musical Score");
    table.put("e", "Map/Globe");
    table.put("f", "Map/Globe");
    table.put("i", "Non-musical Recording");
    table.put("j", "Musical Recording");
    table.put("k", "Image");
    table.put("m", "Software/Data");
    table.put("g", "Video/Film");

    leader_6 = table;
    return leader_6;
}
static HashMap field_007_0 = null;
/**
 * Returns the lookup table that maps the first character of an 007 field to a
 * format label. Note that the keys are characters, not Strings. The table is
 * built on first use and the same instance is handed back on every later call.
 */
public static field_007_0_map() {
    // Already built: reuse the cached table.
    if (field_007_0 != null) {
        return field_007_0;
    }

    HashMap table = new HashMap();
    table.put('a', "Map/Globe");     // map
    table.put('d', "Map/Globe");     // globe
    table.put('k', "Image");         // non projected graphic
    table.put('q', "Musical Score");
    table.put('r', "Image");         // remote sensing image
    table.put('v', "Video/Film");    // video
    table.put('m', "Video/Film");    // motion picture

    field_007_0 = table;
    return field_007_0;
}
/**
 * Computes the format facet values for a MARC record.
 *
 * A "content" format is chosen from the first of these that yields a match:
 * leader bytes 6-7 (leader_6_7_map), leader byte 6 alone (leader_6_map), then
 * byte 0 of each 007 field (field_007_0_map). On top of that, carrier/physical
 * type labels are added independently: "Manuscript/Archive", "Microform",
 * "Online" and "Dissertation/Thesis". A record with a 502 field is labelled
 * "Dissertation/Thesis" and loses any "Book" label. If nothing at all matched,
 * the single value "Other" is returned.
 *
 * @param record the MARC record being indexed (passed in by solrmarc)
 * @return Set of Strings suitable for the format facet; never empty
 */
Set getFormatFacet(Record record)
{
    LinkedHashSet resultSet = new LinkedHashSet();

    // Pull the leader positions we need once, with bounds checks, so that a
    // missing or truncated leader yields "no match" rather than an exception.
    String leader = record.getLeader().toString();
    int leaderLength = (leader != null) ? leader.length() : 0;
    String leaderType = (leaderLength > 6) ? leader.substring(6, 7) : "";
    String leaderTypeAndLevel = (leaderLength > 7) ? leader.substring(6, 8) : "";
    String leaderControl = (leaderLength > 8) ? leader.substring(8, 9) : "";

    String gmd = indexer.getFirstFieldVal(record, "245h");

    // Collect byte 0 of every 007. Also note whether any SINGLE 007 is an
    // electronic resource ('c' in byte 0) with remote access ('r' in byte 1);
    // byte 1 only has that meaning inside a 'c' field, so the two bytes must
    // be tested on the same field rather than across all 007s.
    Set field007Set = indexer.getFieldList(record, "007");
    Set fields007Byte0 = new LinkedHashSet();
    boolean hasRemoteElectronic007 = false;
    for (String field007 : field007Set) {
        if (field007.length() > 0) { fields007Byte0.add( field007.charAt(0) ); }
        if (field007.startsWith("cr")) { hasRemoteElectronic007 = true; }
    }

    // First try bytes 6 and 7 of leader against our two byte leader hash.
    String mapped_leader_67 = leader_6_7_map().get( leaderTypeAndLevel );
    if (mapped_leader_67 != null) { resultSet.add( mapped_leader_67 ); }

    // If we didn't get one there, we can try just byte 6.
    if (resultSet.size() == 0) {
        String mapped_leader_6 = leader_6_map().get( leaderType );
        if (mapped_leader_6 != null) { resultSet.add( mapped_leader_6 ); }
    }

    // If we still didn't get one, try byte 0 of the 007 field(s).
    if (resultSet.size() == 0) {
        for (Object aByte : fields007Byte0) {
            String mapped = field_007_0_map().get( aByte );
            if (mapped != null) { resultSet.add( mapped ); }
        }
    }

    /* Okay, now add on our carrier/physical type things. Manuscript, Microform, Online, etc. */

    // Manuscript
    // leader 6: t=Manuscript Language Material, d=Manuscript Music,
    // f=Manuscript Cartographic. leader 6 = 'b' is obsolete, but if it exists
    // it means archival control. leader 8: a=archival control.
    if ( leaderType.equals("t") || leaderType.equals("d") || leaderType.equals("f") ||
         leaderType.equals("b") ||
         leaderControl.equals("a") )
    {
        resultSet.add("Manuscript/Archive");
    }

    // Microform
    // field 007 byte 0 == 'h' means microform. But many of our microform items
    // don't have an 007. Theoretically we could look at the very confusing
    // 006 and 008, but those don't seem to be filled out either, need to
    // talk to a cataloger about that. Meanwhile, we'll resort to GMD.
    if ( fields007Byte0.contains('h') ||
         leaderType.equals("h") || // leader 6 = 'h' is obsolete, but might still be in data.
         (gmd != null && gmd.startsWith("[microform]")) )
    {
        resultSet.add("Microform");
    }

    // Legacy "Available Online" just checked for the presence of an 856
    // with an http, as far as I can tell. This isn't sufficient to indicate
    // available online, the 856 http might not be full text. We'll try 007
    // and GMD for now. 006 and 008 might have helped if they were present,
    // but I think they generally aren't.
    //
    // Electronic resource => "Online".
    // An 007 with byte 0 == 'c' (electronic resource) and byte 1 == 'r'
    // (remote access) counts as online. If the GMD is electronic resource, we
    // count it as online only if NO 007 has byte 0 == 'c', because otherwise
    // we already know it's electronic but not online, or the 007 test would
    // have caught it.
    boolean gmdIsElectronic = (gmd != null && gmd.startsWith("[electronic resource]"));
    if ( hasRemoteElectronic007 ||
         ( (! fields007Byte0.contains('c')) && gmdIsElectronic ) )
    {
        resultSet.add("Online");
    }

    // THESES. If it has a 502, it's a thesis.
    if ( record.getVariableFields("502").size() > 0 ) {
        resultSet.add("Dissertation/Thesis");
        // Let's say if it's a thesis, it's not a 'Book'.
        resultSet.remove("Book");
        // Thought about removing Theses from "Manuscripts" bucket too,
        // but some of em really are ancient manuscript rare books type
        // stuff, even though some of them aren't. oh well.
    }

    // If no other format at all, we'll still call it "Other".
    if (resultSet.size() == 0) {
        resultSet.add("Other");
    }

    return resultSet;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment