This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/******************************************************************************
Japan MARC .dat file
******************************************************************************/
/**
 * Display names keyed by the one-letter material type code.
 * Looked up with the character taken from label.slice(6,7) in _createHeader
 * and with the `type` argument of _convert.
 */
const MATERIALS = new Map(Object.entries({
    a: '図書',
    c: '楽譜',
    e: '地図資料',
    g: '映像資料',
    i: '録音資料(音楽録音資料を除く)',
    j: '音楽録音資料',
    k: '静止画資料',
    m: '電子資料',
    t: '文字資料(書写資料)',
}));
/**
 * Walks a blob record by record, parsing the records whose type is selected.
 *
 * Each record starts with a 24-character label; its first five characters
 * give the record length and the character at index 6 gives the record type.
 *
 * @param {Blob} blob - Source data; must not be empty.
 * @param {Set<string>} types - Record types to parse. An empty set selects
 *   every record.
 * @param {function(number, number, object=): void} onprocess - Progress
 *   callback invoked once per record with (count, start / blob.size) and,
 *   for parsed records only, the parsed record as third argument.
 * @returns {Promise<object[]>} The parsed records, in file order.
 * @throws {Error} When the blob is empty.
 */
async function processFile(blob, types, onprocess) {
    const blobLen = blob.size;
    if (blobLen === 0) {
        throw new Error('blob size is zero');
    }

    const marcs = [];
    let start = 0;
    let count = 0;
    while (start < blobLen) {
        const label = await blob.slice(start, start + 24).text();
        const length = Number.parseInt(label.slice(0, 5), 10);

        // A non-numeric, zero or negative length cannot advance the cursor
        // (zero/negative would loop forever), so stop at the first such label
        // instead of parsing trailing junk.
        if (!(length > 0)) {
            break;
        }

        const type = label.slice(6, 7);
        if (types.size === 0 || types.has(type)) {
            const blobBibData = blob.slice(start, start + length);
            const marc = await processMarc(label, length, blobBibData);
            marc.header.type = type;
            marc.header.length = length;
            // Collect the record so the returned array is actually populated.
            marcs.push(marc);
            onprocess(count, start / blobLen, marc);
            count++;
        } else {
            onprocess(count, start / blobLen);
        }
        start += length;
    }
    return marcs;
}
/**
 * Parses one record into its header, raw fields and converted bibliography.
 *
 * The base address read from label.slice(12,17) splits the record blob:
 * bytes 24..base address go to _parseDirectory, the remainder to
 * _parseDataFields.
 *
 * @param {string} label - Record label text.
 * @param {number} length - Record length (not used by this function).
 * @param {Blob} blob - Blob covering the record.
 * @returns {Promise<{header: object, fields: object, bibliography: object}>}
 */
async function processMarc(label, length, blob) {
    const dataStart = parseInt(label.slice(12, 17));
    const directoryBlob = blob.slice(24, dataStart);
    const dataBlob = blob.slice(dataStart);

    const directories = await _parseDirectory(directoryBlob);
    const header = _createHeader(label);
    const fields = await _parseDataFields(directories, dataBlob);

    return {
        header,
        fields,
        bibliography: _convert(header.materialType, fields),
    };
}
/**
 * Splits the record label into named header properties.
 *
 * Numeric positions are parsed as base-10 integers; all other positions are
 * kept as the raw one-character strings.
 *
 * @param {string} label - Record label text (indexes 5..23 are read).
 * @returns {object} Header object; `materialName` is looked up in MATERIALS
 *   and is undefined for an unknown material type.
 */
function _createHeader(label) {
    const text = (from, to) => label.slice(from, to);
    // Explicit radix so digit fields are never interpreted as hex/legacy octal.
    const number = (from, to) => Number.parseInt(label.slice(from, to), 10);
    const type = text(6, 7);

    return {
        'status': text(5, 6),
        'level': text(7, 8),
        'controlTypeCode': text(8, 9),
        // NOTE(review): 'OHTERS' looks like a typo of 'OTHERS'; kept as-is
        // because callers may compare against this exact value.
        'charset': text(9, 10) === 'a' ? 'UTF-8' : 'OHTERS',
        'materialType': type,
        'materialName': MATERIALS.get(type),
        'indicatorLength': number(10, 11),
        'subfieldIdLength': number(11, 12),
        'baseaddress': number(12, 17),
        'encodeLevel': text(17, 18),
        'descListFormat': text(18, 19),
        'MVSMRLebel': text(19, 20),
        'datafieldLength': number(20, 21),
        'firstLocationLength': number(21, 22),
        'unknownLength': number(22, 23),
        'unknownItem': text(23, 24),
    };
}
/**
 * Parses the record directory into a list of field descriptors.
 *
 * The blob text is read, its last character is dropped (presumably the
 * terminator that closes the directory), and the rest is cut into
 * 12-character entries: 3 characters of field id, 4 digits of length and
 * 5 digits of offset.
 *
 * Incomplete trailing entries and entries whose length or offset is not a
 * number are skipped instead of being emitted with NaN values.
 *
 * @param {Blob} blob - Blob covering the directory portion of a record.
 * @returns {Promise<Array<{id: string, length: number, offset: number}>>}
 */
async function _parseDirectory(blob) {
    const ENTRY_LENGTH = 12;
    const text = (await blob.text()).slice(0, -1);
    const entries = [];

    for (let pos = 0; pos + ENTRY_LENGTH <= text.length; pos += ENTRY_LENGTH) {
        const entry = text.slice(pos, pos + ENTRY_LENGTH);
        const length = Number.parseInt(entry.slice(3, 7), 10);
        const offset = Number.parseInt(entry.slice(7, 12), 10);
        if (Number.isNaN(length) || Number.isNaN(offset)) {
            continue;
        }
        entries.push({ id: entry.slice(0, 3), length, offset });
    }
    return entries;
}
/**
 * Reads every field listed in the directory from the data blob.
 *
 * For each directory entry the bytes offset .. offset + length - 1 are
 * decoded (the final byte of the field is dropped), split on U+001F,
 * trimmed, and empty parts are discarded.
 *
 * - Fields whose id parses to a number below 10 become a plain string
 *   (the last non-empty part).
 * - All other fields become an object keyed by the first character of each
 *   part, with the rest of the part as value; a repeated key collects its
 *   values in an array.
 * - A repeated field id collects its values in an array.
 *
 * Each part is split by position rather than via an inserted tab character,
 * so values that themselves contain a tab are preserved in full.
 *
 * @param {Array<{id: string, length: number, offset: number}>} directories
 * @param {Blob} blob - Blob covering the data portion of a record.
 * @returns {Promise<object>} Field values keyed by field id.
 */
async function _parseDataFields(directories, blob) {
    const hasOwn = (obj, name) => Object.prototype.hasOwnProperty.call(obj, name);
    const fields = {};

    for (const { id, length, offset } of directories) {
        const raw = await blob.slice(offset, offset + length - 1).text();
        const parts = raw
            .split('\u001f')
            .map((part) => part.trim())
            .filter((part) => part.length > 0);
        if (parts.length === 0) {
            continue;
        }

        let value;
        if (Number.parseInt(id, 10) < 10) {
            // Control-style field: no subfield codes, keep the text itself.
            value = parts[parts.length - 1];
        } else {
            value = {};
            for (const part of parts) {
                const code = part.slice(0, 1);
                const text = part.slice(1);
                if (!hasOwn(value, code)) {
                    value[code] = text;
                } else {
                    if (!Array.isArray(value[code])) {
                        value[code] = [value[code]];
                    }
                    value[code].push(text);
                }
            }
        }

        if (!hasOwn(fields, id)) {
            fields[id] = value;
        } else {
            if (!Array.isArray(fields[id])) {
                fields[id] = [fields[id]];
            }
            fields[id].push(value);
        }
    }
    return fields;
}
/**
 * Builds the flat bibliography record from the parsed fields.
 *
 * Keys are inserted in a fixed order; 'bib_book_symbol', 'bib_content_intro'
 * and the publishing keys set by setBibPublish are only present when a value
 * is available.
 *
 * The `fields` argument is not modified: for repeated fields the first entry
 * carrying the wanted subfield is read in place rather than removed with
 * shift().
 *
 * @param {string} type - Material type code, looked up in MATERIALS.
 * @param {object} fields - Field values keyed by field id.
 * @returns {object} Plain object of bibliography values.
 */
function _convert(type, fields) {
    // Value of `key` for field `id`; for a repeated field, the cleaned value
    // of the first entry that has `key` ('' when none has it).
    const firstSubfield = (id, key) => {
        const value = getValue(fields, id, key);
        if (instancetype(value) !== 'array') {
            return value;
        }
        const entry = value.find((v) => instancetype(v) === 'object' && (key in v));
        return entry === undefined ? '' : clearText(entry[key]);
    };

    const bib = new Map();
    bib.set('bib_book_title', getValueFixArray(fields, '245', 'a'));
    bib.set('bib_book_title_yomi', getYomi(fields, '245'));
    bib.set('bib_book_subtitle', getValueFixArray(fields, '740', 'a'));
    bib.set('bib_book_subtitle_yomi', getYomi(fields, '740'));
    bib.set('bib_series', getValueFixArray(fields, '490', 'a'));
    bib.set('bib_series_yomi', getYomi(fields, '490'));
    bib.set('bib_author', getValueFixArray(fields, '700', 'a', '/'));
    bib.set('bib_author_yomi', getYomi(fields, '700'));
    bib.set('bib_version', getValueFixArray(fields, '250', 'a'));

    // The book symbol is the first character of the author reading that
    // survives the character filter below.
    const author = bib.get('bib_author_yomi').replace(/[^アイウエオカ-ヂツ-モヤユヨラ-ロワオン]+/g, '');
    if (author.length > 0) {
        bib.set('bib_book_symbol', author.slice(0, 1));
    }

    setBibPublish(fields, bib);
    bib.set('bib_price', getValueFixArray(fields, '020', 'c'));
    bib.set('bib_origin', getValueFixArray(fields, '740', 'a', '|'));

    const contentIntro = getContentIntro(fields);
    if (contentIntro) {
        bib.set('bib_content_intro', contentIntro);
    }

    bib.set('bib_note', getNote(fields));
    bib.set('bib_page_nums', getValueFixArray(fields, '300', 'a'));
    bib.set('bib_book_size', getValueFixArray(fields, '300', 'c'));
    bib.set('bib_language_name', getValueFixArray(fields, '040', 'b'));
    bib.set('bib_material_name', MATERIALS.get(type));
    bib.set('bib_frequency_name', getValueFixArray(fields, '310', 'a'));
    bib.set('bib_general_subject', getValueFixArray(fields, '650', 'a', '/'));
    bib.set('bib_general_subject_yomi', getYomi(fields, '650'));
    bib.set('bib_class_symbol_code', getClassSymbolNdc(fields));
    // A repeated 015 used to be concatenated as 'JP[object Object]'.
    bib.set('bib_mark_id', 'JP' + firstSubfield('015', 'a'));
    bib.set('bib_isbn', clearText(firstSubfield('020', 'a')));
    return Object.fromEntries(bib);
}
/**
 * Adds the publishing keys to `bib`, reading field 260 when present and
 * field 264 otherwise. Does nothing when neither field exists.
 *
 * Sets 'bib_publish_place' (subfield a), 'bib_publisher' (subfield b,
 * followed by '/' and its reading when a reading is found) and
 * 'bib_publish_year' (subfield c).
 *
 * @param {object} fields - Field values keyed by field id.
 * @param {Map<string, *>} bib - Map that receives the publishing keys.
 * @returns {void}
 */
function setBibPublish(fields, bib) {
    const ref = ['260', '264'].find((tag) => tag in fields);
    if (ref === undefined) {
        return;
    }

    const name = getValueItemArray(fields, ref, 'b');
    const reading = getYomi(fields, ref, 'b');
    const publisher = reading.length > 0 ? `${name}/${reading}` : name;

    bib.set('bib_publish_place', getValueItemArray(fields, ref, 'a'));
    bib.set('bib_publisher', publisher);
    bib.set('bib_publish_year', getValueItemArray(fields, ref, 'c'));
}
/**
 * Collects subfield a of the note fields 500, 504 and 521 into one string.
 *
 * Earlier this function built the per-field values but never returned them,
 * so callers always received undefined.
 *
 * @param {object} fields - Field values keyed by field id.
 * @param {string} [delimitor='\n'] - Separator placed between notes.
 * @returns {string} The non-empty notes joined by the separator, or '' when
 *   there are none.
 */
function getNote(fields, delimitor) {
    const separator = delimitor || '\n';
    return ['500', '504', '521']
        .map((ref) => getValueFixArray(fields, ref, 'a', separator))
        .filter((note) => typeof note === 'string' && note.length > 0)
        .join(separator);
}
/**
 * Returns subfield a of field 505 as a single string.
 *
 * For a repeated 505 the entries that carry subfield a are cleaned with
 * clearText and joined with the delimiter. The value is read through
 * getValue, which hands back the raw entry array for repeated fields; the
 * previous call to getValueFixArray had already flattened the entries with
 * spaces, so the delimiter was never applied.
 *
 * @param {object} fields - Field values keyed by field id.
 * @param {string} [delimitor='\n'] - Separator between repeated entries.
 * @returns {string} The content text, or '' when field 505 is absent.
 */
function getContentIntro(fields, delimitor) {
    const separator = delimitor || '\n';
    const field = getValue(fields, '505', 'a');
    if (!Array.isArray(field)) {
        return field;
    }
    return field
        .filter((entry) => entry !== null && typeof entry === 'object' && 'a' in entry)
        .flatMap((entry) => entry['a'])
        .map((text) => clearText(text))
        .filter((text) => typeof text === 'string' && text.length > 0)
        .join(separator);
}
/**
 * Returns subfield a of the 084 entry whose subfield 2 starts with 'njb'
 * (presumably the NDC scheme code — confirm against the data spec).
 *
 * Repeated 084 entries without a string subfield 2 are skipped; they used to
 * raise a TypeError.
 *
 * @param {object} fields - Field values keyed by field id.
 * @returns {*} Subfield a of the first matching entry, or '' when there is
 *   no match.
 */
function getClassSymbolNdc(fields) {
    const scheme = /^njb/;
    const field = getValue(fields, '084', '2');

    if (typeof field === 'string') {
        return scheme.test(field) ? getValue(fields, '084', 'a') : '';
    }

    if (Array.isArray(field)) {
        const entry = field.find((v) =>
            v !== null && typeof v === 'object' && typeof v['2'] === 'string' && scheme.test(v['2']));
        if (entry !== undefined && entry['a'] !== undefined) {
            return entry['a'];
        }
    }
    return '';
}
/**
 * Looks up the reading (yomi) stored in field 880 for the given field.
 *
 * Subfield 6 of the source field has the form '<tag>-<occurrence>'; the
 * matching 880 entry is the one whose own subfield 6 starts with
 * '<ref>-<occurrence>'. The wanted subfield of that entry is cleaned with
 * clearText; when the raw text ends with ',' and the entry has a subfield d,
 * ',<d>' is appended.
 *
 * Fixes over the previous version:
 * - a record with exactly one 880 field (stored as a bare object rather than
 *   an array) now yields its reading instead of '';
 * - 880 entries without a string subfield 6 are skipped instead of throwing;
 * - the link is compared with startsWith, so characters such as '$' in the
 *   linkage text are no longer interpreted as regular-expression syntax.
 *
 * @param {object} fields - Field values keyed by field id.
 * @param {string} ref - Field id whose reading is wanted.
 * @param {string} [key='a'] - Subfield to read from the 880 entry.
 * @returns {string} Readings joined with '/', one per occurrence of `ref`.
 */
function getYomi(fields, ref, key) {
    const subfield = key || 'a';

    // Normalise 880 to an array: a non-repeated field is stored as an object.
    const raw880 = fields['880'];
    let alternates = [];
    if (Array.isArray(raw880)) {
        alternates = raw880;
    } else if (raw880 !== null && typeof raw880 === 'object') {
        alternates = [raw880];
    }

    const formatReading = (text, entry) => {
        if (typeof text !== 'string' || text.length === 0) {
            return '';
        }
        const cleaned = clearText(text);
        // A trailing comma announces a following qualifier held in subfield d.
        if (/,$/.test(text) && entry['d']) {
            return `${cleaned},${entry['d']}`;
        }
        return cleaned;
    };

    const lookup = (link) => {
        if (typeof link !== 'string' || link.length === 0) {
            return '';
        }
        const occurrence = link.split('-')[1];
        if (!occurrence) {
            return '';
        }
        const prefix = `${ref}-${occurrence}`;
        const entry = alternates.find((v) =>
            v !== null && typeof v === 'object' && typeof v['6'] === 'string' && v['6'].startsWith(prefix));
        if (entry === undefined) {
            return '';
        }
        const reading = entry[subfield];
        return Array.isArray(reading)
            ? reading.map((item) => formatReading(item, entry)).join('')
            : formatReading(reading, entry);
    };

    const linkage = getValue(fields, ref, '6');
    const readings = Array.isArray(linkage)
        ? linkage.map((v) =>
            (v !== null && typeof v === 'object' && '6' in v) ? lookup(v['6']) : (v == null ? undefined : v[subfield]))
        : [lookup(linkage)];
    return readings.map((v) => clearText(v)).join('/');
}
/**
 * Returns the values of one subfield as a comma-joined string.
 *
 * When getValue hands back an array (repeated field), the cleaned `key`
 * value of every entry that has `key` is collected; array values are cleaned
 * element-wise and their empty elements dropped. When getValue hands back a
 * string, it is split on the delimiter (';' when none is given). Any other
 * result yields ''.
 *
 * @param {object} fields - Field values keyed by field id.
 * @param {string} id - Field id.
 * @param {string} key - Subfield code.
 * @param {string} [delimitor] - Delimiter passed to getValue and used to
 *   split a string result.
 * @returns {string} Collected values joined with ','.
 */
function getValueItemArray(fields, id, key, delimitor) {
    const data = getValue(fields, id, key, delimitor);
    const kind = instancetype(data);
    let collected = [];

    if (kind === 'array') {
        collected = data
            .filter((entry) => key in entry)
            .map((entry) => {
                const value = entry[key];
                if (instancetype(value) !== 'array') {
                    return clearText(value);
                }
                return value.map((el) => clearText(el)).filter((el) => el.length > 0);
            });
    } else if (kind === 'string') {
        const splitter = delimitor === undefined ? ';' : delimitor;
        collected = data.split(splitter);
    }

    return collected.flat().join(',');
}
/**
 * Returns the values of one subfield, collapsing repeated fields.
 *
 * For a non-repeated field the result of getValue is returned unchanged.
 * For a repeated field the cleaned `key` value of every entry is collected
 * (array values are flattened, a missing value counts as '') and then
 * reduced according to `delimitor`:
 *   - true   => first collected value
 *   - false  => last collected value
 *   - string => values joined with it (' ' when empty or omitted)
 *
 * Only a string delimiter is forwarded to getValue; a boolean used to be
 * passed through and ended up as the literal join separator "true"/"false".
 *
 * @param {object} fields - Field values keyed by field id.
 * @param {string} id - Field id.
 * @param {string} key - Subfield code.
 * @param {boolean|string} [delimitor] - Reduction mode, see above.
 * @returns {*} String for the join modes; a single value for boolean modes.
 */
function getValueFixArray(fields, id, key, delimitor) {
    const joiner = typeof delimitor === 'string' ? delimitor : undefined;
    const data = getValue(fields, id, key, joiner);
    if (!Array.isArray(data)) {
        return data;
    }

    const values = data.flatMap((entry) => {
        const value = entry == null ? undefined : entry[key];
        return Array.isArray(value)
            ? value.map((item) => clearText(item))
            : [clearText(value || '')];
    });

    if (typeof delimitor === 'boolean') {
        return delimitor ? values.shift() : values.pop();
    }
    return values.join(delimitor || ' ');
}
/**
 * Reads one value from the parsed fields.
 *
 * - Field absent: returns ''.
 * - Field is an array (repeated field): returns that array itself, untouched.
 * - Field is an object: returns its `key` value ('' when absent).
 * - Anything else: returns the field itself.
 * Non-array results are passed through clearText; an array of subfield
 * values is cleaned element-wise and joined with the delimiter.
 *
 * @param {object} fields - Field values keyed by field id.
 * @param {string} id - Field id.
 * @param {string} [key] - Subfield code.
 * @param {string} [delimitor=';'] - Join separator for repeated subfields.
 *   Any non-string value falls back to ';' so that the boolean modes used by
 *   getValueFixArray cannot leak in as the separator text "true"/"false".
 * @returns {*} See above.
 */
function getValue(fields, id, key, delimitor) {
    const separator = typeof delimitor === 'string' ? delimitor : ';';
    if (!(id in fields)) {
        return '';
    }

    const field = fields[id];
    let value;
    switch (instancetype(field)) {
        case 'array':
            return field;
        case 'object':
            value = (key in field) ? field[key] : '';
            break;
        default:
            value = field;
            break;
    }

    return instancetype(value) === 'array'
        ? value.map((item) => clearText(item)).join(separator)
        : clearText(value);
}
/**
 * Strips trailing punctuation from a string value.
 *
 * Removes the trailing run of the characters , . / ; = + : together with any
 * whitespace mixed into that run, then trims the result. Whitespace is part
 * of the stripped run so that input such as 'Title / ' is cleaned to 'Title'
 * (a trailing space used to shield the punctuation from removal).
 *
 * @param {*} str - Value to clean.
 * @returns {*} The cleaned string; non-string values are returned unchanged.
 */
function clearText(str) {
    if (instancetype(str) !== 'string') {
        return str;
    }
    return str.replace(/[\s,./;=+:]+$/, '').trim();
}
/**
 * Converts a 'YYYYMMDDhhmmss' string into a Date in local time.
 *
 * String.prototype.slice takes (start, end), not (start, length); the earlier
 * calls such as slice(4,2) always produced '' and therefore an Invalid Date.
 * Components missing from a shorter string default to the first day / zero
 * time; anything after the seconds is ignored.
 *
 * @param {string} str - Timestamp text.
 * @returns {Date} The parsed date (Invalid Date when the year is not numeric).
 * @throws {Error} When `str` is not a string.
 */
function strtodate(str) {
    if ('string' !== instancetype(str)) {
        throw new Error('Argument type error: first argumnet must be string type');
    }

    const part = (from, to, fallback) => {
        const parsed = Number.parseInt(str.slice(from, to), 10);
        return Number.isNaN(parsed) ? fallback : parsed;
    };

    return new Date(
        part(0, 4, NaN),
        part(4, 6, 1) - 1, // Date months are zero-based
        part(6, 8, 1),
        part(8, 10, 0),
        part(10, 12, 0),
        part(12, 14, 0)
    );
}
/**
 * Returns the lower-cased class tag of a value, i.e. the name inside
 * Object.prototype.toString's '[object Name]' result
 * ('array', 'string', 'object', 'null', ...).
 *
 * @param {*} obj - Any value.
 * @returns {string} Lower-cased tag name.
 */
function instancetype(obj) {
    const tag = Object.prototype.toString.call(obj); // '[object Name]'
    const name = tag.substring('[object '.length, tag.length - 1);
    return name.toLowerCase();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment