Skip to content

Instantly share code, notes, and snippets.

@earlgreyxxx
Created June 1, 2022 13:50
Show Gist options
  • Save earlgreyxxx/ca63afb187beda946d30480c7098f67c to your computer and use it in GitHub Desktop.
Save earlgreyxxx/ca63afb187beda946d30480c7098f67c to your computer and use it in GitHub Desktop.
/******************************************************************************
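Japan MARC .dat file reader: splits a Blob into MARC records and converts
each record into a header, its data fields and a flat bibliography object.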
******************************************************************************/
/**
 * Material type code -> Japanese display name.
 * The code is the single character read from index 6 of a record label.
 */
const MATERIALS = new Map(Object.entries({
  a: '図書',
  c: '楽譜',
  e: '地図資料',
  g: '映像資料',
  i: '録音資料(音楽録音資料を除く)',
  j: '音楽録音資料',
  k: '静止画資料',
  m: '電子資料',
  t: '文字資料(書写資料)',
}));
/**
 * Walks a Blob of back-to-back records and converts every record whose
 * material type is wanted.
 *
 * Each record starts with a 24-character label: characters 0-4 hold the
 * record length as decimal text and character 6 holds the material type.
 *
 * @param {Blob} blob - the whole file.
 * @param {Set<string>} [types] - material type codes to convert; an empty or
 *   omitted set converts every record.
 * @param {Function} [onprocess] - progress callback, called once per record
 *   as (convertedSoFar, startOffset / blob.size, marc). `marc` is omitted
 *   for records that were skipped by the type filter.
 * @returns {Promise<Object[]>} the converted records, in file order.
 * @throws {Error} when the blob is empty.
 */
async function processFile(blob,types,onprocess)
{
  const blobLen = blob.size;
  if(blobLen === 0)
    throw new Error('blob size is zero');

  const wanted = types || new Set();
  const notify = typeof onprocess === 'function' ? onprocess : () => {};
  const marcs = [];
  let start = 0;
  let count = 0;

  while(start < blobLen)
  {
    const label = await blob.slice(start,start + 24).text();
    const length = parseInt(label.slice(0,5),10);

    // A missing, zero or negative length cannot advance the cursor; stop
    // here instead of spinning forever on trailing or corrupt bytes.
    if(!(length > 0))
      break;

    const type = label.slice(6,7);
    if(wanted.size === 0 || wanted.has(type))
    {
      const marc = await processMarc(label,length,blob.slice(start,start + length));
      marc.header.type = type;
      marc.header.length = length;
      // Collect the record so the returned array is actually populated.
      marcs.push(marc);
      notify(count,start / blobLen,marc);
      count++;
    }
    else
    {
      notify(count,start / blobLen);
    }
    start += length;
  }
  return marcs;
}
/**
 * Converts one raw record into its header, data fields and bibliography.
 *
 * @param {string} label - the 24-character label at the start of the record.
 * @param {number} length - record length; accepted but not used here.
 * @param {Blob} blob - the bytes of this single record, label included.
 * @returns {Promise<{header: Object, fields: Object, bibliography: Object}>}
 */
async function processMarc(label,length,blob)
{
  // Label characters 12-16: offset inside the record where the data area
  // starts. Everything between the label (24 chars) and it is the directory.
  const dataStart = parseInt(label.slice(12,17));

  const directories = await _parseDirectory(blob.slice(24,dataStart));
  const header = _createHeader(label);
  const fields = await _parseDataFields(directories,blob.slice(dataStart));

  return {
    header,
    fields,
    bibliography: _convert(header.materialType,fields)
  };
}
/**
 * Breaks the 24-character record label into named header values.
 * Numeric positions are parsed with parseInt; all others are kept as text.
 *
 * @param {string} label - the record label.
 * @returns {Object} header values keyed by name.
 */
function _createHeader(label)
{
  const text = (from,to) => label.slice(from,to);
  const number = (from,to) => parseInt(label.slice(from,to));
  const materialType = text(6,7);

  // NOTE(review): 'OHTERS' and 'MVSMRLebel' look misspelled, but they are
  // runtime values/keys that callers may read, so they are kept as they are.
  return {
    'status': text(5,6),
    'level': text(7,8),
    'controlTypeCode': text(8,9),
    'charset': text(9,10) === 'a' ? 'UTF-8' : 'OHTERS',
    'materialType': materialType,
    'materialName': MATERIALS.get(materialType),
    'indicatorLength': number(10,11),
    'subfieldIdLength': number(11,12),
    'baseaddress': number(12,17),
    'encodeLevel': text(17,18),
    'descListFormat': text(18,19),
    'MVSMRLebel': text(19,20),
    'datafieldLength': number(20,21),
    'firstLocationLength': number(21,22),
    'unknownLength': number(22,23),
    'unknownItem': text(23,24)
  };
}
/**
 * Reads the record directory: a run of fixed 12-character entries.
 * Each entry is a 3-character field id, a 4-character length and a
 * 5-character offset.
 *
 * @param {Blob} blob - the directory part of a record.
 * @returns {Promise<Array<{id: string, length: number, offset: number}>>}
 */
async function _parseDirectory(blob)
{
  const ENTRY_SIZE = 12;
  const raw = await blob.text();
  // The final character is dropped (presumably the terminator that closes
  // the directory) before the text is cut into entries.
  const body = raw.slice(0,-1);
  const entryCount = Math.ceil(body.length / ENTRY_SIZE);

  return Array.from({ length: entryCount },(_,n) => {
    const entry = body.slice(n * ENTRY_SIZE,(n + 1) * ENTRY_SIZE);
    return {
      id: entry.slice(0,3),
      length: parseInt(entry.slice(3,7)),
      offset: parseInt(entry.slice(7,12))
    };
  });
}
/**
 * Reads every field listed in the directory out of the record's data area.
 *
 * The result maps field id -> value. A value is a plain string when the
 * numeric id is below 10, otherwise an object keyed by the first character
 * of each \u001f-separated piece. A key or field id that occurs more than
 * once is collected into an array.
 *
 * @param {Array<{id: string, length: number, offset: number}>} directories
 * @param {Blob} blob - the data area of one record.
 * @returns {Promise<Object>} parsed fields keyed by field id.
 */
async function _parseDataFields(directories,blob)
{
  const SEPARATOR = '\u001f';
  const TAB = '\t';
  const fields = {};

  for(const { id, length, offset } of directories)
  {
    // The last byte of the field is left out of the slice.
    const raw = await blob.slice(offset,offset + length - 1).text();
    const plainText = parseInt(id) < 10;

    // Non-empty pieces; outside the low-numbered fields the first character
    // is split off with a tab so it can serve as the key below.
    const pieces = [];
    for(const piece of raw.split(SEPARATOR))
    {
      const trimmed = piece.trim();
      if(trimmed.length > 0)
        pieces.push(plainText ? trimmed : trimmed.slice(0,1) + TAB + trimmed.slice(1));
    }
    if(pieces.length === 0)
      continue;

    let value = {};
    for(const piece of pieces)
    {
      if(!piece.includes(TAB))
      {
        // No key marker: the piece itself becomes the field value.
        value = piece;
        continue;
      }
      const [code,text] = piece.split(TAB);
      if(!(code in value))
      {
        value[code] = text;
        continue;
      }
      if(instancetype(value[code]) !== 'array')
        value[code] = [value[code]];
      value[code].push(text);
    }

    if(!(id in fields))
    {
      fields[id] = value;
      continue;
    }
    if(instancetype(fields[id]) !== 'array')
      fields[id] = [fields[id]];
    fields[id].push(value);
  }
  return fields;
}
/**
 * Flattens parsed fields into a bibliography object with `bib_*` keys.
 *
 * @param {string} type - material type code, looked up in MATERIALS.
 * @param {Object} fields - parsed fields keyed by field id. Not modified.
 * @returns {Object} plain object of bibliography values.
 */
function _convert(type,fields)
{
  const bib = new Map();

  bib.set('bib_book_title',getValueFixArray(fields,'245','a'));
  bib.set('bib_book_title_yomi',getYomi(fields,'245'));
  bib.set('bib_book_subtitle',getValueFixArray(fields,'740','a'));
  bib.set('bib_book_subtitle_yomi',getYomi(fields,'740'));
  bib.set('bib_series',getValueFixArray(fields,'490','a'));
  bib.set('bib_series_yomi',getYomi(fields,'490'));
  bib.set('bib_author',getValueFixArray(fields,'700','a','/'));
  bib.set('bib_author_yomi',getYomi(fields,'700'));
  bib.set('bib_version',getValueFixArray(fields,'250','a'));

  // Book symbol: first character of the author reading once everything
  // outside the listed katakana has been removed.
  // NOTE(review): 'オ' appears twice in the class and 'ヲ' not at all; the
  // second one may have been meant as 'ヲ'. Left unchanged pending confirmation.
  const author = bib.get('bib_author_yomi').replace(/[^アイウエオカ-ヂツ-モヤユヨラ-ロワオン]+/g,'');
  if(author.length > 0)
    bib.set('bib_book_symbol',author.slice(0,1));

  setBibPublish(fields,bib);

  bib.set('bib_price',getValueFixArray(fields,'020','c'));
  bib.set('bib_origin',getValueFixArray(fields,'740','a','|'));

  const content_intro = getContentIntro(fields);
  if(content_intro)
    bib.set('bib_content_intro',content_intro);

  bib.set('bib_note',getNote(fields));
  bib.set('bib_page_nums',getValueFixArray(fields,'300','a'));
  bib.set('bib_book_size',getValueFixArray(fields,'300','c'));
  bib.set('bib_language_name',getValueFixArray(fields,'040','b'));
  bib.set('bib_material_name',MATERIALS.get(type));
  bib.set('bib_frequency_name',getValueFixArray(fields,'310','a'));
  bib.set('bib_general_subject',getValueFixArray(fields,'650','a','/'));
  bib.set('bib_general_subject_yomi',getYomi(fields,'650'));
  bib.set('bib_class_symbol_code',getClassSymbolNdc(fields));
  bib.set('bib_mark_id','JP'+getValue(fields,'015','a'));

  let isbn = getValue(fields,'020','a');
  if(instancetype(isbn) === 'array')
  {
    // getValue hands back the caller's own array for a repeated field, so it
    // must be read, never shift()ed. Take the first occurrence that has 'a'.
    const withIsbn = isbn.find(v => instancetype(v) === 'object' && 'a' in v);
    isbn = withIsbn ? withIsbn.a : '';
  }
  bib.set('bib_isbn',clearText(isbn));

  return Object.fromEntries(bib);
}
/**
 * Fills the publication entries of `bib` (place, publisher, year) from
 * field 260, or from field 264 when 260 is absent. Does nothing when
 * neither field exists.
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @param {Map} bib - bibliography map that receives the values.
 */
function setBibPublish(fields,bib)
{
  const ref = ['260','264'].find(tag => tag in fields);
  if(ref === undefined)
    return;

  const names = getValueItemArray(fields,ref,'b');
  const reading = getYomi(fields,ref,'b');
  // The reading, when there is one, is appended after a slash.
  const publisher = reading.length > 0 ? [names,reading].join('/') : names;

  bib.set('bib_publish_place',getValueItemArray(fields,ref,'a'));
  bib.set('bib_publisher',publisher);
  bib.set('bib_publish_year',getValueItemArray(fields,ref,'c'));
}
/**
 * Collects the 'a' value of fields 500, 504 and 521 into one note string.
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @param {string} [delimitor='\n'] - text placed between notes.
 * @returns {string} the joined notes, or '' when none of the fields has one.
 */
function getNote(fields,delimitor)
{
  const d = delimitor || '\n';
  return ['500','504','521']
    .map(ref => getValueFixArray(fields,ref,'a',d))
    // Absent fields come back as ''; leave them out so the result has no
    // empty segments.
    .filter(note => typeof note === 'string' && note.length > 0)
    .join(d);
}
/**
 * Builds the contents text from the 'a' value(s) of field 505.
 *
 * The field is read with getValue rather than getValueFixArray, because the
 * latter always flattens a repeated field into a string, which left the
 * repeated-field branch unreachable and the delimiter ignored.
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @param {string} [delimitor='\n'] - text placed between entries.
 * @returns {string} the contents text, or '' when field 505 has no 'a'.
 */
function getContentIntro(fields,delimitor)
{
  const d = delimitor || '\n';
  const field = getValue(fields,'505','a',d);

  // Single occurrence: getValue has already extracted and cleaned the value.
  if(instancetype(field) !== 'array')
    return field;

  // Repeated field: keep occurrences that carry 'a'; an occurrence may hold
  // several 'a' values, so normalise each one to a list first.
  return field
    .filter(v => instancetype(v) === 'object' && 'a' in v)
    .map(v => [].concat(v['a']).map(text => clearText(text)).join(d))
    .join(d);
}
/**
 * Returns the 'a' value of the 084 field whose '2' value starts with "njb"
 * (presumably the marker for an NDC classification; confirm against the
 * format specification).
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @returns {string} the class symbol, or '' when no such field exists.
 */
function getClassSymbolNdc(fields)
{
  const isNdc = scheme => instancetype(scheme) === 'string' && /^njb/.test(scheme);
  const field = getValue(fields,'084','2');

  if(instancetype(field) === 'array')
  {
    // Repeated 084: an occurrence without a '2' value is skipped instead of
    // throwing while it is being tested.
    const hit = field.find(v => instancetype(v) === 'object' && isNdc(v['2']));
    // Cleaned the same way getValue cleans the single-occurrence value.
    return hit && 'a' in hit ? clearText(hit['a']) : '';
  }

  return isNdc(field) ? getValue(fields,'084','a') : '';
}
/**
 * Finds the reading (yomi) for field `ref` by following its '6' linkage
 * into the 880 fields.
 *
 * The '6' value of the source field has the form "<tag>-<occurrence>"; the
 * matching 880 field is the one whose own '6' value starts with
 * "<ref>-<occurrence>". Its `key` value is the reading.
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @param {string} ref - id of the field whose reading is wanted.
 * @param {string} [key='a'] - which value of the 880 field to read.
 * @returns {string} the reading(s) joined with '/', or '' when none is found.
 */
function getYomi(fields,ref,key)
{
  const k = key || 'a';

  // A record with exactly one 880 field stores it as a plain object rather
  // than an array; wrap it so that case is searched as well.
  const stored = fields['880'];
  const storedType = instancetype(stored);
  const alternates = storedType === 'array' ? stored : (storedType === 'object' ? [stored] : []);

  // Cleans one reading; a reading that ends with ',' gets the 'd' value of
  // the same 880 field appended after the comma.
  const ps = (s,o) => {
    if(!s)
      return '';
    const endsWithComma = s.endsWith(',');
    let text = clearText(s);
    if(endsWithComma && ('d' in o) && o['d'])
      text += `,${o['d']}`;
    return text;
  };

  const imp = src => {
    if(!src || instancetype(src) !== 'string')
      return '';
    const occu = (src.split('-'))[1];
    if(!occu)
      return '';

    // Plain prefix comparison: the linkage text comes from the file and
    // must not be interpreted as a regular expression.
    const prefix = `${ref}-${occu}`;
    const obj = alternates.find(v => instancetype(v) === 'object' &&
                                     instancetype(v['6']) === 'string' &&
                                     v['6'].startsWith(prefix));
    if(!obj)
      return '';

    const yomi = obj[k];
    return instancetype(yomi) === 'array' ? yomi.map(v => ps(v,obj)).join('') : ps(yomi,obj);
  };

  const fieldvalue = getValue(fields,ref,'6');
  // Repeated source field: an occurrence without a linkage contributes its
  // own `k` value instead of a reading.
  const rv = instancetype(fieldvalue) === 'array'
    ? fieldvalue.map(v => ('6' in v) ? imp(v['6']) : v[k])
    : [imp(fieldvalue)];
  return rv.map(v => clearText(v)).join('/');
}
/**
 * Returns every `key` value of field `id` as one comma-separated string.
 *
 * For a repeated field the `key` values of all occurrences are cleaned and
 * collected. For a single field the string produced by getValue is split on
 * the delimiter (';' when none is given) and re-joined with commas.
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @param {string} id - field id.
 * @param {string} key - which value of the field to read.
 * @param {string} [delimitor] - passed to getValue and used for the split.
 * @returns {string} comma-separated values, '' when there are none.
 */
function getValueItemArray(fields,id,key,delimitor)
{
  const data = getValue(fields,id,key,delimitor);
  const kind = instancetype(data);
  let items = [];

  if(kind === 'array')
  {
    items = data
      .filter(entry => key in entry)
      .map(entry => {
        const value = entry[key];
        return instancetype(value) === 'array'
          ? value.map(el => clearText(el)).filter(el => el.length > 0)
          : clearText(value);
      });
  }
  else if(kind === 'string')
  {
    items = data.split(delimitor === undefined ? ';' : delimitor);
  }

  return items.flat().join(',');
}
/**
 * Returns the `key` value(s) of field `id` reduced to a single value.
 *
 * `delimitor` selects how several values are reduced:
 *   - true    => the first value
 *   - false   => the last value
 *   - string  => all values joined with that string
 *   - omitted => a repeated field is joined with ' '; a single field is
 *                returned exactly as getValue produced it
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @param {string} id - field id.
 * @param {string} key - which value of the field to read.
 * @param {boolean|string} [delimitor] - reduction mode, see above.
 * @returns {*} the reduced value; '' when the field is absent.
 */
function getValueFixArray(fields,id,key,delimitor)
{
  const pick = instancetype(delimitor) === 'boolean';

  // A boolean is a selection mode, not a separator: never forward it to
  // getValue, which would use it as join text ("xtruey").
  let data = getValue(fields,id,key,pick ? undefined : delimitor);
  if(instancetype(data) !== 'array')
  {
    if(!pick || instancetype(fields[id]) !== 'object')
      return data;
    // Single occurrence in first/last mode: treat it as a list of one so
    // several `key` values inside it are selected from, not glued together.
    data = [fields[id]];
  }

  const va = [];
  for(const v of data)
  {
    const value = v[key];
    if(instancetype(value) === 'array')
      va.push(...value.map(el => clearText(el)));
    else
      va.push(clearText(value || ''));
  }

  if(pick)
    return delimitor ? va.shift() : va.pop();
  return va.join(delimitor || ' ');
}
/**
 * Reads the `key` value of field `id`.
 *
 * - Field absent: returns ''.
 * - Repeated field (array): returns the stored array itself, untouched;
 *   the caller must not modify it.
 * - Single field (object): returns its `key` value, cleaned with clearText;
 *   several values are cleaned and joined with the delimiter.
 * - Anything else (e.g. a plain string field): returned cleaned, `key` is
 *   ignored.
 *
 * @param {Object} fields - parsed fields keyed by field id.
 * @param {string} id - field id.
 * @param {string} key - which value of the field to read.
 * @param {string} [delimitor=';'] - join text for several values. Anything
 *   that is not a string (undefined, true, null ...) falls back to ';'
 *   instead of being turned into separator text.
 * @returns {*} see above.
 */
function getValue(fields,id,key,delimitor)
{
  const separator = instancetype(delimitor) === 'string' ? delimitor : ';';
  const has = (obj,name) => Object.prototype.hasOwnProperty.call(obj,name);

  // Own properties only: inherited names such as 'toString' are not fields.
  if(!has(fields,id))
    return '';

  const field = fields[id];
  const kind = instancetype(field);
  if(kind === 'array')
    return field;

  const raw = kind === 'object' ? (has(field,key) ? field[key] : '') : field;
  return instancetype(raw) === 'array'
    ? raw.map(v => clearText(v)).join(separator)
    : clearText(raw);
}
/**
 * Strips a trailing run of the punctuation characters , . / ; = + : from a
 * string and then trims surrounding whitespace. Non-string values are
 * returned unchanged.
 *
 * @param {*} str - value to clean.
 * @returns {*} the cleaned string, or `str` itself when it is not a string.
 */
function clearText(str)
{
  const isText = instancetype(str) === 'string';
  if(isText)
  {
    const trailingPunctuation = new RegExp('[,\.\/;=\+:]+$');
    return str.replace(trailingPunctuation,'').trim();
  }
  return str;
}
/**
 * Parses a compact "YYYYMMDDhhmmss" timestamp into a local-time Date.
 *
 * Components missing from a shorter string fall back to the start of the
 * period (month and day 1, time 00:00:00), so "20220601" is also accepted.
 * Non-numeric text yields an Invalid Date.
 *
 * @param {string} str - timestamp text.
 * @returns {Date} the parsed date.
 * @throws {Error} when `str` is not a string.
 */
function strtodate(str)
{
  if('string' !== instancetype(str))
    throw new Error('Argument type error: first argumnet must be string type');

  // slice() takes (start, end), not (start, length): every component needs
  // its own end index. slice(4,2) and friends always returned ''.
  const part = (from,to,fallback) => {
    const text = str.slice(from,to);
    return text.length === 0 ? fallback : parseInt(text,10);
  };

  return new Date(
    part(0,4,NaN),
    part(4,6,1) - 1,   // Date months are zero-based
    part(6,8,1),
    part(8,10,0),
    part(10,12,0),
    part(12,14,0)
  );
}
/**
 * Names the built-in type of a value in lower case, taken from its
 * Object.prototype.toString tag: 'array', 'object', 'string', 'null' ...
 *
 * @param {*} obj - any value.
 * @returns {string} the lower-cased type name.
 */
function instancetype(obj)
{
  // The tag has the form "[object Xxx]"; keep only the "Xxx" part.
  const tag = Object.prototype.toString.call(obj);
  const name = tag.substring('[object '.length,tag.length - 1);
  return name.toLowerCase();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment