lancejpollard/model.js

## model.js

const { uniq, omit } = require('lodash')
const serialization = require('./serialization')
const knex = require('../../knex')
const toneText = require('../../../../../../drumwork/deck/tone')
const callText = require('../../../../../../drumwork/deck/call')

let objectKeyToIdMap
let objectIdToKeyMap
let languageMap
let writingSystemMap
let propertyNameToIdMap
let propertyIdToNameMap
let encodingTypeToIdMap
let encodingIdToTypeMap
let contentSourceIdMap
let contentSourceKeyMap

let bootstrapped = false

module.exports = {
  getByValue,
}

async function getObjectTypeId(key) {
  await bootstrap()
  return objectKeyToIdMap[key].table_id
}

async function getPropertyNameById(id) {
  await bootstrap()
  return propertyIdToNameMap[id].key
}

async function getPropertyIdByName(name) {
  await bootstrap()
  return propertyNameToIdMap[name].id
}

async function getContentSourceKeyById(id) {
  await bootstrap()
  return contentSourceIdMap[id].key
}

async function getObjectTypeKey(id) {
  await bootstrap()
  return objectIdToKeyMap[id].table_name
}

async function getEncodingTypeKey(id) {
  await bootstrap()
  return encodingIdToTypeMap[id].key
}

async function getByValue(value) {
  if (value.length > 256) {
    return
  }

  if (value.match(/^\d+$/)) {
    return await getInteger(value)
  } else {
    return await getText(value)
  }
}

async function getText(text) {
  const record = await knex('word_transcriptions').where('text', text).first()
  return resolveText(record ?? { value: text })
}

async function resolveText(record) {
  if (record.id) {
    return await resolvePersistentText(record)
  } else {
    return await resolveDynamicText(record)
  }
}

async function resolveDynamicText(record) {
  // find other words that sum to its sum
  const outputWord = {
    transcriptions: [record],
    definitions: [],
    pronunciations: []
  }
}

async function resolvePersistentText(record) {
  const word = await knex('words').select('*').where('id', record.word_id).first()
  const transcriptions = await knex('word_transcriptions').select('*').where('word_id', word.id)
  const transcriptionsMap = transcriptions.reduce((m, x) => {
    m[x.id] = x
    return m
  }, {})
  const pronunciations = await knex('word_pronunciations').select('*').where('word_id', record.word_id)
  const definitions = await knex('word_definitions').select('*').where('word_id', record.word_id)
  const wordPropertyAssignments = await knex('property_assignments')
    .select('*')
    .where('object_type_id', await getObjectTypeId('words'))
    .where('object_id', word.id)
  const transcriptionPropertyAssignments = await knex('property_assignments')
    .select('*')
    .where('object_type_id', await getObjectTypeId('word_transcriptions'))
    .whereIn('object_id', transcriptions.map(x => x.id))
  const propertyQueries = {
    text_properties: [],
    boolean_properties: [],
    integer_properties: [],
    json_properties: [],
  }
  const propertyResults = {}
  const assignments = wordPropertyAssignments.concat(transcriptionPropertyAssignments)

  for (const assign of assignments) {
    const property_table_key = await getObjectTypeKey(assign.property_type_id)
    const queries = propertyQueries[property_table_key]
    queries.push(assign.property_id)
  }

  for (const key in propertyQueries) {
    const propertyTypeId = await getObjectTypeId(key)
    const ids = propertyQueries[key]
    const records = await knex(key).select('*').whereIn('id', ids)
    records.forEach(record => {
      if (key === 'integer_properties') {
        record.value = BigInt(record.value)
      } else if (key === 'boolean_properties') {
        record.value = JSON.parse(record.value)
      }
      propertyResults[`${propertyTypeId}:${record.id}`] = record
    })
  }

  const wordsMap = {
    [word.id]: word
  }

  await assignProperties(transcriptionsMap, transcriptionPropertyAssignments, propertyResults)
  await assignProperties(wordsMap, wordPropertyAssignments, propertyResults)

  const outputTranscriptions = []
  const outputDefinitions = []
  const outputPronunciations = []

  for (const transcription of transcriptions) {
    const outputTranscription = omit(transcription, ['id', 'word_id', 'encoding_type_id'])
    outputTranscription.encoding_type = await getEncodingTypeKey(transcription.encoding_type_id)
    outputTranscriptions.push(outputTranscription)
  }

  for (const definition of definitions) {
    outputDefinitions.push({
      content_source: {
        key: await getContentSourceKeyById(definition.content_source_id)
      },
      text: definition.text
    })
  }

  for (const pronunciation of pronunciations) {
    outputPronunciations.push({
      tone: toneText(pronunciation.text),
      callText: callText(pronunciation.text),
      callAscii: pronunciation.text,
    })
  }

  const baseTranscription = outputTranscriptions[0]

  const sumAssociations = await getWordSumAssociations(baseTranscription)

  // find similar ones to resolve to it.

  const outputWord = {
    type: 'word',
    ...(omit(word, ['id', 'language_id', 'weight'])),
    transcriptions: outputTranscriptions,
    definitions: outputDefinitions,
    pronunciations: outputPronunciations,
    sumAssociations
  }

  return outputWord
}

async function getPropertyIdsMatching(pattern) {
  const ids = []
  for (const name in propertyNameToIdMap) {
    const propertyNameRecord = propertyNameToIdMap[name]
    if (name.match(pattern)) {
      ids.push(propertyNameRecord.id)
    }
  }
  return ids
}

async function getWordSumAssociations(transcription) {
  const integers = []
  for (const name in transcription.gematria) {
    const data = transcription.gematria[name]
    integers.push(data.sum, data.peak)
  }
  const uniqIntegers = uniq(integers)
  const integerPropertyTypeId = await getObjectTypeId('integer_properties')
  const gematriaPropertyNameIds = await getPropertyIdsMatching(/gematria\/.+\/sum/)
  const transcriptionObjectTypeId = await getObjectTypeId('word_transcriptions')
  const words = await knex('property_assignments')
    .select([
      'words.id as id'
    ])
    .innerJoin('integer_properties', 'property_assignments.property_id', 'integer_properties.id')
    .where('property_assignments.property_type_id', integerPropertyTypeId)
    .where('property_assignments.object_type_id', transcriptionObjectTypeId)
    .whereIn('property_assignments.property_name_id', gematriaPropertyNameIds)
    .whereIn('integer_properties.value', uniqIntegers)
    .innerJoin('word_transcriptions', 'property_assignments.object_id', 'word_transcriptions.id')
    .innerJoin('words', 'word_transcriptions.word_id', 'words.id')
    .orderBy('words.weight', 'desc')
    .limit(100)

  const transcriptions = await knex('word_transcriptions')
    .distinct()
    .select([
      'word_transcriptions.*',
    ])
    .whereIn('word_id', words.map(x => x.id))
    .innerJoin('property_assignments', 'word_transcriptions.id', 'property_assignments.object_id')
    // .where('property_assignments.object_type_id', transcriptionObjectTypeId)
    .whereIn('property_assignments.property_name_id', gematriaPropertyNameIds)
    .innerJoin('integer_properties', 'property_assignments.property_id', 'integer_properties.id')
    .whereIn('integer_properties.value', uniqIntegers)

  const transcriptionsMap = transcriptions.reduce((m, x) => {
    m[x.id] = x
    return m
  }, {})

  const transcriptionPropertyAssignments = await knex('property_assignments')
    .select('*')
    .where('object_type_id', transcriptionObjectTypeId)
    .whereIn('object_id', transcriptions.map(x => x.id))
    .whereIn('property_assignments.property_name_id', gematriaPropertyNameIds)
    .innerJoin('integer_properties', 'property_assignments.property_id', 'integer_properties.id')
    .whereIn('integer_properties.value', uniqIntegers)

  const propertyQueries = {
    text_properties: [],
    boolean_properties: [],
    integer_properties: [],
    json_properties: [],
  }
  const propertyResults = {}

  for (const assign of transcriptionPropertyAssignments) {
    const property_table_key = await getObjectTypeKey(assign.property_type_id)
    const queries = propertyQueries[property_table_key]
    queries.push(assign.property_id)
  }

  for (const key in propertyQueries) {
    const propertyTypeId = await getObjectTypeId(key)
    const ids = propertyQueries[key]
    const records = await knex(key).select('*').whereIn('id', ids)
    records.forEach(record => {
      if (key === 'integer_properties') {
        record.value = BigInt(record.value)
      } else if (key === 'boolean_properties') {
        record.value = JSON.parse(record.value)
      }
      propertyResults[`${propertyTypeId}:${record.id}`] = record
    })
  }

  await assignProperties(transcriptionsMap, transcriptionPropertyAssignments, propertyResults)

  const transcriptionsByWordId = transcriptions.reduce((m, x) => {
    const array = m[x.word_id] = m[x.word_id] ?? []
    array.push(x)
    return m
  }, {})

  const outputWords = []

  for (const word of words) {
    const outputTranscriptions = transcriptionsByWordId[word.id].map(x => omit(x, ['id', 'word_id', 'encoding_type_id']))
    outputWords.push({
      type: 'word',
      ...(omit(word, ['id', 'language_id', 'weight'])),
      transcriptions: outputTranscriptions,
      definitions: [],
      pronunciations: []
    })
  }

  return outputWords
}

async function assignProperties(map, assignments, results) {
  for (const assign of assignments) {
    const propertyName = await getPropertyNameById(assign.property_name_id)
    const parts = propertyName.split('/')
    let node = map[assign.object_id]
    let i = 0
    while (i < parts.length - 1) {
      node = node[parts[i]] = node[parts[i]] ?? {}
      i++
    }
    node[parts[i]] = results[`${assign.property_type_id}:${assign.property_id}`].value
  }
}

async function getInteger(value) {
  const bigint = BigInt(value)
  const int = Number(bigint)
  if (String(int) === bigint.toString()) {
    return await getSmallInteger(int, value)
  } else {
    return await getBigInteger(bigint, value)
  }
}

async function getBigInteger(int, value) {
  const record = await knex('big_integers').where('value', value).first()
  return resolveBigInteger(record ?? { value: int, string: value })
}

async function getSmallInteger(int, value) {
  const record = await knex('integers').where('value', int).first()
  return resolveSmallInteger(record ?? { value: int, string: value })
}

async function getList(offset = 0, pageSize = 100) {

}

async function bootstrap() {
  if (bootstrapped) {
    return
  }

  if (!objectKeyToIdMap) {
    const records = await knex('object_types').select('*')
    objectKeyToIdMap = records.reduce((m, x) => {
      m[x.table_name] = x
      return m
    }, {})
    objectIdToKeyMap = records.reduce((m, x) => {
      m[x.table_id] = x
      return m
    }, {})
  }

  if (!languageMap) {
    languageMap = (await knex('languages').select('*')).reduce((m, x) => {
      m[x.iso2] = x
      return m
    }, {})
  }

  if (!writingSystemMap) {
    writingSystemMap = (await knex('writing_systems').select('*')).reduce((m, x) => {
      m[x.code] = x
      return m
    }, {})
  }

  if (!propertyIdToNameMap) {
    const records = await knex('property_names').select('*')
    propertyIdToNameMap = records.reduce((m, x) => {
      m[x.id] = x
      return m
    }, {})
    propertyNameToIdMap = records.reduce((m, x) => {
      m[x.key] = x
      return m
    }, {})
  }

  if (!encodingTypeToIdMap) {
    const records = await knex('encoding_types').select('*')
    encodingTypeToIdMap = records.reduce((m, x) => {
      m[x.key] = x
      return m
    }, {})
    encodingIdToTypeMap = records.reduce((m, x) => {
      m[x.id] = x
      return m
    }, {})
  }

  if (!contentSourceIdMap) {
    const records = await knex('content_sources').select('*')
    contentSourceKeyMap = records.reduce((m, x) => {
      m[x.key] = x
      return m
    }, {})
    contentSourceIdMap = records.reduce((m, x) => {
      m[x.id] = x
      return m
    }, {})
  }

  bootstrapped = true
}

	const { uniq, omit } = require('lodash')
	const serialization = require('./serialization')
	const knex = require('../../knex')
	const toneText = require('../../../../../../drumwork/deck/tone')
	const callText = require('../../../../../../drumwork/deck/call')

	let objectKeyToIdMap
	let objectIdToKeyMap
	let languageMap
	let writingSystemMap
	let propertyNameToIdMap
	let propertyIdToNameMap
	let encodingTypeToIdMap
	let encodingIdToTypeMap
	let contentSourceIdMap
	let contentSourceKeyMap

	let bootstrapped = false

	module.exports = {
	getByValue,
	}

	async function getObjectTypeId(key) {
	await bootstrap()
	return objectKeyToIdMap[key].table_id
	}

	async function getPropertyNameById(id) {
	await bootstrap()
	return propertyIdToNameMap[id].key
	}

	async function getPropertyIdByName(name) {
	await bootstrap()
	return propertyNameToIdMap[name].id
	}

	async function getContentSourceKeyById(id) {
	await bootstrap()
	return contentSourceIdMap[id].key
	}

	async function getObjectTypeKey(id) {
	await bootstrap()
	return objectIdToKeyMap[id].table_name
	}

	async function getEncodingTypeKey(id) {
	await bootstrap()
	return encodingIdToTypeMap[id].key
	}

	async function getByValue(value) {
	if (value.length > 256) {
	return
	}

	if (value.match(/^\d+$/)) {
	return await getInteger(value)
	} else {
	return await getText(value)
	}
	}

	async function getText(text) {
	const record = await knex('word_transcriptions').where('text', text).first()
	return resolveText(record ?? { value: text })
	}

	async function resolveText(record) {
	if (record.id) {
	return await resolvePersistentText(record)
	} else {
	return await resolveDynamicText(record)
	}
	}

	async function resolveDynamicText(record) {
	// find other words that sum to its sum
	const outputWord = {
	transcriptions: [record],
	definitions: [],
	pronunciations: []
	}
	}

	async function resolvePersistentText(record) {
	const word = await knex('words').select('*').where('id', record.word_id).first()
	const transcriptions = await knex('word_transcriptions').select('*').where('word_id', word.id)
	const transcriptionsMap = transcriptions.reduce((m, x) => {
	m[x.id] = x
	return m
	}, {})
	const pronunciations = await knex('word_pronunciations').select('*').where('word_id', record.word_id)
	const definitions = await knex('word_definitions').select('*').where('word_id', record.word_id)
	const wordPropertyAssignments = await knex('property_assignments')
	.select('*')
	.where('object_type_id', await getObjectTypeId('words'))
	.where('object_id', word.id)
	const transcriptionPropertyAssignments = await knex('property_assignments')
	.select('*')
	.where('object_type_id', await getObjectTypeId('word_transcriptions'))
	.whereIn('object_id', transcriptions.map(x => x.id))
	const propertyQueries = {
	text_properties: [],
	boolean_properties: [],
	integer_properties: [],
	json_properties: [],
	}
	const propertyResults = {}
	const assignments = wordPropertyAssignments.concat(transcriptionPropertyAssignments)

	for (const assign of assignments) {
	const property_table_key = await getObjectTypeKey(assign.property_type_id)
	const queries = propertyQueries[property_table_key]
	queries.push(assign.property_id)
	}

	for (const key in propertyQueries) {
	const propertyTypeId = await getObjectTypeId(key)
	const ids = propertyQueries[key]
	const records = await knex(key).select('*').whereIn('id', ids)
	records.forEach(record => {
	if (key === 'integer_properties') {
	record.value = BigInt(record.value)
	} else if (key === 'boolean_properties') {
	record.value = JSON.parse(record.value)
	}
	propertyResults[`${propertyTypeId}:${record.id}`] = record
	})
	}

	const wordsMap = {
	[word.id]: word
	}

	await assignProperties(transcriptionsMap, transcriptionPropertyAssignments, propertyResults)
	await assignProperties(wordsMap, wordPropertyAssignments, propertyResults)

	const outputTranscriptions = []
	const outputDefinitions = []
	const outputPronunciations = []

	for (const transcription of transcriptions) {
	const outputTranscription = omit(transcription, ['id', 'word_id', 'encoding_type_id'])
	outputTranscription.encoding_type = await getEncodingTypeKey(transcription.encoding_type_id)
	outputTranscriptions.push(outputTranscription)
	}

	for (const definition of definitions) {
	outputDefinitions.push({
	content_source: {
	key: await getContentSourceKeyById(definition.content_source_id)
	},
	text: definition.text
	})
	}

	for (const pronunciation of pronunciations) {
	outputPronunciations.push({
	tone: toneText(pronunciation.text),
	callText: callText(pronunciation.text),
	callAscii: pronunciation.text,
	})
	}

	const baseTranscription = outputTranscriptions[0]

	const sumAssociations = await getWordSumAssociations(baseTranscription)

	// find similar ones to resolve to it.

	const outputWord = {
	type: 'word',
	...(omit(word, ['id', 'language_id', 'weight'])),
	transcriptions: outputTranscriptions,
	definitions: outputDefinitions,
	pronunciations: outputPronunciations,
	sumAssociations
	}

	return outputWord
	}

	async function getPropertyIdsMatching(pattern) {
	const ids = []
	for (const name in propertyNameToIdMap) {
	const propertyNameRecord = propertyNameToIdMap[name]
	if (name.match(pattern)) {
	ids.push(propertyNameRecord.id)
	}
	}
	return ids
	}

	async function getWordSumAssociations(transcription) {
	const integers = []
	for (const name in transcription.gematria) {
	const data = transcription.gematria[name]
	integers.push(data.sum, data.peak)
	}
	const uniqIntegers = uniq(integers)
	const integerPropertyTypeId = await getObjectTypeId('integer_properties')
	const gematriaPropertyNameIds = await getPropertyIdsMatching(/gematria\/.+\/sum/)
	const transcriptionObjectTypeId = await getObjectTypeId('word_transcriptions')
	const words = await knex('property_assignments')
	.select([
	'words.id as id'
	])
	.innerJoin('integer_properties', 'property_assignments.property_id', 'integer_properties.id')
	.where('property_assignments.property_type_id', integerPropertyTypeId)
	.where('property_assignments.object_type_id', transcriptionObjectTypeId)
	.whereIn('property_assignments.property_name_id', gematriaPropertyNameIds)
	.whereIn('integer_properties.value', uniqIntegers)
	.innerJoin('word_transcriptions', 'property_assignments.object_id', 'word_transcriptions.id')
	.innerJoin('words', 'word_transcriptions.word_id', 'words.id')
	.orderBy('words.weight', 'desc')
	.limit(100)

	const transcriptions = await knex('word_transcriptions')
	.distinct()
	.select([
	'word_transcriptions.*',
	])
	.whereIn('word_id', words.map(x => x.id))
	.innerJoin('property_assignments', 'word_transcriptions.id', 'property_assignments.object_id')
	// .where('property_assignments.object_type_id', transcriptionObjectTypeId)
	.whereIn('property_assignments.property_name_id', gematriaPropertyNameIds)
	.innerJoin('integer_properties', 'property_assignments.property_id', 'integer_properties.id')
	.whereIn('integer_properties.value', uniqIntegers)

	const transcriptionsMap = transcriptions.reduce((m, x) => {
	m[x.id] = x
	return m
	}, {})

	const transcriptionPropertyAssignments = await knex('property_assignments')
	.select('*')
	.where('object_type_id', transcriptionObjectTypeId)
	.whereIn('object_id', transcriptions.map(x => x.id))
	.whereIn('property_assignments.property_name_id', gematriaPropertyNameIds)
	.innerJoin('integer_properties', 'property_assignments.property_id', 'integer_properties.id')
	.whereIn('integer_properties.value', uniqIntegers)

	const propertyQueries = {
	text_properties: [],
	boolean_properties: [],
	integer_properties: [],
	json_properties: [],
	}
	const propertyResults = {}

	for (const assign of transcriptionPropertyAssignments) {
	const property_table_key = await getObjectTypeKey(assign.property_type_id)
	const queries = propertyQueries[property_table_key]
	queries.push(assign.property_id)
	}

	for (const key in propertyQueries) {
	const propertyTypeId = await getObjectTypeId(key)
	const ids = propertyQueries[key]
	const records = await knex(key).select('*').whereIn('id', ids)
	records.forEach(record => {
	if (key === 'integer_properties') {
	record.value = BigInt(record.value)
	} else if (key === 'boolean_properties') {
	record.value = JSON.parse(record.value)
	}
	propertyResults[`${propertyTypeId}:${record.id}`] = record
	})
	}

	await assignProperties(transcriptionsMap, transcriptionPropertyAssignments, propertyResults)

	const transcriptionsByWordId = transcriptions.reduce((m, x) => {
	const array = m[x.word_id] = m[x.word_id] ?? []
	array.push(x)
	return m
	}, {})

	const outputWords = []

	for (const word of words) {
	const outputTranscriptions = transcriptionsByWordId[word.id].map(x => omit(x, ['id', 'word_id', 'encoding_type_id']))
	outputWords.push({
	type: 'word',
	...(omit(word, ['id', 'language_id', 'weight'])),
	transcriptions: outputTranscriptions,
	definitions: [],
	pronunciations: []
	})
	}

	return outputWords
	}

	async function assignProperties(map, assignments, results) {
	for (const assign of assignments) {
	const propertyName = await getPropertyNameById(assign.property_name_id)
	const parts = propertyName.split('/')
	let node = map[assign.object_id]
	let i = 0
	while (i < parts.length - 1) {
	node = node[parts[i]] = node[parts[i]] ?? {}
	i++
	}
	node[parts[i]] = results[`${assign.property_type_id}:${assign.property_id}`].value
	}
	}

	async function getInteger(value) {
	const bigint = BigInt(value)
	const int = Number(bigint)
	if (String(int) === bigint.toString()) {
	return await getSmallInteger(int, value)
	} else {
	return await getBigInteger(bigint, value)
	}
	}

	async function getBigInteger(int, value) {
	const record = await knex('big_integers').where('value', value).first()
	return resolveBigInteger(record ?? { value: int, string: value })
	}

	async function getSmallInteger(int, value) {
	const record = await knex('integers').where('value', int).first()
	return resolveSmallInteger(record ?? { value: int, string: value })
	}

	async function getList(offset = 0, pageSize = 100) {

	}

	async function bootstrap() {
	if (bootstrapped) {
	return
	}

	if (!objectKeyToIdMap) {
	const records = await knex('object_types').select('*')
	objectKeyToIdMap = records.reduce((m, x) => {
	m[x.table_name] = x
	return m
	}, {})
	objectIdToKeyMap = records.reduce((m, x) => {
	m[x.table_id] = x
	return m
	}, {})
	}

	if (!languageMap) {
	languageMap = (await knex('languages').select('*')).reduce((m, x) => {
	m[x.iso2] = x
	return m
	}, {})
	}

	if (!writingSystemMap) {
	writingSystemMap = (await knex('writing_systems').select('*')).reduce((m, x) => {
	m[x.code] = x
	return m
	}, {})
	}

	if (!propertyIdToNameMap) {
	const records = await knex('property_names').select('*')
	propertyIdToNameMap = records.reduce((m, x) => {
	m[x.id] = x
	return m
	}, {})
	propertyNameToIdMap = records.reduce((m, x) => {
	m[x.key] = x
	return m
	}, {})
	}

	if (!encodingTypeToIdMap) {
	const records = await knex('encoding_types').select('*')
	encodingTypeToIdMap = records.reduce((m, x) => {
	m[x.key] = x
	return m
	}, {})
	encodingIdToTypeMap = records.reduce((m, x) => {
	m[x.id] = x
	return m
	}, {})
	}

	if (!contentSourceIdMap) {
	const records = await knex('content_sources').select('*')
	contentSourceKeyMap = records.reduce((m, x) => {
	m[x.key] = x
	return m
	}, {})
	contentSourceIdMap = records.reduce((m, x) => {
	m[x.id] = x
	return m
	}, {})
	}

	bootstrapped = true
	}