qgustavor/fix-subtitles.js

## fix-subtitles.js
import { parseFlags } from 'https://deno.land/x/cliffy@v0.25.7/flags/mod.ts'
import parse from 'https://cdn.skypack.dev/pin/@qgustavor/ass-parser@v0.2.2-3MSleKF9Tb13M9TVjKIV/mode=imports/optimized/@qgustavor/ass-parser.js'
import stringify from 'https://cdn.skypack.dev/pin/@qgustavor/ass-stringify@v0.1.8-dniPlKxAanwMSbtHFyUD/mode=imports/optimized/@qgustavor/ass-stringify.js'
import JSON5 from 'https://deno.land/x/json5@v1.0.0/mod.ts'

// Built-in defaults. They are overridden by config.json, which is in turn
// overridden by command-line flags (see the spread order in `config` below).
const defaultConfig = {
  targetDir: '..',
  filenameReplacement: null,
  handleLineBreaks: false,
  shiftTimes: false,
  fontScale: 1,
  replacements: []
}
// config.json is optional and parsed as JSON5; any failure to read it falls
// back to an empty configuration.
const storedConfig = JSON5.parse(await Deno.readTextFile('config.json').catch(() => '{}'))
const { flags } = parseFlags(Deno.args, {
  flags: [{
    name: 'targetDir',
    aliases: ['target-dir'],
  }, {
    name: 'animeId',
    aliases: ['anime-id'],
    type: 'number'
  }, {
    name: 'handleLineBreaks',
    aliases: ['handle-line-breaks'],
    type: 'boolean'
  }, {
    name: 'shiftTimes',
    aliases: ['shift-times'],
    type: 'number'
  }, {
    name: 'fontScale',
    aliases: ['font-scale'],
    // Deliberately no `default` here: a flag default is always present in
    // `flags`, and since flags are spread last it would override the fontScale
    // stored in config.json. defaultConfig already supplies the fallback of 1.
    type: 'number'
  }]
})
// Later sources win: defaults < config.json < command-line flags.
const config = {
  ...defaultConfig,
  ...storedConfig,
  ...flags
}

// Character list for the anime. It is read from characters.json when that
// file exists; otherwise it is downloaded from the Jikan API and cached there.
let characterData

try {
  characterData = JSON5.parse(await Deno.readTextFile('characters.json'))
} catch (e) {
  // Only a missing cache file triggers a download; anything else is rethrown.
  if (e.code !== 'ENOENT') throw e

  if (!config.animeId) config.animeId = Number(prompt('Enter MyAnimeList Anime ID:'))
  if (!config.animeId) throw new Error('Invalid or missing anime ID')

  const response = await fetch(`https://api.jikan.moe/v4/anime/${config.animeId}/characters`)
  if (!response.ok) {
    // Release the unread body before bailing out.
    await response.body?.cancel()
    throw new Error(`Failed to fetch characters for anime ${config.animeId}: HTTP ${response.status}`)
  }
  characterData = await response.json()
  // Validate before caching: writing an error payload to characters.json would
  // make every later run fail when it reads `characterData.data`.
  if (!Array.isArray(characterData?.data)) {
    throw new Error(`Unexpected response for anime ${config.animeId}: missing character list`)
  }
  await Deno.writeTextFile('characters.json', JSON.stringify(characterData))
}

// Escapes regular-expression metacharacters so a name is always matched
// literally (names such as 'C.C.' would otherwise act as patterns).
const escapeRegex = str => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
// Builds a regex matching any of the given literal words. With no words it
// matches nothing; an empty alternation would instead match the empty string
// at every position of every line.
const anyOf = (words, flags) => new RegExp(words.length ? words.map(escapeRegex).join('|') : '(?!)', flags)

// Character names with the first ', ' turned into a space ('A, B' -> 'A B').
// Names containing a parenthesised part are dropped.
const names = characterData.data
  .map(e => e.character.name.replace(', ', ' '))
  .filter(e => !e.match(/\(.*\)/))
// Every distinct space-separated word that appears in any name.
const isolatedNames = Array.from(new Set(names.join(' ').split(' ')))
// For each word, derive the spelling with long vowels collapsed ('ou' -> 'o',
// doubled vowel -> single vowel). Words unchanged by that are skipped.
// Entries are [regex finding the collapsed spelling, original word, collapsed spelling].
const replacementMap = isolatedNames.map(name => {
  const replacement = name
    .replace(/ou/g, 'o')
    .replace(/([aiueo])\1/g, '$1')
  if (replacement === name) return null
  // The prefix group also accepts the ASS escapes \N, \n and \h, because their
  // letter leaves no word boundary in front of a name that follows directly.
  const regex = new RegExp('(^|\\b|\\\\[Nnh])(' + escapeRegex(replacement) + ')(\\b|$)', 'gi')
  return [regex, name, replacement]
}).filter(e => e)
// Cheap pre-check: does a line contain any collapsed spelling at all?
// NOTE(review): this check is case-sensitive while the per-word regexes above
// are case-insensitive, so an all-caps name is only fixed when the same line
// also contains a case-exact match — confirm whether that is intended.
const matcher = anyOf(replacementMap.map(e => e[2]))

// Matches words built only from the romanized syllables listed here
// (presumably Hepburn-style romaji — confirm coverage against real names).
const japaneseRegex = /^([kstnhmyrgzpb]?[aiueo]|([sc]h|[knhmrgbp]y)[auo]|[sc]hi|tsu|[fz]u|wa|n|ji|d[aeo])+$/i
// invertedNames[i] is names[i] with its two words swapped when both words
// match japaneseRegex; every other name is kept unchanged.
const invertedNames = names.map(e => {
  const parts = e.split(' ')
  if (parts.length !== 2 || !parts[0].match(japaneseRegex) || !parts[1].match(japaneseRegex)) {
    return e
  }
  return parts.reverse().join(' ')
})
const invertedMatcher = anyOf(invertedNames, 'g')

// Parse the configured search patterns once up front instead of once per
// dialogue line. String.prototype.replaceAll throws a TypeError for a RegExp
// without the `g` flag, so patterns such as '/foo/i' are upgraded to global.
const toGlobalSearch = pattern => {
  const search = parseRegex(pattern)
  if (search instanceof RegExp && !search.global) {
    return new RegExp(search.source, search.flags + 'g')
  }
  return search
}
const textReplacements = config.replacements
  .map(([searchStr, replacement]) => [toGlobalSearch(searchStr), replacement])
const filenameSearch = config.filenameReplacement && toGlobalSearch(config.filenameReplacement[0])

// The target directory is resolved relative to this script's URL. Without a
// trailing slash, the last path segment of a base URL is replaced when a file
// name is resolved against it, so a targetDir such as 'out' would be dropped.
const targetDirPath = String(config.targetDir)
const targetDir = new URL(
  /(^|[\\/])$/.test(targetDirPath) ? targetDirPath : targetDirPath + '/',
  import.meta.url
)

// Process every .ass file in the current working directory.
for await (const file of Deno.readDir('.')) {
  if (!file.name.endsWith('.ass')) continue
  const data = await Deno.readTextFile(file.name)
  const parsed = parse(data, { comments: true })
  const eventsSection = parsed.find(e => e.section === 'Events')
  if (!eventsSection) {
    console.warn('Skipping file without an Events section:', file.name)
    continue
  }

  for (const event of eventsSection.body) {
    if (event.key !== 'Dialogue') continue

    let text = event.value.Text
    // Put the original spelling of character names back, keeping occurrences
    // that contain no lowercase letter in upper case.
    if (text.match(matcher)) {
      for (const [replacement, name] of replacementMap) {
        text = text.replaceAll(replacement, (all, prefix, oldWord) => {
          const allCaps = !oldWord.match(/[a-z]/)
          return prefix + (allCaps ? name.toUpperCase() : name)
        })
      }
    }

    // Swap inverted names back to the word order used in `names`
    text = text.replaceAll(invertedMatcher, e => {
      const index = invertedNames.indexOf(e)
      if (index === -1) {
        console.log('Inverted name matching error with', e)
        return e
      }
      return names[index]
    })

    // Lines whose override block contains \pos, \move or \clip are left alone
    if (config.handleLineBreaks && !text.match(/\{.*\\(pos|move|clip).*\}/)) {
      text = text
        // Removes line breaks before single words
        .replace(/\s*\\N\s*(\S+\s*$)/m, ' $1')
        // Moves line breaks close to punctuation
        .replace(/([,.?!]|-(?!\w))([^,.?!-]{1,4})\s*\\N\s*/m, '$1\\N$2 ')
        // Removes repeated spaces and trim
        .replace(/ +/g, ' ').trim()

      // Length is measured without override blocks ({...})
      if (text.replaceAll(/\{.*?\}/g, '').length < 45) {
        text = text
          // Removes line breaks from short lines
          .replace(/([^?!.]*)\\N(.*?)$/, '$1 $2')
          // Removes repeated spaces
          .replace(/ +/g, ' ')
      }
    }

    // Handle replacements
    for (const [search, replacement] of textReplacements) {
      text = text.replaceAll(search, replacement)
    }

    event.value.Text = text

    // Shift timings
    if (config.shiftTimes) {
      event.value.Start = handleShift(event.value.Start, config.shiftTimes)
      event.value.End = handleShift(event.value.End, config.shiftTimes)
    }
  }

  // Handle font sizes (a file without a styles section is simply left as is)
  if (config.fontScale !== 1) {
    parsed.find(e => e.section.includes('Styles'))?.body.forEach(style => {
      if (style.key !== 'Style') return
      style.value.Fontsize = Math.round(style.value.Fontsize * config.fontScale)
    })
  }

  const filename = filenameSearch
    ? file.name.replaceAll(filenameSearch, config.filenameReplacement[1])
    : file.name
  // Percent-encode the name so characters such as '#', '?' and '%' stay part
  // of the path instead of being parsed as URL syntax.
  const targetPath = new URL(encodeURIComponent(filename), targetDir)
  await Deno.writeTextFile(targetPath, stringify(parsed))
}

/**
 * Shifts a subtitle timestamp by a number of seconds.
 *
 * @param {string} time - Colon-separated timestamp such as `H:MM:SS.cc`.
 * @param {number} delta - Seconds to add; negative values shift earlier.
 * @returns {string} The shifted time as `H:MM:SS.cc`, clamped at zero.
 * @throws {RangeError} If the resulting time is not a finite number.
 */
function handleShift (time, delta) {
  const seconds = time.split(':').reduce((sum, part) => sum * 60 + Number(part), 0) + delta
  const clamped = Math.max(0, seconds)
  if (!Number.isFinite(clamped)) throw new RangeError('Invalid time value')

  // Round to whole milliseconds first so floating-point noise (0.7 + 0.1 is
  // 0.7999999999999999) cannot knock a centisecond off; digits below a
  // centisecond are then dropped.
  const centiseconds = Math.floor(Math.round(clamped * 1000) / 10)
  const pad = value => String(value).padStart(2, '0')
  // Hours are not padded and may exceed one digit.
  const hours = Math.floor(centiseconds / 360000)
  const minutes = Math.floor(centiseconds / 6000) % 60
  const wholeSeconds = Math.floor(centiseconds / 100) % 60
  return `${hours}:${pad(minutes)}:${pad(wholeSeconds)}.${pad(centiseconds % 100)}`
}

// Edited from regex-parser@2.2.11
/**
 * Converts a `/pattern/flags` string into a RegExp.
 *
 * The input is returned unchanged (to be used as a literal search string) when
 * no flags are found or when the flags fail validation, so '/abc/' without
 * flags and plain text such as 'Shinichi' both stay strings.
 *
 * @param {string} input - Search text or `/pattern/flags` string.
 * @returns {RegExp|string} A RegExp built from the pattern and flags, or `input` itself.
 * @throws {Error} If `input` is not a string.
 * @throws {SyntaxError} From the RegExp constructor, for an invalid pattern or
 *   for flags that pass the check below but are not supported by JavaScript.
 */
function parseRegex (input) {
  // Validate input
  if (typeof input !== 'string') {
    throw new Error('Invalid input. Input must be a string')
  }

  // Parse input
  const m = input.match(/(\/?)(.+)\1([a-z]*)/i)

  // Require valid flags. `m` is null when there is nothing for `.+` to match
  // (for example an empty string); such input is treated as literal text too.
  if (!m || !m[3] || !/^(?!.*?(.).*?\1)[gmixXsuUAJ]+$/.test(m[3])) {
    return input
  }

  // Create the regular expression
  return new RegExp(m[2], m[3])
}

/*
Example config.json:
{
  replacements: [
    // NO sempai
    ['/SEMPAI/g', 'SENPAI'],
    ['/([Ss])empai/g', '$1enpai'],
    // Shin'ichi
    ['Shinichi', "Shin'ichi"],
    // Missing spaces
    ['/(\\.\\.\\.|…|[.?!])([A-Za-z0-9])/g', '$1 $2'],
    // Remove honorifics (most are wrong; add the correct ones using honorifics-fixer)
    ['/-(senpai|sama|kun|chan|sensei)(\\b|$)/g', '']
  ]
}
*/
	import { parseFlags } from 'https://deno.land/x/cliffy@v0.25.7/flags/mod.ts'
	import parse from 'https://cdn.skypack.dev/pin/@qgustavor/ass-parser@v0.2.2-3MSleKF9Tb13M9TVjKIV/mode=imports/optimized/@qgustavor/ass-parser.js'
	import stringify from 'https://cdn.skypack.dev/pin/@qgustavor/ass-stringify@v0.1.8-dniPlKxAanwMSbtHFyUD/mode=imports/optimized/@qgustavor/ass-stringify.js'
	import JSON5 from 'https://deno.land/x/json5@v1.0.0/mod.ts'

	// Built-in defaults. They are overridden by config.json, which is in turn
	// overridden by command-line flags (see the spread order in `config` below).
	const defaultConfig = {
	  targetDir: '..',
	  filenameReplacement: null,
	  handleLineBreaks: false,
	  shiftTimes: false,
	  fontScale: 1,
	  replacements: []
	}
	// config.json is optional and parsed as JSON5; any failure to read it falls
	// back to an empty configuration.
	const storedConfig = JSON5.parse(await Deno.readTextFile('config.json').catch(() => '{}'))
	const { flags } = parseFlags(Deno.args, {
	  flags: [{
	    name: 'targetDir',
	    aliases: ['target-dir'],
	  }, {
	    name: 'animeId',
	    aliases: ['anime-id'],
	    type: 'number'
	  }, {
	    name: 'handleLineBreaks',
	    aliases: ['handle-line-breaks'],
	    type: 'boolean'
	  }, {
	    name: 'shiftTimes',
	    aliases: ['shift-times'],
	    type: 'number'
	  }, {
	    name: 'fontScale',
	    aliases: ['font-scale'],
	    // Deliberately no `default` here: a flag default is always present in
	    // `flags`, and since flags are spread last it would override the fontScale
	    // stored in config.json. defaultConfig already supplies the fallback of 1.
	    type: 'number'
	  }]
	})
	// Later sources win: defaults < config.json < command-line flags.
	const config = {
	  ...defaultConfig,
	  ...storedConfig,
	  ...flags
	}

	// Character list for the anime. It is read from characters.json when that
	// file exists; otherwise it is downloaded from the Jikan API and cached there.
	let characterData

	try {
	  characterData = JSON5.parse(await Deno.readTextFile('characters.json'))
	} catch (e) {
	  // Only a missing cache file triggers a download; anything else is rethrown.
	  if (e.code !== 'ENOENT') throw e

	  if (!config.animeId) config.animeId = Number(prompt('Enter MyAnimeList Anime ID:'))
	  if (!config.animeId) throw new Error('Invalid or missing anime ID')

	  const response = await fetch(`https://api.jikan.moe/v4/anime/${config.animeId}/characters`)
	  if (!response.ok) {
	    // Release the unread body before bailing out.
	    await response.body?.cancel()
	    throw new Error(`Failed to fetch characters for anime ${config.animeId}: HTTP ${response.status}`)
	  }
	  characterData = await response.json()
	  // Validate before caching: writing an error payload to characters.json would
	  // make every later run fail when it reads `characterData.data`.
	  if (!Array.isArray(characterData?.data)) {
	    throw new Error(`Unexpected response for anime ${config.animeId}: missing character list`)
	  }
	  await Deno.writeTextFile('characters.json', JSON.stringify(characterData))
	}

	// Escapes regular-expression metacharacters so a name is always matched
	// literally (names such as 'C.C.' would otherwise act as patterns).
	const escapeRegex = str => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
	// Builds a regex matching any of the given literal words. With no words it
	// matches nothing; an empty alternation would instead match the empty string
	// at every position of every line.
	const anyOf = (words, flags) => new RegExp(words.length ? words.map(escapeRegex).join('|') : '(?!)', flags)

	// Character names with the first ', ' turned into a space ('A, B' -> 'A B').
	// Names containing a parenthesised part are dropped.
	const names = characterData.data
	  .map(e => e.character.name.replace(', ', ' '))
	  .filter(e => !e.match(/\(.*\)/))
	// Every distinct space-separated word that appears in any name.
	const isolatedNames = Array.from(new Set(names.join(' ').split(' ')))
	// For each word, derive the spelling with long vowels collapsed ('ou' -> 'o',
	// doubled vowel -> single vowel). Words unchanged by that are skipped.
	// Entries are [regex finding the collapsed spelling, original word, collapsed spelling].
	const replacementMap = isolatedNames.map(name => {
	  const replacement = name
	    .replace(/ou/g, 'o')
	    .replace(/([aiueo])\1/g, '$1')
	  if (replacement === name) return null
	  // The prefix group also accepts the ASS escapes \N, \n and \h, because their
	  // letter leaves no word boundary in front of a name that follows directly.
	  const regex = new RegExp('(^|\\b|\\\\[Nnh])(' + escapeRegex(replacement) + ')(\\b|$)', 'gi')
	  return [regex, name, replacement]
	}).filter(e => e)
	// Cheap pre-check: does a line contain any collapsed spelling at all?
	// NOTE(review): this check is case-sensitive while the per-word regexes above
	// are case-insensitive, so an all-caps name is only fixed when the same line
	// also contains a case-exact match — confirm whether that is intended.
	const matcher = anyOf(replacementMap.map(e => e[2]))

	// Matches words built only from the romanized syllables listed here
	// (presumably Hepburn-style romaji — confirm coverage against real names).
	const japaneseRegex = /^([kstnhmyrgzpb]?[aiueo]|([sc]h|[knhmrgbp]y)[auo]|[sc]hi|tsu|[fz]u|wa|n|ji|d[aeo])+$/i
	// invertedNames[i] is names[i] with its two words swapped when both words
	// match japaneseRegex; every other name is kept unchanged.
	const invertedNames = names.map(e => {
	  const parts = e.split(' ')
	  if (parts.length !== 2 || !parts[0].match(japaneseRegex) || !parts[1].match(japaneseRegex)) {
	    return e
	  }
	  return parts.reverse().join(' ')
	})
	const invertedMatcher = anyOf(invertedNames, 'g')

	// Parse the configured search patterns once up front instead of once per
	// dialogue line. String.prototype.replaceAll throws a TypeError for a RegExp
	// without the `g` flag, so patterns such as '/foo/i' are upgraded to global.
	const toGlobalSearch = pattern => {
	  const search = parseRegex(pattern)
	  if (search instanceof RegExp && !search.global) {
	    return new RegExp(search.source, search.flags + 'g')
	  }
	  return search
	}
	const textReplacements = config.replacements
	  .map(([searchStr, replacement]) => [toGlobalSearch(searchStr), replacement])
	const filenameSearch = config.filenameReplacement && toGlobalSearch(config.filenameReplacement[0])

	// The target directory is resolved relative to this script's URL. Without a
	// trailing slash, the last path segment of a base URL is replaced when a file
	// name is resolved against it, so a targetDir such as 'out' would be dropped.
	const targetDirPath = String(config.targetDir)
	const targetDir = new URL(
	  /(^|[\\/])$/.test(targetDirPath) ? targetDirPath : targetDirPath + '/',
	  import.meta.url
	)

	// Process every .ass file in the current working directory.
	for await (const file of Deno.readDir('.')) {
	  if (!file.name.endsWith('.ass')) continue
	  const data = await Deno.readTextFile(file.name)
	  const parsed = parse(data, { comments: true })
	  const eventsSection = parsed.find(e => e.section === 'Events')
	  if (!eventsSection) {
	    console.warn('Skipping file without an Events section:', file.name)
	    continue
	  }

	  for (const event of eventsSection.body) {
	    if (event.key !== 'Dialogue') continue

	    let text = event.value.Text
	    // Put the original spelling of character names back, keeping occurrences
	    // that contain no lowercase letter in upper case.
	    if (text.match(matcher)) {
	      for (const [replacement, name] of replacementMap) {
	        text = text.replaceAll(replacement, (all, prefix, oldWord) => {
	          const allCaps = !oldWord.match(/[a-z]/)
	          return prefix + (allCaps ? name.toUpperCase() : name)
	        })
	      }
	    }

	    // Swap inverted names back to the word order used in `names`
	    text = text.replaceAll(invertedMatcher, e => {
	      const index = invertedNames.indexOf(e)
	      if (index === -1) {
	        console.log('Inverted name matching error with', e)
	        return e
	      }
	      return names[index]
	    })

	    // Lines whose override block contains \pos, \move or \clip are left alone
	    if (config.handleLineBreaks && !text.match(/\{.*\\(pos|move|clip).*\}/)) {
	      text = text
	        // Removes line breaks before single words
	        .replace(/\s*\\N\s*(\S+\s*$)/m, ' $1')
	        // Moves line breaks close to punctuation
	        .replace(/([,.?!]|-(?!\w))([^,.?!-]{1,4})\s*\\N\s*/m, '$1\\N$2 ')
	        // Removes repeated spaces and trim
	        .replace(/ +/g, ' ').trim()

	      // Length is measured without override blocks ({...})
	      if (text.replaceAll(/\{.*?\}/g, '').length < 45) {
	        text = text
	          // Removes line breaks from short lines
	          .replace(/([^?!.]*)\\N(.*?)$/, '$1 $2')
	          // Removes repeated spaces
	          .replace(/ +/g, ' ')
	      }
	    }

	    // Handle replacements
	    for (const [search, replacement] of textReplacements) {
	      text = text.replaceAll(search, replacement)
	    }

	    event.value.Text = text

	    // Shift timings
	    if (config.shiftTimes) {
	      event.value.Start = handleShift(event.value.Start, config.shiftTimes)
	      event.value.End = handleShift(event.value.End, config.shiftTimes)
	    }
	  }

	  // Handle font sizes (a file without a styles section is simply left as is)
	  if (config.fontScale !== 1) {
	    parsed.find(e => e.section.includes('Styles'))?.body.forEach(style => {
	      if (style.key !== 'Style') return
	      style.value.Fontsize = Math.round(style.value.Fontsize * config.fontScale)
	    })
	  }

	  const filename = filenameSearch
	    ? file.name.replaceAll(filenameSearch, config.filenameReplacement[1])
	    : file.name
	  // Percent-encode the name so characters such as '#', '?' and '%' stay part
	  // of the path instead of being parsed as URL syntax.
	  const targetPath = new URL(encodeURIComponent(filename), targetDir)
	  await Deno.writeTextFile(targetPath, stringify(parsed))
	}

	/**
	 * Shifts a subtitle timestamp by a number of seconds.
	 *
	 * @param {string} time - Colon-separated timestamp such as `H:MM:SS.cc`.
	 * @param {number} delta - Seconds to add; negative values shift earlier.
	 * @returns {string} The shifted time as `H:MM:SS.cc`, clamped at zero.
	 * @throws {RangeError} If the resulting time is not a finite number.
	 */
	function handleShift (time, delta) {
	  const seconds = time.split(':').reduce((sum, part) => sum * 60 + Number(part), 0) + delta
	  const clamped = Math.max(0, seconds)
	  if (!Number.isFinite(clamped)) throw new RangeError('Invalid time value')

	  // Round to whole milliseconds first so floating-point noise (0.7 + 0.1 is
	  // 0.7999999999999999) cannot knock a centisecond off; digits below a
	  // centisecond are then dropped.
	  const centiseconds = Math.floor(Math.round(clamped * 1000) / 10)
	  const pad = value => String(value).padStart(2, '0')
	  // Hours are not padded and may exceed one digit.
	  const hours = Math.floor(centiseconds / 360000)
	  const minutes = Math.floor(centiseconds / 6000) % 60
	  const wholeSeconds = Math.floor(centiseconds / 100) % 60
	  return `${hours}:${pad(minutes)}:${pad(wholeSeconds)}.${pad(centiseconds % 100)}`
	}

	// Edited from regex-parser@2.2.11
	/**
	 * Converts a `/pattern/flags` string into a RegExp.
	 *
	 * The input is returned unchanged (to be used as a literal search string) when
	 * no flags are found or when the flags fail validation, so '/abc/' without
	 * flags and plain text such as 'Shinichi' both stay strings.
	 *
	 * @param {string} input - Search text or `/pattern/flags` string.
	 * @returns {RegExp|string} A RegExp built from the pattern and flags, or `input` itself.
	 * @throws {Error} If `input` is not a string.
	 * @throws {SyntaxError} From the RegExp constructor, for an invalid pattern or
	 *   for flags that pass the check below but are not supported by JavaScript.
	 */
	function parseRegex (input) {
	  // Validate input
	  if (typeof input !== 'string') {
	    throw new Error('Invalid input. Input must be a string')
	  }

	  // Parse input
	  const m = input.match(/(\/?)(.+)\1([a-z]*)/i)

	  // Require valid flags. `m` is null when there is nothing for `.+` to match
	  // (for example an empty string); such input is treated as literal text too.
	  if (!m || !m[3] || !/^(?!.*?(.).*?\1)[gmixXsuUAJ]+$/.test(m[3])) {
	    return input
	  }

	  // Create the regular expression
	  return new RegExp(m[2], m[3])
	}

	/*
	Example config.json:
	{
	replacements: [
	// NO sempai
	['/SEMPAI/g', 'SENPAI'],
	['/([Ss])empai/g', '$1enpai'],
	// Shin'ichi
	['Shinichi', "Shin'ichi"],
	// Missing spaces
	['/(\\.\\.\\.|…|[.?!])([A-Za-z0-9])/g', '$1 $2'],
	// Remove honorifics (most are wrong; add the correct ones using honorifics-fixer)
	['/-(senpai|sama|kun|chan|sensei)(\\b|$)/g', '']
	]
	}
	*/