Skip to content

Instantly share code, notes, and snippets.

@qgustavor
Created February 1, 2023 00:44
Show Gist options
  • Save qgustavor/3c9e14fdb80f14a5267f5a0477f01933 to your computer and use it in GitHub Desktop.
Save qgustavor/3c9e14fdb80f14a5267f5a0477f01933 to your computer and use it in GitHub Desktop.
A customizable script that fixes common issues in anime subtitles such as wrong romanization, bad line breaking, basic time shifts and font scaling
import { parseFlags } from 'https://deno.land/x/cliffy@v0.25.7/flags/mod.ts'
import parse from 'https://cdn.skypack.dev/pin/@qgustavor/ass-parser@v0.2.2-3MSleKF9Tb13M9TVjKIV/mode=imports/optimized/@qgustavor/ass-parser.js'
import stringify from 'https://cdn.skypack.dev/pin/@qgustavor/ass-stringify@v0.1.8-dniPlKxAanwMSbtHFyUD/mode=imports/optimized/@qgustavor/ass-stringify.js'
import JSON5 from 'https://deno.land/x/json5@v1.0.0/mod.ts'
// Built-in defaults. Precedence (lowest to highest):
// defaultConfig < config.json < command-line flags.
const defaultConfig = {
  targetDir: '..',
  filenameReplacement: null,
  handleLineBreaks: false,
  shiftTimes: false,
  fontScale: 1,
  replacements: []
}

// config.json is optional and parsed as JSON5; when it cannot be read an
// empty object is used so only defaults and flags apply.
const storedConfig = JSON5.parse(await Deno.readTextFile('config.json').catch(() => '{}'))

const { flags } = parseFlags(Deno.args, {
  flags: [{
    name: 'targetDir',
    aliases: ['target-dir'],
    // Declared explicitly so the flag consumes a value (a path) instead of
    // relying on the parser's default option type.
    type: 'string'
  }, {
    name: 'animeId',
    aliases: ['anime-id'],
    type: 'number'
  }, {
    name: 'handleLineBreaks',
    aliases: ['handle-line-breaks'],
    type: 'boolean'
  }, {
    name: 'shiftTimes',
    aliases: ['shift-times'],
    type: 'number'
  }, {
    name: 'fontScale',
    aliases: ['font-scale'],
    // No `default` here on purpose: a flag-level default is always present in
    // the parsed flags and would therefore override `fontScale` from
    // config.json. The fallback of 1 comes from defaultConfig instead.
    type: 'number'
  }]
})

// Later spreads win, which implements the precedence described above.
const config = {
  ...defaultConfig,
  ...storedConfig,
  ...flags
}
// Character list for the anime. Read from the local characters.json cache
// when it exists; otherwise fetched from the Jikan API and cached.
let characterData
try {
  characterData = JSON5.parse(await Deno.readTextFile('characters.json'))
} catch (e) {
  // Only a missing cache file triggers a download; anything else
  // (e.g. a corrupt cache or a permission problem) is reported as-is.
  if (e.code !== 'ENOENT') throw e
  if (!config.animeId) config.animeId = Number(prompt('Enter MyAnimeList Anime ID:'))
  if (!config.animeId) throw Error('Invalid or missing anime ID')
  const response = await fetch(`https://api.jikan.moe/v4/anime/${config.animeId}/characters`)
  // Do not cache error responses (rate limiting, unknown ID, ...): once
  // written to characters.json they would be reused on every later run.
  if (!response.ok) {
    throw new Error(`Character request failed with HTTP ${response.status} ${response.statusText}`)
  }
  characterData = await response.json()
  // The rest of the script reads `characterData.data` as an array.
  if (!Array.isArray(characterData?.data)) {
    throw new Error('Unexpected character response: missing "data" array')
  }
  await Deno.writeTextFile('characters.json', JSON.stringify(characterData))
}
// Escapes regex metacharacters so a character name is always matched literally.
const escapeRegex = str => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
// Pattern that can never match; used instead of an empty pattern, which
// would match at every position of every line.
const neverMatchPattern = '(?!)'

// Full character names with the comma separator replaced by a space.
// Entries containing a parenthesised part are skipped.
const names = characterData.data
  .map(e => e.character.name.replace(', ', ' '))
  .filter(e => !e.match(/\(.*\)/))

// Unique single words taken from all names.
const isolatedNames = Array.from(new Set(names.join(' ').split(' ')))

// For each word, derive the shortened romanization ("ou" -> "o", doubled
// vowel -> single vowel). Words that change get an entry of the form
// [regex matching the shortened spelling, full spelling, shortened spelling].
const replacementMap = isolatedNames.map(name => {
  const replacement = name
    .replace(/ou/g, 'o')
    .replace(/([aiueo])\1/g, '$1')
  if (replacement === name) return null
  // The prefix group accepts start of text, a word boundary or an ASS
  // \N, \n or \h escape directly before the word.
  const regex = new RegExp('(^|\\b|\\\\[Nnh])(' + escapeRegex(replacement) + ')(\\b|$)', 'gi')
  return [regex, name, replacement]
}).filter(e => e)

// Cheap pre-filter telling whether any shortened spelling occurs in a line.
// Case-insensitive like the per-name regexes above, so all-caps lines are
// not skipped before they reach the replacement step.
const matcher = new RegExp(
  replacementMap.map(e => escapeRegex(e[2])).join('|') || neverMatchPattern,
  'i'
)

// Matches a word made only of the romaji syllables listed in the pattern.
const japaneseRegex = /^([kstnhmyrgzpbp]?[aiueo]|([sc]h|[knhmrgbp]y)[auo]|[sc]hi|tsu|[fz]u|wa|n|ji|d[aeo])+$/i

// Same order and length as `names`: two-word names whose both words pass
// japaneseRegex are stored with the words swapped, everything else unchanged.
const invertedNames = names.map(e => {
  const parts = e.split(' ')
  if (parts.length !== 2 || !parts[0].match(japaneseRegex) || !parts[1].match(japaneseRegex)) {
    return e
  }
  return parts.reverse().join(' ')
})

// Finds any entry of invertedNames in a line. Empty entries are left out and
// an empty list falls back to the never-matching pattern.
const invertedMatcher = new RegExp(
  invertedNames.filter(e => e).map(escapeRegex).join('|') || neverMatchPattern,
  'g'
)
// Turns a configured search string into something replaceAll() accepts.
// replaceAll() throws a TypeError for a RegExp without the global flag, so
// regexes written without "g" (e.g. '/foo/i') get it added.
const toGlobalSearch = pattern => {
  const search = parseRegex(pattern)
  if (search instanceof RegExp && !search.global) {
    return new RegExp(search.source, search.flags + 'g')
  }
  return search
}

// Compile the user supplied patterns once instead of once per dialogue line.
const compiledReplacements = config.replacements
  .map(([searchStr, replacement]) => [toGlobalSearch(searchStr), replacement])
const compiledFilenameReplacement = config.filenameReplacement
  ? [toGlobalSearch(config.filenameReplacement[0]), config.filenameReplacement[1]]
  : null

// Output directory, resolved relative to this script's URL. A trailing slash
// is required: without it, resolving a file name against the directory URL
// replaces the last path segment instead of appending to it.
const targetDirSpec = String(config.targetDir)
const outputDir = new URL(
  targetDirSpec === '' || /[\\/]$/.test(targetDirSpec) ? targetDirSpec : targetDirSpec + '/',
  import.meta.url
)

// Process every .ass file of the current working directory.
for await (const file of Deno.readDir('.')) {
  if (!file.name.endsWith('.ass')) continue
  const data = await Deno.readTextFile(file.name)
  const parsed = parse(data, { comments: true })
  const events = parsed.find(e => e.section === 'Events').body

  for (const event of events) {
    if (event.key !== 'Dialogue') continue
    let text = event.value.Text

    // Replace shortened romanizations with the full spelling of the name
    if (text.match(matcher)) {
      for (const [replacement, name] of replacementMap) {
        text = text.replaceAll(replacement, (all, prefix, oldWord) => {
          // Keep all-caps text (no lowercase letter in the match) in caps
          const allCaps = !oldWord.match(/[a-z]/)
          return prefix + (allCaps ? name.toUpperCase() : name)
        })
      }
    }

    // Put swapped names back into the order used in `names`
    text = text.replaceAll(invertedMatcher, e => {
      const index = invertedNames.indexOf(e)
      if (index === -1) {
        console.log('Inverted name matching error with', e)
        return e
      }
      return names[index]
    })

    // Lines with \pos, \move or \clip override tags are left untouched
    if (config.handleLineBreaks && !text.match(/\{.*\\(pos|move|clip).*\}/)) {
      text = text
        // Removes line breaks before single words
        .replace(/\s*\\N\s*(\S+\s*$)/m, ' $1')
        // Moves line breaks close to punctuation
        .replace(/([,.?!]|-(?!\w))([^,.?!-]{1,4})\s*\\N\s*/m, '$1\\N$2 ')
        // Removes repeated spaces and trim
        .replace(/ +/g, ' ').trim()

      // Length is measured without {...} override blocks
      if (text.replaceAll(/\{.*?\}/g, '').length < 45) {
        text = text
          // Removes line breaks from short lines
          .replace(/([^?!.]*)\\N(.*?)$/, '$1 $2')
          // Removes repeated spaces
          .replace(/ +/g, ' ')
      }
    }

    // Handle replacements
    for (const [search, replacement] of compiledReplacements) {
      text = text.replaceAll(search, replacement)
    }

    event.value.Text = text

    // Shift timings
    if (config.shiftTimes) {
      event.value.Start = handleShift(event.value.Start, config.shiftTimes)
      event.value.End = handleShift(event.value.End, config.shiftTimes)
    }
  }

  // Handle font sizes
  if (config.fontScale !== 1) {
    parsed.find(e => e.section.includes('Styles')).body.forEach(style => {
      if (style.key !== 'Style') return
      style.value.Fontsize = Math.round(style.value.Fontsize * config.fontScale)
    })
  }

  const filename = compiledFilenameReplacement
    ? file.name.replaceAll(compiledFilenameReplacement[0], compiledFilenameReplacement[1])
    : file.name

  // Percent-encode each path segment so characters with a meaning in URLs
  // ("#", "?", "%", or a leading "Name:" that would parse as a scheme) stay
  // part of the file name when the URL is built.
  const encodedFilename = filename.split('/').map(encodeURIComponent).join('/')
  const targetPath = new URL(encodedFilename, outputDir)
  await Deno.writeTextFile(targetPath, stringify(parsed))
}
/**
 * Shifts a colon separated timestamp (e.g. "0:01:23.45") by a number of seconds.
 *
 * The computation is done in whole centiseconds, so the result is rounded to
 * the nearest centisecond, is not affected by floating point noise and keeps
 * hours of any size. Results below zero are clamped to "0:00:00.00".
 *
 * @param {string} time - Timestamp in H:MM:SS.cc form.
 * @param {number} delta - Offset in seconds; may be negative or fractional.
 * @returns {string} The shifted timestamp in H:MM:SS.cc form.
 * @throws {RangeError} When the timestamp or the offset is not a finite number.
 */
function handleShift (time, delta) {
  // Fold "H:MM:SS.cc" into seconds: each ":" multiplies what came before by 60
  const seconds = time.split(':').reduce((sum, part) => sum * 60 + Number(part), 0)
  const totalCentiseconds = Math.max(0, Math.round((seconds + delta) * 100))
  if (!Number.isFinite(totalCentiseconds)) {
    throw new RangeError(`Invalid time value: ${time}`)
  }

  const pad = value => String(value).padStart(2, '0')
  const centis = totalCentiseconds % 100
  const secs = Math.floor(totalCentiseconds / 100) % 60
  const mins = Math.floor(totalCentiseconds / 6000) % 60
  // Hours are not wrapped or truncated, so times of ten hours or more survive
  const hours = Math.floor(totalCentiseconds / 360000)
  return `${hours}:${pad(mins)}:${pad(secs)}.${pad(centis)}`
}
// Edited from regex-parser@2.2.11
/**
 * Converts a search string from the configuration into a search value.
 *
 * A string of the form "/pattern/flags" with at least one flag becomes a
 * RegExp. Every other string (no flags, repeated flags, flags outside the
 * accepted set g m i x X s u U A J, or nothing to parse) is returned
 * unchanged so it can be used as a literal search string.
 *
 * NOTE(review): the accepted set contains flags that JavaScript engines do
 * not implement (such as "x"); for those the RegExp constructor throws a
 * SyntaxError.
 *
 * @param {string} input - Literal text or a "/pattern/flags" string.
 * @returns {RegExp|string} A RegExp for regex-like input, otherwise `input`.
 * @throws {Error} When `input` is not a string.
 */
function parseRegex (input) {
  // Validate input
  if (typeof input !== 'string') {
    throw new Error('Invalid input. Input must be a string')
  }

  // Parse input: optional "/" delimiter, pattern, the same delimiter, flags
  const m = input.match(/(\/?)(.+)\1([a-z]*)/i)

  // Nothing to parse (e.g. an empty string): treat it as literal text
  if (!m) {
    return input
  }

  // Require valid flags
  if (!m[3] || !/^(?!.*?(.).*?\1)[gmixXsuUAJ]+$/.test(m[3])) {
    return input
  }

  // Create the regular expression
  return new RegExp(m[2], m[3])
}
/*
Example config.json:
{
replacements: [
// NO sempai
['/SEMPAI/g', 'SENPAI'],
['/([Ss])empai/g', '$1enpai'],
// Shin'ichi
['Shinichi', "Shin'ichi"],
// Missing spaces
['/(\\.\\.\\.|…|[.?!])([A-Za-z0-9])/g', '$1 $2'],
// Remove honorifics (they are mostly wrong; add the correct ones back using honorifics-fixer)
['/-(senpai|sama|kun|chan|sensei)(\\b|$)/g', '']
]
}
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment