Skip to content

Instantly share code, notes, and snippets.

Last active January 2, 2017 22:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yu-tang/6526991 to your computer and use it in GitHub Desktop.
Save yu-tang/6526991 to your computer and use it in GitHub Desktop.
OmegaT 用スクリプト。全分節の全参考訳文を列挙するサンプル。
// demonstrate how to get matches for each entry
// original concept from
// by Kos Ivantsov 2013-09-10
// +
// Some hacks and refactoring
// by Yu-Tang
// +
// GUI independence and many improvements taken from
// by cienislaw 2013-09-19
// +
// Add multithreading
// by Yu-Tang 2013-09-21
// +
// File selector taken from
// by Kos Ivantsov + mod by Yu-Tang 2013-09-22
// ===================================================================
// Note: This script currently does not handle multiple translations.
// If the project has segments with alternate translations,
// the outcome is not guaranteed.
//################## config start ##################
SELECT_FILES = 'no' // 'yes' to pick the file(s) to export via a file chooser; any other value exports the whole project.
MIN_SIMILARITY = 50 // minimum match similarity, compared against the score chosen below. 0-100. 0 means everything.
SIMILARITY_TYPE = 'adjustedScore' // score field to compare: score / scoreNoStem / adjustedScore
//################## config end ##################
import groovy.swing.SwingBuilder
import groovy.time.TimeCategory
import groovy.time.TimeDuration
import groovy.transform.Synchronized
import org.omegat.core.Core
import org.omegat.core.matching.NearString
import org.omegat.gui.matches.FindMatchesThread
import org.omegat.gui.scripting.IScriptLogger
import org.omegat.util.TMXWriter2
import javax.swing.*
import javax.swing.filechooser.FileFilter
import javax.swing.filechooser.FileSystemView
import java.util.concurrent.atomic.AtomicInteger
// Worker for a single source entry: calls search() without a matcher pane
// and hands the matches (or the exception) to the Merger.
// NOTE(review): closing braces and some statements (e.g. the body of run())
// appear to be missing from this copy of the script - restore them from the
// original before running.
class MatchesExportThread extends FindMatchesThread {
private Merger merger
private SourceTextEntry entry
private IProject project
// Passes null as the first superclass argument and keeps merger, project
// and entry for later use in export().
public MatchesExportThread(final Merger merger, final IProject project, final SourceTextEntry entry) {
super(null, project, entry);
this.merger = merger
this.entry = entry
this.project = project
// super class gets null as matcherPane argument.
// So we need to override all methods which refer to matcherPane.
// Always reports "not changed"; the pane-based comparison is kept below for reference.
protected boolean isEntryChanged() {
//return currentlyProcessedEntry != pane.currentlyProcessedEntry;
return false
// NOTE(review): no body is visible for run() in this copy - presumably it
// calls export() and notifies the merger; confirm against the original.
public void run() {
// Exports one entry: skips a source the merger has already recorded,
// writes the existing translation if there is one, then searches matches.
private void export() {
// is this source already processed?
// (only entries flagged as duplicates are checked against the merger's set)
if (entry.duplicate != SourceTextEntry.DUPLICATE.NONE) {
if (! merger.addProcessedSource(entry.srcText)) {
return // skip already processed source
// is this segment already translated?
TMXEntry info = project.getTranslationInfo(entry)
if (info.isTranslated()) {
merger.writeEntry info
// search matches
// any exception from the search is reported to the merger together with the entry
try {
merger.setFoundResult search()
} catch (Exception ex) {
merger.setError ex, entry
/**
 * Custom TMXWriter class
 * <ul><li>Counts the TUs it writes.
 * <li>Unique by source and translation pair. Duplicate TUs are not allowed.
 * </ul>
 */
// TMX writer that remembers every source/translation pair it has seen and
// writes an entry only the first time its pair is added to the set.
// NOTE(review): closing braces, Javadoc delimiters and several statements
// (super(...) call, counter updates, getter bodies) appear to be missing
// from this copy - restore them from the original before running.
class HashedTMXWriter extends TMXWriter2 {
// NOTE(review): md is only used by getHash(), which is not called anywhere
// in the visible code.
private MessageDigest md = MessageDigest.getInstance('SHA-256')
// pairs already written; membership decides whether an entry is a duplicate
private HashSet<ByteArrayWrapper> set = new HashSet<ByteArrayWrapper>()
// threshold and score field name used by writeEntry(NearString)
private int similarity
private String similarityType
private int fuzzyTUCount = 0
* @param file
* @param props ProjectProperties
* @param similarity
* @param similarityType
public HashedTMXWriter(File file, ProjectProperties props, int similarity, String similarityType) {
// NOTE(review): the start of the super(...) call is missing here; only its
// last two arguments remain.
true, // levelTwo
true) // forceValidTMX
this.similarity = similarity
this.similarityType = similarityType
* Write one entry.
* @param entry TMXEntry
// Writes the entry only when its source/translation pair was not in the set yet.
public void writeEntry(TMXEntry entry, boolean isFuzzy = false) {
ByteArrayWrapper wrap = getByteArrayWrapper(entry)
if (set.add(wrap)) {
writeEntry entry.source, entry.translation, entry, null
// NOTE(review): the statement guarded by this if is missing - presumably
// it increments fuzzyTUCount; confirm against the original.
if (isFuzzy)
* Write one entry.
* @param match NearString
// Converts a match into a TMXEntry and writes it as a fuzzy entry.
public void writeEntry(NearString match) {
// filter by similarity
// NOTE(review): the body of this if is missing - presumably it returns
// early for matches below the threshold; confirm against the original.
if (match.scores[0][similarityType] < similarity) {
TMXEntry entry = new TMXEntry(
new PrepareTMXEntry(
source: match.source,
translation: match.translation,
changer: match.changer,
changeDate: match.changedDate,
creator: match.creator,
creationDate: match.creationDate,
note: null,
otherProperties: match.props
true, // defaultTranslation
null) // ExternalLinked
writeEntry entry, true
// NOTE(review): no body is visible for getTUCount() in this copy.
public int getTUCount() {
// translated count = all TUs minus the fuzzy ones
public int getTranslatedTUCount() {
getTUCount() - getFuzzyTUCount()
// NOTE(review): no body is visible for getFuzzyTUCount() in this copy.
public int getFuzzyTUCount() {
// Digests the UTF-8 bytes of the message with md.
private byte[] getHash(String message) {
md.digest message.getBytes('UTF-8')
// Builds the set key from source and translation, separated by '\0'.
// NOTE(review): .bytes is called without a charset here, unlike getHash()
// which asks for 'UTF-8'.
private ByteArrayWrapper getByteArrayWrapper(TMXEntry entry) {
byte[] bytes = (entry.source + '\0' + entry.translation).bytes
new ByteArrayWrapper(bytes)
// inner class - byte array wrapper for appropriate equality
// (equals/hashCode compare array contents via java.util.Arrays)
private final class ByteArrayWrapper {
private final byte[] data
public ByteArrayWrapper(byte[] data) {
if (! data) {
throw new NullPointerException()
// NOTE(review): the next line looks garbled - presumably the closing brace
// of the if followed by "this.data = data"; confirm against the original.
} = data
public boolean equals(Object other) {
if (! (other instanceof ByteArrayWrapper)) {
return false
Arrays.equals data, ((ByteArrayWrapper)other).data
public int hashCode() {
return Arrays.hashCode(data)
/** Collects matches, filters them and writes them to the file. */
// Creates the MatchesExportThread workers, forwards their results to the
// HashedTMXWriter and prints errors and the final summary to the console.
// NOTE(review): closing braces, Javadoc delimiters and several statements
// appear to be missing from this copy - restore them from the original
// before running.
class Merger extends Thread {
private List<SourceTextEntry> entries
private File fileTMX
private HashedTMXWriter writer
private int maxOfThreads
private IProject project
private IScriptLogger console
private int sizeOfEntries
// index of the next entry to hand to a worker
private final AtomicInteger currentEntry = new AtomicInteger()
// decremented in onThreadComplete(); reaching zero is the "all done" condition
private final AtomicInteger runners = new AtomicInteger()
// shown in the summary only when non-zero
private failed = 0
private final Date timeStart = new Date()
private final Set<String> processedSources = Collections.synchronizedSet(new HashSet<String>())
// Records the entry count and creates up to maxOfThreads workers.
public synchronized void start() {
// get rid of autosave during processing
// NOTE(review): no statement follows the comment above in this copy.
sizeOfEntries = entries.size()
// create matches search threads
int cnt = maxOfThreads
// stops when cnt reaches zero or createThread() returns false
while (cnt-- && createThread()) ;
// Prints the failing segment and the exception to the console via SwingBuilder.doLater.
public void setError(final Exception error, final SourceTextEntry entry) {
new SwingBuilder().doLater() {
console.println "Error on seg. #${entry.entryNum()}: ${entry.srcText}"
console.println error
* Write TUs from matches.
* @param entry TMXEntry
// Passes every match in the list to writer.writeEntry.
public void setFoundResult(final List<NearString> matches) {
matches.each writer.&writeEntry
* Write one TU from translated segment.
* @param entry TMXEntry
public void writeEntry(TMXEntry entry) {
writer.writeEntry entry
* create new MatchesExportThread thread
* @return Returns true if new thread was created and false otherwise.
// Claims the next entry index and starts a worker for it while the index
// is still below sizeOfEntries.
// NOTE(review): the return statement is not visible in this copy.
public boolean createThread() {
int index = currentEntry.getAndIncrement()
boolean createMore = index < sizeOfEntries
if (createMore) {
new MatchesExportThread(this, project, entries[index]).start()
* Adds the source string to HashSet if it is not already present.
* @param source String
* @return true if this set did not already contain the specified element
// NOTE(review): no body is visible in this copy - presumably it adds the
// source to processedSources; confirm against the original.
public boolean addProcessedSource(String source) {
/* called when each thread completed */
// NOTE(review): the body of the if below is missing in this copy.
public void onThreadComplete() {
if (runners.decrementAndGet() == 0) {
/* called when all threads completed */
private void onExit() {
// delete TMX file when it has no TU
// NOTE(review): the statement that deletes the file itself is not visible here.
if (writer.TUCount == 0) {
def folder = fileTMX.parentFile
if (folder.list().size() == 0)
folder.delete() // delete empty folder too
// output summary
consolePrintln getSummary()
// restore AutoSave
// NOTE(review): no statement follows the comment above in this copy.
// Prints each argument to the console via SwingBuilder.doLater.
private void consolePrintln(Object... args) {
new SwingBuilder().doLater() {args.each console.&println}
// Builds the summary text: TU counts, the failed count (only when non-zero)
// and the time elapsed since timeStart (hours only when non-zero).
private String getSummary() {
TimeDuration td = TimeCategory.minus( new Date(), this.timeStart )
"""Exported ${writer.TUCount} TUs (translated ${writer.translatedTUCount} + fuzzy ${writer.fuzzyTUCount}).\
${failed ? ' failed ' + failed + '.' : ''}
It took ${td.hours ? td.hours + ' h ' : ''}${td.minutes} min ${td.seconds} sec ${td.millis} ms"""
/** FileSystemView that restricts browsing to the specified directory and below. */
// FileSystemView that presents rootDirectory as the only root and as the
// home directory, and refuses to create roots or folders.
// NOTE(review): closing braces appear to be missing from this copy.
public class DirectoryRestrictedFileSystemView extends FileSystemView {
// set by the caller through Groovy's named-argument constructor
private File rootDirectory
// Only rootDirectory itself counts as a root.
public boolean isRoot(File f) {
return rootDirectory.equals(f)
public File[] getRoots() {
return [rootDirectory] as File[]
public File getHomeDirectory() {
return rootDirectory
// NOTE(review): delegates to super unchanged; any check that keeps the user
// below rootDirectory is not visible here - confirm against the original.
public File getParentDirectory(File dir) {
return super.getParentDirectory(dir)
// Creating roots or folders is deliberately unsupported in this view.
protected File createFileSystemRoot(File f) {
throw new UnsupportedOperationException("Not supported yet.")
public File createNewFolder(File containingDir) throws IOException {
throw new UnsupportedOperationException("Not supported yet.")
/** Whitelist-based FileFilter. */
// File chooser filter that accepts directories and the files in whiteList.
// NOTE(review): closing braces appear to be missing from this copy.
public class WhiteListFilter extends FileFilter{
// set by the caller through Groovy's named-argument constructor
private List<File> whiteList
public boolean accept(File f){
// the file filter must show also directories, in order to be able to look into them
f.isDirectory() || whiteList.contains(f)
public String getDescription(){
return "OmegaT Source files";
// Returns 'exported_relevant.tmx' inside the 'tmx_export' folder under the
// project root.
// NOTE(review): the statement guarded by the if is missing in this copy
// (presumably it creates the folder); as written, the "new File" line
// becomes the if body. The closing brace is missing as well.
File getDestTmxFile() {
def folder = new File(project.projectProperties.projectRoot, 'tmx_export')
if (! folder.exists())
new File(folder, 'exported_relevant.tmx')
// Lets the user pick project source files in a file chooser rooted at the
// source folder and collects the entries of the chosen files.
// Returns the (still empty) list when the dialog is not approved.
// NOTE(review): closing braces and the final return are missing from this
// copy - presumably it returns entries; confirm against the original.
List<SourceTextEntry> getSelectedFilesEentries() {
String sourceRoot = project.projectProperties.sourceRoot
File rootDir = new File(sourceRoot)
List<SourceTextEntry> entries = new ArrayList<SourceTextEntry>()
def projectFiles = project.projectFiles
// chooser uses rootDir as its only root and offers only the whitelist filter
JFileChooser fc = new JFileChooser(
new DirectoryRestrictedFileSystemView(rootDirectory: rootDir))
fc.acceptAllFileFilterUsed = false
// whitelist = every project file, resolved against the source folder
fc.addChoosableFileFilter new WhiteListFilter(
whiteList: projectFiles.collect() { new File(rootDir, it.filePath) })
fc.dialogTitle = 'Choose files to export'
fc.fileSelectionMode = JFileChooser.FILES_ONLY
fc.multiSelectionEnabled = true
if (fc.showOpenDialog(mainWindow.applicationFrame) != JFileChooser.APPROVE_OPTION) {
console.println 'Canceled'
return entries
def files = fc.selectedFiles
def lenSourceDirPath = sourceRoot.size()
console.println "Choosed ${files.size()} file(s)."
files.each() { file ->
// match on the canonical path with its first lenSourceDirPath characters removed
// (assumes the canonical path starts with sourceRoot - TODO confirm)
def fi = projectFiles.find() {
it.filePath == file.canonicalPath[lenSourceDirPath..-1] }
// NOTE(review): fi is dereferenced without checking whether find() matched anything
entries += fi.entries
// main flow
// Checks that a project is loaded, gathers the entries, creates the TMX
// writer and builds the Merger with one worker slot per available processor.
// NOTE(review): this copy is missing closing braces, the early exits after
// the two console messages, the else branch of the ternary, the remaining
// HashedTMXWriter constructor arguments and any call that starts the
// Merger - restore them from the original before running.
// check prerequisite
if (! project.isProjectLoaded()) {
console.println "no project found."
// 'yes' routes through the file chooser
def entries = SELECT_FILES == 'yes' ?
getSelectedFilesEentries() :
// abort when no entry
if (entries.size() == 0) {
console.println "no entry found."
File tmx = getDestTmxFile()
HashedTMXWriter writer = new HashedTMXWriter(tmx,
writer.writeComment " Default translations "
// the processor count is passed to the Merger as maxOfThreads
int processors = Runtime.runtime.availableProcessors()
new Merger(entries: entries,
fileTMX: tmx,
writer: writer,
maxOfThreads: processors,
project: project,
console: console)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment