Skip to content

Instantly share code, notes, and snippets.

@danielnaber
Created August 10, 2018 07:47
Show Gist options
  • Save danielnaber/6f738fca065e87a5d067710aabaa1883 to your computer and use it in GitHub Desktop.
Save danielnaber/6f738fca065e87a5d067710aabaa1883 to your computer and use it in GitHub Desktop.
package com.optimaize.langdetect;
import com.google.common.base.Optional;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfile;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
/**
 * Minimal reproduction of a language-detection problem.
 *
 * <p>Builds a detector restricted to the "de" and "nl" profiles with
 * {@code shortTextAlgorithm(500)} enabled, runs it on a single German
 * sentence and prints whatever the detector returns.
 */
class LangDetectBug {

  // Not properly detected as German with shortTextAlgorithm(500):
  private static final String GERMAN_SAMPLE =
      "Den Vogel kennt man am Gesang, den Topf an dem Klang, den Esel an den Ohren und am Gesang den Toren.";

  /**
   * Runs the reproduction and prints the detection result to standard output.
   *
   * @param args ignored
   * @throws IOException declared because reading the language profiles may throw it
   */
  public static void main(String[] args) throws IOException {
    // Only the two candidate languages are loaded.
    List<LanguageProfile> candidateProfiles =
        new LanguageProfileReader().read(Arrays.asList("de", "nl"));

    // NOTE(review): 500 is presumably a text-length threshold for the
    // short-text algorithm -- confirm against the library documentation.
    LanguageDetector detector =
        LanguageDetectorBuilder.create(NgramExtractors.standard())
            .shortTextAlgorithm(500)
            .withProfiles(candidateProfiles)
            .build();

    // Print the Optional itself so an absent result is visible in the output.
    System.out.println(detector.detect(GERMAN_SAMPLE));
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment