Skip to content

Instantly share code, notes, and snippets.

Created February 11, 2019 12:31
Show Gist options
  • Save maidis/237fc5a791e9f3f2a9f2ebd40ab5c9fb to your computer and use it in GitHub Desktop.
Save maidis/237fc5a791e9f3f2a9f2ebd40ab5c9fb to your computer and use it in GitHub Desktop.
Nutuk Frekans Analizi
#include <iostream>
#include <algorithm>
#include <fstream>
#include <string>
#include <unordered_map>
#include <set>
#include <functional>
/// Counts how often each whitespace-separated word occurs in "olanlar.txt"
/// and prints the result as "word :: count" lines, most frequent word first.
///
/// @return 0 on success, 1 if the input file could not be opened.
int main()
{
    using Entry = std::pair<std::string, int>;

    // Read-only stream: the input is never written to, and a read/write
    // std::fstream would fail to open a file we only have read access to.
    std::ifstream text("olanlar.txt");
    if (!text)
    {
        std::cerr << "olanlar.txt could not be opened\n";
        return 1;
    }

    // Tally every token; operator[] value-initialises a missing count to 0.
    std::unordered_map<std::string, int> frekans;
    std::string word;
    while (text >> word)
    {
        ++frekans[word];
    }

    // Order by descending count. Equal counts fall back to the word itself so
    // that the output does not depend on the hash map's iteration order.
    const auto compFunctor = [](const Entry& lhs, const Entry& rhs)
    {
        if (lhs.second != rhs.second)
        {
            return lhs.second > rhs.second;
        }
        return lhs.first < rhs.first;
    };

    // The lambda's own type is used as the comparator type, which avoids the
    // indirect call a std::function wrapper would add to every comparison.
    const std::multiset<Entry, decltype(compFunctor)> setOfWords(
        frekans.begin(), frekans.end(), compFunctor);

    for (const Entry& element : setOfWords)
    {
        // '\n' instead of std::endl: no need to flush after every line.
        std::cout << element.first << " :: " << element.second << '\n';
    }

    return 0;
}
bey :: 2359
paşa :: 2278
millet :: 1751
milliye :: 1157
hükümet :: 1090
efendi :: 1043
istanbul :: 918
meclis :: 914
kendi :: 831
karşı :: 701
suret :: 677
vaziyet :: 647
devlet :: 632
ordu :: 629
telgraf :: 624
ben :: 619
sivas :: 618
bütün :: 618
ali :: 614
hareket :: 605
tarih :: 601
memleket :: 566
kumandan :: 563
kuvvet :: 560
kemal :: 558
arz :: 552
fırka :: 539
kabul :: 539
büyük :: 530
kongre :: 524
kumanda :: 520
ankara :: 513
hazret :: 507
cemiyet :: 506
zaman :: 493
mustafa :: 493
cevap :: 492
kolordu :: 415
şifre :: 413
vatan :: 406
vekil :: 390
hak :: 390
heyet :: 389
rauf :: 386
idare :: 380
umumi :: 380
nazar :: 379
evvel :: 372
vesika :: 365
husus :: 364
mesele :: 363
cephe :: 359
taraf :: 359
çalış :: 359
maksat :: 357
hukuk :: 355
ingiliz :: 353
reis :: 348
rica :: 346
vali :: 346
vazife :: 341
lüzum :: 340
erzurum :: 335
düşman :: 334
ara :: 334
nazır :: 331
namına :: 326
yalnız :: 326
teşkil :: 317
gönder :: 316
işgal :: 312
baş :: 310
emir :: 309
milli :: 309
anadol :: 307
kabine :: 306
aynı :: 306
nokta :: 305
teklif :: 305
teşkilat :: 304
mütalaa :: 289
intihap :: 284
mühim :: 280
mebus :: 280
icap :: 274
temsili :: 271
riyaset :: 271
söz :: 269
zevat :: 263
esas :: 260
netice :: 260
fikir :: 255
dikkat :: 251
türk :: 246
ismet :: 241
talep :: 240
takip :: 239
karar :: 238
devam :: 237
yeni :: 235
umumiye :: 233
cemal :: 229
vilayet :: 229
tayin :: 228
ferit :: 223
doğru :: 223
murahhas :: 216
hilafet :: 212
cumhuriyet :: 211
taarruz :: 211
itilaf :: 208
tatbik :: 207
dahiliye :: 204
temas :: 202
havali :: 202
arzu :: 202
istiklal :: 202
ilan :: 201
telakki :: 200
sebep :: 199
tamamen :: 199
efendim :: 199
türki :: 198
trakya :: 197
arkadaş :: 195
itimat :: 195
package zemberek.examples.morphology;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Scanner;
import zemberek.core.logging.Log;
import zemberek.morphology.TurkishMorphology;
import zemberek.morphology.analysis.SingleAnalysis;
import zemberek.morphology.analysis.WordAnalysis;
public class StemmingAndLemmatization {
public static void main(String[] args) {
TurkishMorphology morphology = TurkishMorphology.createWithDefaults();
String word = "muvaffakiyetsizleştirici";
try {
BufferedWriter writer = new BufferedWriter(new FileWriter("/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/olmayanlar.txt"));
BufferedWriter writer2 = new BufferedWriter(new FileWriter("/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/olanlar.txt"));
Scanner sc2 = null;
try {
sc2 = new Scanner(new File("/home/maidis/İndirilenler/zemberek-nlp-master/examples/src/main/java/zemberek/examples/morphology/text.txt"));
} catch (FileNotFoundException e) {
while (sc2.hasNextLine()) {
Scanner s2 = new Scanner(sc2.nextLine());
while (s2.hasNext()) {
String s =;
s = s.replaceAll("\\s*\\p{Punct}+\\s*$", "");
if (s.length()>2) {
try {
WordAnalysis results = morphology.analyze(s);
SingleAnalysis result = results.getAnalysisResults().get(0);;
writer2.write(result.getLemmas().get(0).toString() + '\n');
} catch (IndexOutOfBoundsException e) {
writer.write(s + '\n');
} catch( ioe) {
//you write here code if an ioexcepion happens. You can leave it empty if you want
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment