Skip to content

Instantly share code, notes, and snippets.

View ratsgo's full-sized avatar
💭
I may be slow to respond.

gichang.lee ratsgo

💭
I may be slow to respond.
View GitHub Profile
# -*- coding: utf-8 -*-
"""
Kakao Hangul Analyzer III
__version__ = '0.3'
__author__ = 'Kakao Corp.'
__copyright__ = 'Copyright (C) 2018-, Kakao Corp. All rights reserved.'
__license__ = 'Apache 2.0'
@ratsgo
ratsgo / avltree.py
Last active October 29, 2020 14:59
# Module-level switch: debug() prints only while this is truthy.
outputdebug = True


def debug(msg):
    """Print *msg* when the module-level ``outputdebug`` flag is truthy.

    Args:
        msg: Object to print; it is handed to ``print`` unchanged.

    Returns:
        None. Nothing is printed while ``outputdebug`` is falsy.
    """
    # The flag is looked up on every call, so rebinding it takes effect
    # immediately for later calls.
    if outputdebug:
        print(msg)
class Node():
def __init__(self, key):
import frontmatter
import glob
import yaml
import string
from konlpy.tag import Komoran
from sklearn.feature_extraction.text import TfidfVectorizer
def get_posts(folder='C:/Users/ratsgo/GoogleDrive/내폴더/git/blog/_posts'):
result = {}
for filepath in glob.glob(folder + "/*"):
from collections import Counter
import random
def p_topic_given_document(topic, d, alpha=0.1):
    """Return the smoothed weight of *topic* within document *d*.

    Evaluates ``(document_topic_counts[d][topic] + alpha) /
    (document_lengths[d] + K * alpha)``.

    Relies on the module-level names ``document_topic_counts``,
    ``document_lengths`` and ``K``, which are defined outside this block.
    NOTE(review): ``K`` is presumably the number of topics -- confirm
    against its definition.

    Args:
        topic: Key used to index ``document_topic_counts[d]``.
        d: Key used to index ``document_topic_counts`` and
            ``document_lengths``.
        alpha: Smoothing term added to the count in the numerator and,
            scaled by ``K``, to the denominator. Defaults to 0.1.

    Returns:
        The smoothed ratio described above.
    """
    return ((document_topic_counts[d][topic] + alpha) /
            (document_lengths[d] + K * alpha))
def p_word_given_topic(word, topic, beta=0.1):
    """Return the smoothed weight of *word* within *topic*.

    Evaluates ``(topic_word_counts[topic][word] + beta) /
    (topic_counts[topic] + V * beta)``.

    Relies on the module-level names ``topic_word_counts``,
    ``topic_counts`` and ``V``, which are defined outside this block.
    NOTE(review): ``V`` is presumably the vocabulary size -- confirm
    against its definition.

    Args:
        word: Key used to index ``topic_word_counts[topic]``.
        topic: Key used to index ``topic_word_counts`` and
            ``topic_counts``.
        beta: Smoothing term added to the count in the numerator and,
            scaled by ``V``, to the denominator. Defaults to 0.1.

    Returns:
        The smoothed ratio described above.
    """
    return ((topic_word_counts[topic][word] + beta) /
            (topic_counts[topic] + V * beta))
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Chi-En Wu
from math import log
def _normalize_prob(prob, item_set):
result = {}
if prob is None:
number = len(item_set)
for item in item_set:
result[item] = 1.0 / number
@ratsgo
ratsgo / analysis.R
Last active November 26, 2017 17:14
Word2Vec 분석
# Loads the saved inputs and builds a square distance matrix from the
# vectors in 'word2vec.txt'.
library(stringr)
# loading
# DTM: object deserialized from 'dtm.rds' (presumably a document-term
# matrix -- confirm against the script that wrote it).
DTM <- readRDS('dtm.rds')
# vec: space-separated table read as UTF-8 with no header row; skip=1 drops
# the first line of the file (presumably a "<count> <dim>" header line --
# confirm against the file).
vec <- read.csv('word2vec.txt', fileEncoding='utf-8', sep=" ", header=F, skip=1)
# distance matrix
# Pairwise distances between the rows of vec, with column 1 left out of the
# computation (uses dist()'s default method).
distance <- dist(vec[,-1])
# Expand the dist object into a full square matrix.
distance <- as.matrix(distance)
# Label the columns with the values from column 1 of vec (presumably the
# words -- confirm).
colnames(distance) <- vec[,1]
from collections import defaultdict
from pandas import read_table
import numpy as np
import math
class NaiveBayesClassifier:
def __init__(self, k=0.5):
self.k = k
self.word_probs = []
"""This file contains code for use with "Think Bayes",
by Allen B. Downey, available from greenteapress.com
Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
"""This file contains class definitions for:
Hist: represents a histogram (map from values to integer frequencies).
import numpy as np
import pandas as pd
import re
import tensorflow as tf
import random
import pickle
from collections import defaultdict
import operator
####################################################
import os
import time
import datetime
from tensorflow import flags
import tensorflow as tf
import numpy as np
class TextCNN(object):
"""
A CNN for text classification.