@ratsgo
Last active May 28, 2017 05:59
import os
import time
import datetime
from tensorflow import flags
import tensorflow as tf
import numpy as np
class TextCNN(object):
    """
    A CNN for text classification.
    Takes pre-embedded (word2vec) inputs, followed by convolutional, average-pooling
    and softmax layers.
    <Parameters>
    - sequence_length: maximum sentence length
    - num_classes: number of classes
    - vocab_size: number of unique words (unused in this version, which takes pre-embedded inputs)
    - embedding_size: dimensionality of the embedding vector for each word
    - filter_sizes: sizes of the convolutional filters (= how many words each filter spans), e.g. "3, 4, 5"
    - num_filters: number of filters per filter size
    - l2_reg_lambda: strength of L2 regularization (applied to the output-layer weights here)
    """

    def __init__(
            self, sequence_length, num_classes, embedding_size,
            filter_sizes, num_filters, l2_reg_lambda=0.0):
        # Placeholders for the pre-embedded input documents, labels and dropout
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length, embedding_size], name="input_x")
        input_x_expanded = tf.expand_dims(self.input_x, -1)  # add a channel dimension for conv2d
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keep track of the L2 regularization loss
        l2_loss = tf.constant(0.0)
        # Create a convolution + average-pooling layer for each filter size
        self.h_outputs = []
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                # Pad (filter_size - 1) extra rows on the time axis so that the VALID convolution
                # below yields exactly one output per token position
                # (see the shape sanity check right after the class definition)
                pad_input = tf.pad(input_x_expanded, [[0, 0], [1, filter_size - 2], [0, 0], [0, 0]],
                                   mode='CONSTANT')
                Wc = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                bc = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    pad_input,
                    Wc,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, bc), name="relu")
                # Average pooling over the outputs
                pooled = tf.nn.avg_pool(
                    h,
                    ksize=[1, sequence_length, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                self.h_outputs.append(h)
                pooled_outputs.append(pooled)
        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        h_pool = tf.concat(pooled_outputs, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            h_drop = tf.nn.dropout(h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            self.finW = tf.get_variable(
                "finW",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            # The output bias is a zero constant, so the class scores are a pure
            # linear combination of the pooled features and finW
            b = tf.constant(0.0, shape=[num_classes], name="b")
            l2_loss += tf.nn.l2_loss(self.finW)
            scores = tf.nn.xw_plus_b(h_drop, self.finW, b, name="scores")
            self.predictions = tf.argmax(scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
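
# Quick sanity check of the padding arithmetic used inside TextCNN (an illustrative
# addition, not part of the original gist): padding the time axis with 1 + (filter_size - 2)
# extra zero rows keeps the VALID convolution output at exactly sequence_length positions,
# so every filter size produces one activation per token.
for _f in (3, 4, 5):
    _padded_len = 30 + 1 + (_f - 2)     # sequence_length 30 plus asymmetric zero padding
    assert _padded_len - _f + 1 == 30   # VALID convolution output length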
# Data loading
import cnn_movie_advanced_padding_tool_word2vec as tool

data_path = 'C:/Users/ratsgo/GoogleDrive/내폴더/textmining/data/eng_review_corpus.csv'  # English
corpus, points = tool.loading_rdata(data_path, minlength=10, eng=True, num=False, punc=False)
# data_path = 'C:/Users/ratsgo/GoogleDrive/내폴더/textmining/data/watcha_movie_review_spacecorrected_noisedeleted.csv'  # Korean
# corpus, points = tool.loading_rdata(data_path, minlength=10, eng=False, num=False, punc=False)
train_idx, _ = tool.get_train_idx(len(corpus), train_prop=1)
word2vec_path = 'C:/textmining/eng_moviereview_word2vec.pickle'  # English
# word2vec_path = 'C:/textmining/moviereview_word2vec.pickle'  # Korean
embedding_model = tool.load_word2vec(word2vec_path)
max_document_length = 30
num_classes = 2
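
# Illustrative sketch of the expected input format (an assumption about the external
# tool module, not taken from it): each document fed to TextCNN.input_x is a
# (max_document_length, embedding_dim) matrix of word2vec vectors, zero-padded to
# max_document_length, paired with a one-hot label over num_classes.
_example_doc = np.zeros((max_document_length, 100), dtype=np.float32)  # 100 = embedding_dim used below
_example_label = np.array([1.0, 0.0], dtype=np.float32)                # one-hot label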
'''
# This block builds the original vocabulary-index inputs (kept for reference)
rawcontents = tool.cut(corpus, cut=3)
raw_x, vocabulary, vocab_size, vocab_processor = tool.make_raw_input(rawcontents, max_document_length)
print('Vocabulary size: %s' % (vocab_size))
y = tool.make_output(points, threshold=2.5)
raw_x_train, raw_x_test, y_train, y_test, train_idx = tool.divide(raw_x, y, train_prop=0.9)
# word2vec_x_train, word2vec_x_test, y_train, y_test, train_idx = tool.divide(word2vec_x, y, train_prop=0.9)
'''
# Model hyperparameters
flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of embedded vector (default: 100)")
flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
flags.DEFINE_float("l2_reg_lambda", 0.1, "L2 regularization lambda (default: 0.1)")

# Training parameters
flags.DEFINE_integer("batch_size", 64, "Batch size (default: 64)")
flags.DEFINE_integer("num_epochs", 3, "Number of training epochs (default: 3)")
flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")

# Misc parameters
flags.DEFINE_boolean("allow_soft_placement", True, "Allow soft device placement")
flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")
# 3. Train and test the model
with tf.Graph().as_default():
    sess = tf.Session()
    with sess.as_default():
        cnn = TextCNN(sequence_length=max_document_length,
                      num_classes=num_classes,
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)
        '''
        # Alternative construction for the vocabulary-index inputs (kept for reference)
        cnn = TextCNN(sequence_length=raw_x_train.shape[1],
                      num_classes=y_train.shape[1],
                      vocab_size=vocab_size,
                      embedding_size=FLAGS.embedding_dim,
                      filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                      num_filters=FLAGS.num_filters,
                      l2_reg_lambda=FLAGS.l2_reg_lambda)
        '''

        # Define the training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        def train_step(x_batch, y_batch):
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
            }
            _, step, loss, accuracy = sess.run(
                [train_op, global_step, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))

        # Generate batches
        batches = tool.get_batch(list(zip(corpus, points)), FLAGS.batch_size, FLAGS.num_epochs, train_idx,
                                 word2vec=True, max_document_length=max_document_length, word2vec_model=embedding_model)

        # Training loop
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
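
        # The next section scores each token by multiplying its activation map with the
        # final-layer weights (a class-activation-map style interpretation). Shape sketch
        # (an illustrative addition using the defaults above, not part of the original gist):
        _num_sizes = len(FLAGS.filter_sizes.split(","))  # 3 with the default "3,4,5"
        _demo_actmap = np.random.rand(max_document_length, _num_sizes * FLAGS.num_filters)
        _demo_weights = np.random.rand(_num_sizes * FLAGS.num_filters, num_classes)
        assert np.dot(_demo_actmap, _demo_weights).shape == (max_document_length, num_classes)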
        # Store activation map * final-layer weights for each document
        import collections
        filter_sizes = list(map(int, FLAGS.filter_sizes.split(",")))
        fin_weights = sess.run(cnn.finW)
        batches = tool.get_batch(list(zip(corpus, points)), FLAGS.batch_size, 1, train_idx,
                                 word2vec=True, max_document_length=max_document_length, word2vec_model=embedding_model)
        results = []
        doc_idx = 0
        for num, batch in enumerate(batches):
            if num % 100 == 0:
                print("batch {}".format(num))
            x_batch, y_batch = zip(*batch)
            if len(x_batch) == FLAGS.batch_size:
                actmaps, predictions = sess.run([cnn.h_outputs, cnn.predictions],
                                                feed_dict={cnn.input_x: x_batch,
                                                           cnn.input_y: y_batch,
                                                           cnn.dropout_keep_prob: 1.0})
                for batch_idx in range(FLAGS.batch_size):
                    # Concatenate the activation maps of all filter sizes for this document
                    combined_actmap = \
                        np.zeros((max_document_length, len(filter_sizes) * FLAGS.num_filters))
                    start = 0
                    end = FLAGS.num_filters
                    for actmap_idx in range(len(actmaps)):
                        combined_actmap[:, start:end] = \
                            actmaps[actmap_idx][batch_idx].reshape(
                                (max_document_length, FLAGS.num_filters))
                        start += FLAGS.num_filters
                        end += FLAGS.num_filters
                    # Per-token scores for the predicted class
                    batch_result = np.dot(combined_actmap, fin_weights)
                    batch_result = batch_result[:, predictions[batch_idx]]
                    fin_result = collections.OrderedDict()
                    text = corpus[train_idx[doc_idx]].split()
                    for word_idx, score in enumerate(batch_result):
                        if word_idx < len(text):
                            fin_result[text[word_idx]] = score
                        else:
                            fin_result[word_idx] = score  # padding position, keyed by its index
                    preinfo = {'실제값': y_batch[batch_idx], '예측값': predictions[batch_idx]}  # actual / predicted
                    results.append([preinfo, fin_result])
                    doc_idx += 1
        # Mark the top-5 scoring words in each document with stars
        star_results = []
        for result_idx, data in enumerate(results):
            if result_idx % 10000 == 0:
                print("data {}".format(result_idx))
            tmp = sorted(data[1].items(), key=lambda x: x[1], reverse=True)[0:5]  # top-5 (word, score) pairs
            tmp = [score_tuple[0] for score_tuple in tmp]
            star_result = collections.OrderedDict()
            for score_tuple in data[1].items():
                star_result[score_tuple[0]] = ''
            for num in range(len(tmp)):
                star_result[tmp[num]] = '*' * (num + 1)
            star_results.append([data[0], star_result])
        # Build dictionaries of the words that appear in each document's top 5
        positive_dict = collections.defaultdict(int)
        negative_dict = collections.defaultdict(int)
        for result_idx, data in enumerate(results):
            if result_idx % 10000 == 0:
                print("data {}".format(result_idx))
            tmp = sorted(data[1].items(), key=lambda x: x[1], reverse=True)[0:5]  # top-5 (word, score) pairs
            tmp = [score_tuple[0] for score_tuple in tmp]
            if data[0]['예측값'] == 0:  # documents predicted as class 0 (treated as positive here)
                for num in range(len(tmp)):
                    if type(tmp[num]) == int:  # skip padding positions keyed by index
                        continue
                    positive_dict[tmp[num]] += 1
            else:
                for num in range(len(tmp)):
                    if type(tmp[num]) == int:
                        continue
                    negative_dict[tmp[num]] += 1
        positive_dict = sorted(positive_dict.items(), key=lambda x: x[1], reverse=True)
        negative_dict = sorted(negative_dict.items(), key=lambda x: x[1], reverse=True)
        # Save the results
        import pickle
        with open('result_epoch3_minleng10_maxleng30_cut3_word2vec_english.pickle', 'wb') as f:
            pickle.dump(results, f)
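
        # To inspect the saved results later (illustrative usage, not part of the original gist):
        #   with open('result_epoch3_minleng10_maxleng30_cut3_word2vec_english.pickle', 'rb') as f:
        #       results = pickle.load(f)
        #   preinfo, word_scores = results[0]  # label/prediction info and per-word saliency scores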