Minsub Won MSWon

## One_hot.py
def One_hot(data):
  """Encode every element of ``data`` as a one-hot NumPy vector.

  Each distinct value gets a column index in order of first appearance, so
  the encoding is reproducible between runs (iterating a ``set`` of strings
  is not, because string hashing is randomized per process).

  Args:
    data: Sequence of hashable values. It is traversed twice, so a one-shot
      iterator is not suitable.

  Returns:
    A list with one 1-D float array per element of ``data``. Each array has
    length equal to the number of distinct values and holds 1 at the
    element's index and 0 elsewhere. Empty input gives ``[]``.
  """
  index_dict = {value: index for index, value in enumerate(dict.fromkeys(data))}
  result = []

  for value in data:
    one_hot = np.zeros(len(index_dict))
    one_hot[index_dict[value]] = 1
    # Keep the vector; without this the encoding is computed and thrown away.
    result.append(one_hot)

  return result

## Zero_padding.py
def Zero_padding(train_batch_X, Batch_size, Maxseq_length, Vector_size):
  """Copy a batch of 2-D samples into one zero-filled 3-D array.

  Args:
    train_batch_X: Indexable collection of samples; the first ``Batch_size``
      entries are used. Each sample needs at least two dimensions and is
      expected to fit inside ``(Maxseq_length, Vector_size)``.
    Batch_size: Number of samples to copy (size of the first output axis).
    Maxseq_length: Size of the second output axis.
    Vector_size: Size of the third output axis.

  Returns:
    Float array of shape ``(Batch_size, Maxseq_length, Vector_size)`` where
    slot ``i`` holds sample ``i`` in its leading rows/columns and zeros in
    every remaining cell.
  """
  padded = np.zeros((Batch_size, Maxseq_length, Vector_size))

  for batch_idx in range(Batch_size):
    sample = train_batch_X[batch_idx]
    dims = np.shape(sample)
    n_rows = dims[0]
    n_cols = dims[1]
    # Only the region the sample covers is overwritten; the rest stays 0.
    padded[batch_idx, :n_rows, :n_cols] = sample

  return padded

## read_data.py
def read_data(filename):
    """Load a tab-separated UTF-8 text file, dropping its first line.

    Args:
        filename: Path of the file to open for reading.

    Returns:
        A list with one entry per line after the header line; each entry is
        the list of strings obtained by splitting that line on tabs.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        lines = handle.read().splitlines()

    rows = []
    for line in lines[1:]:  # skip the header line
        rows.append(line.split('\t'))
    return rows

## tokenize.py
from konlpy.tag import Twitter

def tokenize(doc):
    """Run the POS tagger on ``doc`` and return '/'-joined tokens.

    Each item yielded by ``pos_tagger.pos`` is joined with '/' into a single
    string; normalization and stemming are both requested from the tagger.

    NOTE(review): ``pos_tagger`` is not defined in the visible snippet; it is
    presumably a module-level ``Twitter()`` instance -- confirm.
    """
    tagged = pos_tagger.pos(doc, norm=True, stem=True)
    tokens = []
    for parts in tagged:
        tokens.append('/'.join(parts))
    return tokens

## Crawling_Movie_rating.R
library(rvest)
library(stringr)

## Naver movie rating page URL for "Infinity War"; it ends in "page=", so a
## page number is presumably appended by later code ##
main_url <- "https://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?code=136315&type=after&onlyActualPointYn=N&order=newest&page="


## Empty vectors to be filled later (by their names: review text, star
## score, posting date) ##
reply_list <- character(0)
star_list <- numeric(0)
date_list <- character(0)

## Crawling_Images.R
library(rvest)
library(stringr)

## News article that contains an image ##
url <- "http://news.naver.com/main/read.nhn?mode=LSD&mid=shm&sid1=100&oid=421&aid=0003064130"

## Parse the page, then read the src attribute of the first <img> found
## under the element with id "articleBodyContents" ##
sess <- read_html(x = url)
node <- html_node(x = sess, css = "#articleBodyContents img")
imgurl <- html_attr(x = node, name = "src")

## Save the image as image_test.jpeg using the curl download method ##
download.file(url = imgurl, destfile = "image_test.jpeg", method = "curl")

## Bi_LSTM_stateoutput.py
import numpy as np
import tensorflow as tf

# Float input whose first dimension is left unspecified; the other two are
# fixed at 2 and 3 (presumably time steps and features -- confirm).
X = tf.placeholder(tf.float32, [None, 2, 3])
# One int32 per example (presumably the unpadded length of each sequence).
seq_length = tf.placeholder(tf.int32, [None])

# Three examples of shape (2, 3). The literal was left unterminated in the
# snippet; it is closed here so the statement parses.
# NOTE(review): assumes no further rows were cut off -- confirm.
X_batch = np.array([
          [[0,1,2], [9,8,7]], ## data 1
          [[3,4,5], [0,0,0]], ## data 2
          [[6,7,8], [6,5,4]], ## data 3
])

## tf_Partial_GPU.py
import os
import tensorflow as tf

# Restrict CUDA device visibility for this process to device index 1.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# allow_growth makes TensorFlow enlarge its GPU memory allocation on demand
# instead of reserving it all when the session is created.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)

## pad_seq.py
from keras.preprocessing.sequence import pad_sequences

# Two integer sequences of unequal length (3 and 4 items).
train_seq = [
    [1, 2, 3],
    [4, 7, 9, 1],
]
# padding="post" places the fill values after each sequence, up to maxlen.
pad_sequences(train_seq, padding="post", maxlen=5)

## Remove_pattern.py
import re

test = "What do you need?"
# Delete every occurrence of "do"; the spaces on both sides of it remain,
# which leaves a double space in the result.
re.sub(pattern="do", repl="", string=test)
## "What  you need?" ##
	def One_hot(data):
	    """Encode every element of ``data`` as a one-hot NumPy vector.

	    Each distinct value gets a column index in order of first appearance,
	    so the encoding is reproducible between runs (iterating a ``set`` of
	    strings is not, because string hashing is randomized per process).

	    Args:
	        data: Sequence of hashable values. It is traversed twice, so a
	            one-shot iterator is not suitable.

	    Returns:
	        A list with one 1-D float array per element of ``data``. Each
	        array has length equal to the number of distinct values and holds
	        1 at the element's index and 0 elsewhere. Empty input gives ``[]``.
	    """
	    index_dict = {value: index for index, value in enumerate(dict.fromkeys(data))}
	    result = []

	    for value in data:
	        one_hot = np.zeros(len(index_dict))
	        one_hot[index_dict[value]] = 1
	        # Keep the vector; otherwise the encoding is computed and discarded.
	        result.append(one_hot)

	    return result
	def Zero_padding(train_batch_X, Batch_size, Maxseq_length, Vector_size):
	    """Copy a batch of 2-D samples into one zero-filled 3-D array.

	    Args:
	        train_batch_X: Indexable collection of samples; the first
	            ``Batch_size`` entries are used. Each sample needs at least
	            two dimensions and is expected to fit inside
	            ``(Maxseq_length, Vector_size)``.
	        Batch_size: Number of samples to copy (first output axis).
	        Maxseq_length: Size of the second output axis.
	        Vector_size: Size of the third output axis.

	    Returns:
	        Float array of shape ``(Batch_size, Maxseq_length, Vector_size)``
	        where slot ``i`` holds sample ``i`` in its leading rows/columns
	        and zeros in every remaining cell.
	    """
	    zero_pad = np.zeros((Batch_size, Maxseq_length, Vector_size))

	    for i in range(Batch_size):
	        sample = train_batch_X[i]
	        shape = np.shape(sample)
	        # Only the region the sample covers is overwritten; the rest stays 0.
	        zero_pad[i, :shape[0], :shape[1]] = sample

	    return zero_pad
	def read_data(filename):
	    """Load a tab-separated UTF-8 text file, dropping its first line.

	    Args:
	        filename: Path of the file to open for reading.

	    Returns:
	        A list with one entry per line after the header line; each entry
	        is the list of strings obtained by splitting that line on tabs.
	    """
	    with open(filename, 'r', encoding='utf-8') as f:
	        data = [line.split('\t') for line in f.read().splitlines()]
	    return data[1:]  # skip the header line
	from konlpy.tag import Twitter

	def tokenize(doc):
	    """Run the POS tagger on ``doc`` and return '/'-joined tokens.

	    Each item yielded by ``pos_tagger.pos`` is joined with '/' into a
	    single string; normalization and stemming are both requested.

	    NOTE(review): ``pos_tagger`` is not defined in the visible snippet; it
	    is presumably a module-level ``Twitter()`` instance -- confirm.
	    """
	    return ['/'.join(t) for t in pos_tagger.pos(doc, norm=True, stem=True)]
	library(rvest)
	library(stringr)

	## Naver movie rating page URL for "Infinity War"; it ends in "page=", so a
	## page number is presumably appended by later code ##
	main_url <- "https://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?code=136315&type=after&onlyActualPointYn=N&order=newest&page="


	## Empty vectors to be filled later (by their names: review text, star
	## score, posting date) ##
	reply_list <- character(0)
	star_list <- numeric(0)
	date_list <- character(0)
	library(rvest)
	library(stringr)

	## News article that contains an image ##
	url <- "http://news.naver.com/main/read.nhn?mode=LSD&mid=shm&sid1=100&oid=421&aid=0003064130"

	## Parse the page, then read the src attribute of the first <img> found
	## under the element with id "articleBodyContents" ##
	sess <- read_html(x = url)
	node <- html_node(x = sess, css = "#articleBodyContents img")
	imgurl <- html_attr(x = node, name = "src")

	## Save the image as image_test.jpeg using the curl download method ##
	download.file(url = imgurl, destfile = "image_test.jpeg", method = "curl")
	import numpy as np
	import tensorflow as tf

	# Float input whose first dimension is left unspecified; the other two are
	# fixed at 2 and 3 (presumably time steps and features -- confirm).
	X = tf.placeholder(tf.float32, [None, 2, 3])
	# One int32 per example (presumably the unpadded length of each sequence).
	seq_length = tf.placeholder(tf.int32, [None])

	# Three examples of shape (2, 3). The literal was left unterminated in the
	# snippet; it is closed here so the statement parses.
	# NOTE(review): assumes no further rows were cut off -- confirm.
	X_batch = np.array([
	    [[0,1,2], [9,8,7]], ## data 1
	    [[3,4,5], [0,0,0]], ## data 2
	    [[6,7,8], [6,5,4]], ## data 3
	])
	import os
	import tensorflow as tf

	# Restrict CUDA device visibility for this process to device index 1.
	os.environ["CUDA_VISIBLE_DEVICES"] = "1"

	# allow_growth makes TensorFlow enlarge its GPU memory allocation on demand
	# instead of reserving it all when the session is created.
	config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
	sess = tf.Session(config=config)
	from keras.preprocessing.sequence import pad_sequences

	# Two integer sequences of unequal length (3 and 4 items).
	train_seq = [
	    [1, 2, 3],
	    [4, 7, 9, 1],
	]
	# padding="post" places the fill values after each sequence, up to maxlen.
	pad_sequences(train_seq, padding="post", maxlen=5)
	import re

	test = "What do you need?"
	# Delete every occurrence of "do"; the spaces on both sides of it remain,
	# which leaves a double space in the result.
	re.sub(pattern="do", repl="", string=test)
	## "What  you need?" ##