Skip to content

Instantly share code, notes, and snippets.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#encoding: utf-8
from gensim.corpora import WikiCorpus
import codecs
import os
path_for_save_resault = '/home/ubuntu/Documents/hw_background_gene/'
wiki_jpn = WikiCorpus('/home/ubuntu/Documents/hw_background_gene/jawiki-latest-pages-articles.xml.bz2')
with codecs.open(os.path.join(path_for_save_resault,"wiki_jpn.txt") , "w" ,'utf-8') as output:
@yingminc
yingminc / vizual_ensemble.py
Created October 23, 2017 06:04
visualization for ensemble weights, threshold and f1_score of binary classification
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix,f1_score
from bokeh.io import show, output_file
from bokeh.models import (ColumnDataSource,HoverTool,FixedTicker,PrintfTickFormatter)
from bokeh.plotting import figure
def prabtoclass(series,hold):
    """Turn probabilities into binary labels using a target threshold.

    Args:
        series: Iterable of probability values.
        hold: Threshold; values greater than or equal to it map to 1.

    Returns:
        A list with one label per input value: 1 where ``value >= hold``,
        otherwise 0.
    """
    return [1 if i >= hold else 0 for i in series]
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@yingminc
yingminc / load_weather_us.ipynb
Created September 26, 2017 06:05
load weather
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@yingminc
yingminc / word2vec_test_jp_bokeh.py
Created September 26, 2017 06:02
word2vec for Japanese (visualization with bokeh)
#-*- encoding: utf-8 -*-
from __future__ import division
import math
import struct
import numpy as np
from multiprocessing import Pool, Value, Array
from sklearn.manifold import TSNE
import scipy
import codecs
import argparse
@yingminc
yingminc / load_weather_observation_format.py
Last active November 28, 2017 05:09
load weather and set the datetime
import pandas as pd
from datetime import datetime,timedelta
def dealwithshit(d):
    """Clean raw values and convert them to numbers.

    Each string element is reduced to the text before its first space,
    the placeholder '--' is replaced by 0, and the result is passed through
    ``pd.to_numeric`` with ``errors='coerce'``.

    NOTE(review): assumes ``d`` is a pandas Series so that ``apply`` sees
    individual values -- confirm against callers.
    """
    # Keep only the leading token of strings; non-strings pass through as-is.
    leading = d.apply(lambda x: x.split(' ')[0] if isinstance(x, str) else x)
    # '--' is mapped to 0 before the numeric conversion.
    filled = leading.replace({'--': 0})
    return filled.apply(pd.to_numeric, errors='coerce')
class ob:
@yingminc
yingminc / bokeh_widgets_practice.py
Last active November 28, 2017 05:13
visualization practice/ bokeh/ slider/ time series / button / animation
# coding: utf-8
import pandas as pd
from bokeh.core.properties import field
from bokeh.io import curdoc,output_notebook
from bokeh.layouts import layout,widgetbox,row
from bokeh.models import (
ColumnDataSource, HoverTool, SingleIntervalTicker, Slider,DateRangeSlider, Button, Label,RelativeDelta,
CategoricalColorMapper,HBox, Select
@yingminc
yingminc / holiday_info.ipynb
Created August 9, 2017 02:08
generate holiday information
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.