This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
# Goal: parse housing information for each district from the websites.
# For each district, write 「土地區段位置或建物區門牌」,「建物型態」,「建物現況格局」,「坪數」,「屋齡」,「總價元」,「資料來源」 into a CSV file.
# Procedure:
# 1. Get the number of pages for each district by parsing the first HTML page.
# 2. For each district, put all HTML pages together, use HTMLParser to parse the content, and save the data into a file.
import sys | |
import math |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding=utf-8 | |
import distance_fun | |
import timeit | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# Benchmark the pure-Python implementation: time clustering.kp_distance on
# two small mixed numeric/categorical arrays with Wc = 0.5.
# The setup string imports the modules the timed statement needs.
t_python = timeit.Timer(
    "clustering.kp_distance(np.array([1, 2, 4, '住', '公寓']), np.array([2, 2, 2, '商', '公寓']), 0.5)",
    "import clustering\nimport numpy as np",
)
# timeit() returns the total elapsed time, in seconds, for all iterations.
time_python = t_python.timeit(1000000)
# The parenthesised single-argument form prints the same text on Python 2 and 3.
print('python code execution time: ' + str(time_python) + 's')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def kp_distance(object_x, object_y, Wc): | |
''' Distance function for two objects | |
''' | |
res = 0.0 | |
for attr_idx in xrange(0, len(object_x)): | |
cur_type = type(object_x[attr_idx]) | |
if( (cur_type == str) or (cur_type == np.string_) ): | |
# categorical attribute | |
if (object_x[attr_idx] == object_y[attr_idx]): | |
res = res + Wc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cimport numpy as np | |
cimport cython | |
import numpy as np | |
def kp_distance(np.ndarray object_x, np.ndarray object_y, float Wc): | |
''' Distance function for two objects | |
''' | |
cdef float res = 0.0 | |
cdef unsigned attr_idx | |
for attr_idx in xrange(0, len(object_x)): | |
cur_type = type(object_x[attr_idx]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8 | |
# Goal:
# Parse each city's website.
# For each city, write 「土地區段位置或建物區門牌」,「建物型態」,「建物現況格局」,「坪數」,「屋齡」,「總價元」,「資料來源」 into a CSV file.
# Steps:
# 1. Get the number of pages by parsing a string.
# 2. Put all HTML pages together and use HTMLParser to get the content.
# 3. Parse the content and save it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "Cure.h" | |
#include<iostream> | |
#include<cmath> | |
#include<algorithm> | |
#include<memory.h> | |
using namespace std; | |
bool operator< (const struct CLUSTER_PAIR &a, const struct CLUSTER_PAIR &b){ | |
return (a.distance < b.distance); |