Jun-Wei Lin jwlin

## views.py
from django.http import HttpResponse
from django.shortcuts import get_object_or_404, render
from .models import Account, Client

def show(request):
    uname = request.POST["uname"]
    if "injection" in request.POST:
        # Use raw SQL and string concatenation, resulting in SQL injection
        accounts = Account.objects.raw(f'SELECT * FROM atm_account as a, atm_client as c WHERE a.client_id=c.id and c.username="{uname}"')
    else:

## extract_features.py
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Extract features from htmls
"""

import sys, os, random, datetime
from bs4 import BeautifulSoup
# preprocess.py 檔案在這裡

## getTopic.py
def getTopic(self, ai, imgtopic, Dict):
    # JW:　features 是準備要被預測 topic 的變數, 應該要挪到下方 prediction 開始前, 程式比較連貫
    # extract the features of the element
    features = str(re.sub(' +', ' ', ' '.join(self.extract_features(ai, imgtopic, Dict, 1))))
    #print (features)

    # JW:　從這裡開始可以獨立切一個 function, 只呼叫一次, 把 train 好的 model 存起來
    #open training data file
    current_dir = os.path.dirname(_file_)
    corpus_dir = os.path.join(current_dir, 'corpus', 'all-corpus')

## cs295-first-look.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                jwlin
                / cs295-first-look.ipynb
            
            
              Created
              February 14, 2018 00:40
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## blog_hahow_crawler_3.py
with open('hahow_courses.json', 'r', encoding='utf-8') as f:
    courses = json.load(f)

# 取出程式類課程的募資價/上線價/學生數，並顯示統計資料
pre_order_prices = list()
prices = list()
tickets = list()
lengths = list()
for c in courses:
    if '55de81ac9d1fa51000f94770' in c['categories']:

## blog_hahow_crawler_2.json
{
    "_id": "58744feda8aae907000d06c0",
    "categories": [
      "55de81ac9d1fa51000f94770",
      "55de81929d1fa51000f94769"
    ],
    "coverImage": {
      "_id": "588421e46ecf3a0700b7a31d",
      "url": "https://hahow.in/images/588421e46ecf3a0700b7a31d"
    },

## blog_hahow_crawler_1.py
def crawl():
    # 初始 API: https://api.hahow.in/api/courses?limit=12&status=PUBLISHED
    # 接續 API: https://api.hahow.in/api/courses?latestId=54d5a117065a7e0e00725ac0&latestValue=2015-03-27T15:38:27.187Z&limit=30&status=PUBLISHED
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/59.0.3071.115 Safari/537.36'}
    url = 'https://api.hahow.in/api/courses'
    courses = list()
    resp_courses = requests.get(url + '?limit=30&status=PUBLISHED', headers=headers).json()
    while resp_courses:  # 有回傳資料則繼續下一輪擷取

## hahow_courses.json
[
  {
    "_id": "58d5c70c27ea7d070060160e",
    "categories": [
      "55de81ac9d1fa51000f94770",
      "55de81929d1fa51000f94769",
      "55de81879d1fa51000f94766"
    ],
    "coverImage": {
      "_id": "58f318cc4909c907004ac575",

## hahow_crawler.py
import requests
import json
import time
import numpy as np
import os

category = {
    '55de818a9d1fa51000f94767': '生活',
    '55de818d9d1fa51000f94768': '藝術',
    '55de819a9d1fa51000f9476b': '運動',

## example.json
[
  {
    "href": "/bbs/Beauty/M.1482072854.A.DDC.html",
    "num_image": 3,
    "push_count": 18,
    "title": "[神人] 長榮空姐"
  },
  {
    "href": "/bbs/Beauty/M.1482075654.A.C1D.html",
    "num_image": 7,
	from django.http import HttpResponse
	from django.shortcuts import get_object_or_404, render
	from .models import Account, Client

	def show(request):
	uname = request.POST["uname"]
	if "injection" in request.POST:
	# Use raw SQL and string concatenation, resulting in SQL injection
	accounts = Account.objects.raw(f'SELECT * FROM atm_account as a, atm_client as c WHERE a.client_id=c.id and c.username="{uname}"')
	else:
	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	"""
	Extract features from htmls
	"""

	import sys, os, random, datetime
	from bs4 import BeautifulSoup
	# preprocess.py 檔案在這裡
	def getTopic(self, ai, imgtopic, Dict):
	# JW:　features 是準備要被預測 topic 的變數, 應該要挪到下方 prediction 開始前, 程式比較連貫
	# extract the features of the element
	features = str(re.sub(' +', ' ', ' '.join(self.extract_features(ai, imgtopic, Dict, 1))))
	#print (features)

	# JW:　從這裡開始可以獨立切一個 function, 只呼叫一次, 把 train 好的 model 存起來
	#open training data file
	current_dir = os.path.dirname(_file_)
	corpus_dir = os.path.join(current_dir, 'corpus', 'all-corpus')
	with open('hahow_courses.json', 'r', encoding='utf-8') as f:
	courses = json.load(f)

	# 取出程式類課程的募資價/上線價/學生數，並顯示統計資料
	pre_order_prices = list()
	prices = list()
	tickets = list()
	lengths = list()
	for c in courses:
	if '55de81ac9d1fa51000f94770' in c['categories']:
	{
	"_id": "58744feda8aae907000d06c0",
	"categories": [
	"55de81ac9d1fa51000f94770",
	"55de81929d1fa51000f94769"
	],
	"coverImage": {
	"_id": "588421e46ecf3a0700b7a31d",
	"url": "https://hahow.in/images/588421e46ecf3a0700b7a31d"
	},
	def crawl():
	# 初始 API: https://api.hahow.in/api/courses?limit=12&status=PUBLISHED
	# 接續 API: https://api.hahow.in/api/courses?latestId=54d5a117065a7e0e00725ac0&latestValue=2015-03-27T15:38:27.187Z&limit=30&status=PUBLISHED
	headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
	'AppleWebKit/537.36 (KHTML, like Gecko) '
	'Chrome/59.0.3071.115 Safari/537.36'}
	url = 'https://api.hahow.in/api/courses'
	courses = list()
	resp_courses = requests.get(url + '?limit=30&status=PUBLISHED', headers=headers).json()
	while resp_courses: # 有回傳資料則繼續下一輪擷取
	[
	{
	"_id": "58d5c70c27ea7d070060160e",
	"categories": [
	"55de81ac9d1fa51000f94770",
	"55de81929d1fa51000f94769",
	"55de81879d1fa51000f94766"
	],
	"coverImage": {
	"_id": "58f318cc4909c907004ac575",
	import requests
	import json
	import time
	import numpy as np
	import os

	category = {
	'55de818a9d1fa51000f94767': '生活',
	'55de818d9d1fa51000f94768': '藝術',
	'55de819a9d1fa51000f9476b': '運動',
	[
	{
	"href": "/bbs/Beauty/M.1482072854.A.DDC.html",
	"num_image": 3,
	"push_count": 18,
	"title": "[神人] 長榮空姐"
	},
	{
	"href": "/bbs/Beauty/M.1482075654.A.C1D.html",
	"num_image": 7,