Skip to content

Instantly share code, notes, and snippets.

Jun-Wei Lin jwlin

Block or report user

Report or block jwlin

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View tutorial4_demo.py
import requests
import time
from bs4 import BeautifulSoup
import os
import re
import urllib.request
import json
PTT_URL = 'https://www.ptt.cc'
View tutorial3_demo.py
import requests
import time
from bs4 import BeautifulSoup
def get_web_page(url):
resp = requests.get(
url=url,
cookies={'over18': '1'}
)
View beautifulsoup_demo.py
from bs4 import BeautifulSoup
html_doc = """
<html>
<head>
<title>我是網頁標題</title>
<style>
.large {
color:blue;
text-align: center;
View requirement.txt
beautifulsoup4
cycler
matplotlib
numpy
pyparsing
python-dateutil
pytz
requests
six
View requests_demo.py
import requests
def get_web_page(url):
resp = requests.get(
url=url,
cookies={'over18': '1'}
)
if resp.status_code != 200:
print('Invalid url:', resp.url)
View generate_input.py
import os, json
from bs4 import BeautifulSoup
import preprocess
if __name__ == '__main__':
current_dir = os.path.dirname(__file__)
form_dir = os.path.join(current_dir, 'forms')
input_dir = os.path.join(current_dir, 'corpus', 'all-input')
input_types = ['text', 'email', 'password']
View feature_extraction.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Extract features from htmls
"""
import sys, os, random, datetime
from bs4 import BeautifulSoup
from preprocess import extract_features
You can’t perform that action at this time.