Skip to content

Instantly share code, notes, and snippets.

View seozed's full-sized avatar
🌴
On vacation

zed seozed

🌴
On vacation
View GitHub Profile
@seozed
seozed / mysql_connect.py
Last active May 11, 2020 06:03
[Connecting to a MySQL database in Python] #mysql #python
import pymysql.cursors
# Connect to the database.
# `database` is the documented keyword argument; PyMySQL accepts `db` only as a
# deprecated compatibility alias, so the documented name is used here.
connection = pymysql.connect(
    host='localhost',
    user='user',
    password='passwd',
    database='db',
    charset='utf8mb4',
    # DictCursor is selected so the cursors created from this connection use it.
    cursorclass=pymysql.cursors.DictCursor,
)
@seozed
seozed / clean html.py
Last active May 11, 2020 06:07
优雅的过滤HTML
from w3lib.html import remove_tags, strip_html5_whitespace
# The keep argument holds the names of the tags that should be preserved.
# NOTE(review): `text` is not defined in the visible snippet — presumably the
# HTML string to clean; confirm against the full gist.
remove_tags(text, keep=('img',))
# 移除HTML标签,并删除前后的空白字符
def clean_tags(text, which_ones=(), keep=(), encoding=None) -> str:
if not text:
return None
content = remove_tags(text, which_ones, keep, encoding)
@seozed
seozed / jsonpath.py
Last active May 11, 2020 06:19
[jsonpath] 优化提取方法 #jsonpath
from jsonpath import jsonpath as _jsonpath
def jsonpath(obj, expr):
"""
优先项:如果匹配到的结果只有一个,则直接pop出该结果
"""
result = _jsonpath(obj, expr)
if isinstance(result, list) and len(result) == 1:
@seozed
seozed / Convert unicode to normal string.py
Last active May 11, 2020 09:52
[转换unicode字符串为正常字符串] 如\xa0... #unicode
import unicodedata
# Sample text containing \xa0 and \u3000 escapes and a tab.
s = 'T-shirt\xa0\xa0短袖圆领衫,\u3000体恤衫\xa0买一件\t吧'
# Apply NFKC normalization; the return value is not stored, so this line only
# demonstrates the call.
unicodedata.normalize('NFKC', s)
# Result as recorded by the author:
# T-shirt 短袖圆领衫, 体恤衫 买一件 吧
# This kind of problem shows up often in data scraped by crawlers, so this
# snippet gets used a lot.
def unicode_normalize(unistr, form='NFKC'):
"""
@seozed
seozed / snippet in scrapy.py
Last active May 20, 2020 09:07
[提升scrapy编码效率的片段]
from scrapy.http import Response
# Declare the type of `response` so the IDE can offer autocompletion.
def parse(self, response: Response):
    # Placeholder body: nothing is done with the response.
    return None
@seozed
seozed / click_by_bounds.js
Created May 20, 2020 09:16
[click_by_bounds] #autojs
/**
 * Click an element at the centre of its bounds.
 *
 * @param {*} selector Element selector; `findOne()` is called on it and the
 *     result's `bounds()` supplies the coordinates.
 * @returns {*} Whatever `click(x, y)` returns.
 */
function click_by_bounds(selector){
    var rect = selector.findOne().bounds();
    var x = rect.centerX();
    var y = rect.centerY();
    return click(x, y);
}
@seozed
seozed / crate_decorator.py
Created May 21, 2020 01:51
快速声明一个装饰器
from functools import wraps
def timethis(func):
'''
Create a decorator .
'''
@wraps(func)
def wrapper(*args, **kwargs):
@seozed
seozed / mongo.js
Last active May 21, 2020 10:32
mongo 统计字段的长度并汇总
db.getCollection("mastersportal_counrse").aggregate([
{
"$project": {
"overview": {
"$strLenCP": "$overview"
},
"about": {
"$strLenCP": "$about"
}
@seozed
seozed / ratelimit.py
Created June 8, 2020 08:06
[限制客户端请求速度的装饰器] #qps
from ratelimit import limits, sleep_and_retry
import requests
FIFTEEN_MINUTES = 900
@sleep_and_retry
@limits(calls=15, period=FIFTEEN_MINUTES)
# 900秒内最多请求15次。
def call_api(url):
@seozed
seozed / dictwriter_for_csv.py
Last active September 7, 2021 07:01
将字典列表写入到csv文件中
import csv
# List of dicts to export; the header row is taken from the first dict's keys.
result_list = []

# Only export when there is at least one record. Indexing result_list[0] on an
# empty list raises IndexError, and opening the file first would already have
# truncated any existing weibo_data.csv.
if result_list:
    # newline='' lets the csv module control line endings itself.
    with open('weibo_data.csv', 'w', newline='', encoding='gbk') as csv_file:
        # Header fields.
        fieldnames = result_list[0].keys()
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(result_list)