Skip to content

Instantly share code, notes, and snippets.

View seozed's full-sized avatar
🌴
On vacation

zed seozed

🌴
On vacation
View GitHub Profile
@seozed
seozed / 词型还原
Created October 12, 2022 08:11
lemmatize.py
import nltk
# Fetch the 'omw-1.4' and 'punkt' NLTK data packages before anything uses them.
# NOTE(review): these calls run every time the module is executed —
# presumably cheap once the data is already present locally; confirm.
nltk.download('omw-1.4')
nltk.download('punkt')
from nltk.stem import PorterStemmer
from nltk.stem import LancasterStemmer
from nltk.stem import WordNetLemmatizer
# Module-level WordNetLemmatizer instance.
wordnet_lemmatizer = WordNetLemmatizer()
import os
@seozed
seozed / MongoDbConnection.py
Last active May 9, 2022 10:04
MongoDbConnection
from pymongo import MongoClient
class MongoDBConnection(object):
"""MongoDB Connection"""
def __init__(self, host='localhost', port=27017):
self.host = host
self.port = port
self.connection = None
def __enter__(self):
self.connection = MongoClient(self.host, self.port)
@seozed
seozed / ratelimit.py
Created June 8, 2020 08:06
[限制客户端请求速度的装饰器] #qps
from ratelimit import limits, sleep_and_retry
import requests
FIFTEEN_MINUTES = 900
@sleep_and_retry
@limits(calls=15, period=FIFTEEN_MINUTES)
# 900秒内最多请求15次。
def call_api(url):
@seozed
seozed / mongo.js
Last active May 21, 2020 10:32
mongo 统计字段的长度并汇总
db.getCollection("mastersportal_counrse").aggregate([
{
"$project": {
"overview": {
"$strLenCP": "$overview"
},
"about": {
"$strLenCP": "$about"
}
@seozed
seozed / crate_decorator.py
Created May 21, 2020 01:51
快速声明一个装饰器
from functools import wraps
def timethis(func):
'''
Create a decorator .
'''
@wraps(func)
def wrapper(*args, **kwargs):
@seozed
seozed / 遍历子控件.js
Last active February 25, 2024 08:12
[example-for-autojs] #autojs
// Find an "AbsListView" widget and log the class name of each of its children.
var list = className("AbsListView").findOne();
var i = 0;
// The child count is re-read on every pass, exactly as the loop condition did.
while (i < list.childCount()) {
    var child = list.child(i);
    log(child.className());
    i++;
}
@seozed
seozed / click_by_bounds.js
Created May 20, 2020 09:16
[click_by_bounds] #autojs
/**
 * Click an element by its on-screen coordinates.
 *
 * Resolves the element with `selector.findOne()`, reads its bounds, and
 * passes the centre point of those bounds to `click`.
 *
 * @param {*} selector Element selector; must expose `findOne()`.
 * @returns {*} Whatever `click(x, y)` returns.
 */
function click_by_bounds(selector){
    var rect = selector.findOne().bounds();
    var x = rect.centerX();
    var y = rect.centerY();
    return click(x, y);
}
@seozed
seozed / snippet in scrapy.py
Last active May 20, 2020 09:07
[提升scrapy编码效率的片段]
from scrapy.http import Response
# Annotate the response type so the IDE can offer auto-completion.
def parse(self, response: Response):
    """Placeholder callback: does nothing with ``response`` and returns None."""
    return None
@seozed
seozed / Convert unicode to normal string.py
Last active May 11, 2020 09:52
[转换unicode字符串为正常字符串] 如\xa0... #unicode
import unicodedata
# Sample text mixing non-breaking spaces (\xa0), an ideographic space (\u3000),
# a full-width comma and a tab.
s = 'T-shirt\xa0\xa0短袖圆领衫,\u3000体恤衫\xa0买一件\t吧'
unicodedata.normalize('NFKC', s)
# Result (whitespace shown approximately): T-shirt 短袖圆领衫, 体恤衫 买一件 吧
# This kind of problem comes up often in data scraped by crawlers, so this snippet sees frequent use.
def unicode_normalize(unistr, form='NFKC'):
"""
@seozed
seozed / jsonpath.py
Last active May 11, 2020 06:19
[jsonpath] 优化提取方法 #jsonpath
from jsonpath import jsonpath as _jsonpath
def jsonpath(obj, expr):
"""
优先项:如果匹配到的结果只有一个,则直接pop出该结果
"""
result = _jsonpath(obj, expr)
if isinstance(result, list) and len(result) == 1: