Skip to content

Instantly share code, notes, and snippets.

View seozed's full-sized avatar
🌴
On vacation

zed seozed

🌴
On vacation
View GitHub Profile
@seozed
seozed / Convert unicode to normal string.py
Last active May 11, 2020 09:52
[转换unicode字符串为正常字符串] 如\xa0... #unicode
import unicodedata
# Sample text mixing no-break spaces (\xa0), an ideographic space (\u3000),
# a full-width comma and a tab.
s = 'T-shirt\xa0\xa0短袖圆领衫,\u3000体恤衫\xa0买一件\t吧'
# Apply NFKC compatibility normalization; the result is not stored here.
unicodedata.normalize('NFKC', s)
# Result as shown in the original note: T-shirt 短袖圆领衫, 体恤衫 买一件 吧
# This clean-up is needed often for data scraped by crawlers, so it sees heavy use.
def unicode_normalize(unistr, form='NFKC'):
"""
@seozed
seozed / jsonpath.py
Last active May 11, 2020 06:19
[jsonpath] 优化提取方法 #jsonpath
from jsonpath import jsonpath as _jsonpath
def jsonpath(obj, expr):
"""
优先项:如果匹配到的结果只有一个,则直接pop出该结果
"""
result = _jsonpath(obj, expr)
if isinstance(result, list) and len(result) == 1:
@seozed
seozed / clean html.py
Last active May 11, 2020 06:07
优雅的过滤HTML
from w3lib.html import remove_tags, strip_html5_whitespace
# The `keep` argument names the tags that should be preserved.
remove_tags(text, keep=('img',))
# Remove HTML tags and strip the leading/trailing whitespace.
def clean_tags(text, which_ones=(), keep=(), encoding=None) -> str:
if not text:
return None
content = remove_tags(text, which_ones, keep, encoding)
@seozed
seozed / mysql_connect.py
Last active May 11, 2020 06:03
[Database connect in python] #mysql #python
import pymysql.cursors
# Open the database connection, using DictCursor as the cursor class.
connection = pymysql.connect(
    host='localhost',
    user='user',
    password='passwd',
    db='db',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
@seozed
seozed / chunked.py
Created January 15, 2020 03:05
对可迭代的对象进行分块
def chunked(iterable, n):
    """Split an iterable into consecutive lists of at most ``n`` items.

    Args:
        iterable: Any iterable; it is consumed lazily, one chunk at a time.
        n: Maximum number of items per chunk. Must be at least 1; ``None``
            is passed through to ``islice`` unchanged.

    Returns:
        An iterator yielding lists. Every list holds ``n`` items except
        possibly the last one, which holds the remainder.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    from itertools import islice
    from functools import partial

    if n is not None and n < 1:
        # n == 0 would silently yield no chunks (dropping all data), and a
        # negative n would only fail once iteration starts; reject both now.
        raise ValueError("n must be at least 1")

    def take(n, iterable):
        return list(islice(iterable, n))

    # Two-argument iter(): keep calling take() until it returns the
    # sentinel [], which happens once the source iterator is exhausted.
    return iter(partial(take, n, iter(iterable)), [])
@seozed
seozed / csharp.cs
Last active January 10, 2020 07:12
C# 笔记
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net;
using System.IO;
namespace ConsoleApp1
{
@seozed
seozed / base62 convert.py
Last active January 10, 2020 07:10
62进制转换
ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
def base62_encode(num, alphabet=ALPHABET):
"""Encode a number in Base X
`num`: The number to encode
`alphabet`: The alphabet to use for encoding
"""
if (num == 0):
@seozed
seozed / thread.py
Last active August 16, 2018 09:29
高度抽象的多线程与多进程库
"""
多进程示例
"""
import time
from multiprocessing import Pool
def run(fn):
    """Pause for one second, then print ``fn``.

    Args:
        fn: The value to print; it is passed straight to ``print``.
    """
    time.sleep(1)
    print(fn)
@seozed
seozed / used_concurrent.futures.py
Created January 4, 2018 08:14
利用concurrent.futures实现多进程示例
import concurrent.futures
import math
# Sample integers for the demo (the name suggests they feed a primality
# check — confirm against the rest of the script).
PRIMES = [
    112272535095293,
    112582705942171,
    112272535095293,
    115280095190773,
    115797848077099,
    1099726899285419,
]
@seozed
seozed / multithreading_demo_1.py
Created January 4, 2018 08:11
一个高度抽象多线程模块示例
import concurrent.futures
import urllib.request
# Sample URLs for the demo (presumably the pages it fetches — confirm
# against the rest of the script).
URLS = [
    'http://www.163.com/',
    'http://www.qq.com/',
    'http://www.baidu.com/',
    'http://www.v2ex.com/',
    'http://www.360.com',
]