zed seozed

## mysql_connect.py
import pymysql.cursors

# Open a connection to the MySQL server.
# NOTE(review): host/user/password/database look like placeholder values —
# replace them (or load them from configuration) before real use.
connection = pymysql.connect(host='localhost',
                             user='user',
                             password='passwd',
                             # ``database`` is the canonical keyword; ``db`` is
                             # only kept by PyMySQL as a deprecated alias.
                             database='db',
                             charset='utf8mb4',
                             # DictCursor returns each row as a dict.
                             cursorclass=pymysql.cursors.DictCursor)

## clean html.py
from w3lib.html import remove_tags, strip_html5_whitespace
# The ``keep`` argument lists the names of the tags that should be preserved.
# NOTE(review): ``text`` is not defined in this snippet — presumably the HTML
# string to clean; confirm against the caller.
remove_tags(text, keep=('img',))


# Remove HTML tags and strip leading/trailing whitespace.
def clean_tags(text, which_ones=(), keep=(), encoding=None) -> str:
  """Strip HTML tags from ``text`` and trim the surrounding whitespace.

  Args:
    text: HTML markup to clean.
    which_ones: Forwarded to ``remove_tags`` as the tag names to remove.
    keep: Forwarded to ``remove_tags`` as the tag names to preserve.
    encoding: Forwarded to ``remove_tags``.

  Returns:
    The tag-stripped, whitespace-trimmed text, or ``None`` when ``text`` is
    empty or otherwise falsy (note that this deviates from the ``str``
    annotation).
  """
  if not text:
    return None
  content = remove_tags(text, which_ones, keep, encoding)
  # Return the cleaned text (it must not be discarded), trimmed of the
  # whitespace left around it once the tags are gone.
  return strip_html5_whitespace(content)

## jsonpath.py
from jsonpath import jsonpath as _jsonpath


def jsonpath(obj, expr):
  """
  优先项：如果匹配到的结果只有一个，则直接pop出该结果
  """

    result = _jsonpath(obj, expr)
    if isinstance(result, list) and len(result) == 1:

## Convert unicode to normal string.py
import unicodedata
s = 'T-shirt\xa0\xa0短袖圆领衫,\u3000体恤衫\xa0买一件\t吧'
# NFKC folds compatibility characters: the no-break spaces (\xa0) and the
# ideographic space (\u3000) become plain spaces; the tab is left untouched.
normalized = unicodedata.normalize('NFKC', s)
# normalized == 'T-shirt  短袖圆领衫, 体恤衫 买一件\t吧'

# Scraped data runs into this kind of problem all the time, so this
# normalization is used frequently.


    def unicode_normalize(unistr, form='NFKC'):
        """

## snippet in scrapy.py
from scrapy.http import Response

# Annotate ``response`` with its type so the IDE can offer auto-completion.
def parse(self, response: Response):
    """Placeholder callback: accepts ``response`` and intentionally does nothing."""
    return None

## click_by_bounds.js
/**
 * Click an element at the center of its bounds.
 *
 * @param {*} selector Element selector; must provide `findOne()`.
 * @returns The result of `click(x, y)` for the center coordinates.
 */
function click_by_bounds(selector) {
    var target = selector.findOne();
    var rect = target.bounds();
    var x = rect.centerX();
    var y = rect.centerY();
    return click(x, y);
}

## crate_decorator.py
from functools import wraps


def timethis(func):
    '''
    Create a decorator .
    '''

    @wraps(func)
    def wrapper(*args, **kwargs):

## mongo.js
db.getCollection("mastersportal_counrse").aggregate([

    {
        "$project": {
            "overview": {
                "$strLenCP": "$overview"
            },
            "about": {
                "$strLenCP": "$about"
            }

## ratelimit.py
from ratelimit import limits, sleep_and_retry

import requests

FIFTEEN_MINUTES = 900

@sleep_and_retry
@limits(calls=15, period=FIFTEEN_MINUTES)
# 900秒内最多请求15次。
def call_api(url):

## dictwriter_for_csv.py
import csv

# List of dicts, one per CSV row. The header is taken from the first record;
# a later record with a key outside that header makes DictWriter raise
# ValueError.
result_list = []
# Guard against an empty list: ``result_list[0]`` would raise IndexError, and
# only after ``open(..., 'w')`` had already truncated the output file.
if result_list:
    # newline='' lets the csv module control line endings itself.
    with open('weibo_data.csv', 'w', newline='', encoding='gbk') as csv_file:
        # Header fields come from the first record's keys.
        fieldnames = result_list[0].keys()
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(result_list)
	import pymysql.cursors

	# Connect to the database
	connection = pymysql.connect(host='localhost',
	user='user',
	password='passwd',
	db='db',
	charset='utf8mb4',
	cursorclass=pymysql.cursors.DictCursor)
	from w3lib.html import remove_tags, strip_html5_whitespace
	# keep参数为需要保留的标签名称
	remove_tags(text, keep=('img',))


	# 移除HTML标签，并删除前后的空白字符
	def clean_tags(text, which_ones=(), keep=(), encoding=None) -> str:
	if not text:
	return None
	content = remove_tags(text, which_ones, keep, encoding)
	from jsonpath import jsonpath as _jsonpath


	def jsonpath(obj, expr):
	"""
	优先项：如果匹配到的结果只有一个，则直接pop出该结果
	"""

	result = _jsonpath(obj, expr)
	if isinstance(result, list) and len(result) == 1:
	import unicodedata
	s = 'T-shirt\xa0\xa0短袖圆领衫,\u3000体恤衫\xa0买一件\t吧'
	unicodedata.normalize('NFKC', s)
	# T-shirt 短袖圆领衫, 体恤衫买一件吧

	# 日常爬虫抓取数据中常遇到此类问题，使用率较高


	def unicode_normalize(unistr, form='NFKC'):
	"""
	from scrapy.http import Response

	# 声明response类型，帮助IDE完成自动补全
	def parse(self, response: Response):
	pass
	/** 通过元素的坐标进行点击事件
	*
	* @param {*} selector 元素选择器
	*/
	function click_by_bounds(selector){

	var b = selector.findOne().bounds();
	return click(b.centerX(), b.centerY());
	}
	from functools import wraps


	def timethis(func):
	'''
	Create a decorator .
	'''

	@wraps(func)
	def wrapper(args, *kwargs):
	db.getCollection("mastersportal_counrse").aggregate([

	{
	"$project": {
	"overview": {
	"$strLenCP": "$overview"
	},
	"about": {
	"$strLenCP": "$about"
	}
	from ratelimit import limits, sleep_and_retry

	import requests

	FIFTEEN_MINUTES = 900

	@sleep_and_retry
	@limits(calls=15, period=FIFTEEN_MINUTES)
	# 900秒内最多请求15次。
	def call_api(url):
	import csv

	# 字典列表
	result_list = []
	with open('weibo_data.csv', 'w', newline='', encoding='gbk') as csv_file:
	# header field
	fieldnames = result_list[0].keys()
	writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
	writer.writeheader()
	writer.writerows(result_list)