Skip to content

Instantly share code, notes, and snippets.

@monsterxcn
Last active September 8, 2021 14:15
Show Gist options
  • Save monsterxcn/e5f4aed7c5346df3587e5417ee0fcbe7 to your computer and use it in GitHub Desktop.
Save monsterxcn/e5f4aed7c5346df3587e5417ee0fcbe7 to your computer and use it in GitHub Desktop.
Python 利用腾讯云 OCR 处理圣遗物截图并自行计算圣遗物评分
# -*- coding: utf-8 -*-
import asyncio, difflib, hashlib, hmac, json, re, time
from datetime import datetime
from httpx import AsyncClient
# Tencent Cloud API credentials and the screenshot to recognise.
# NOTE(review): the x-filled values look like placeholders - supply real ones before running.
secretId = "AKIDxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
secretKey = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
imageUrl = "https://gchat.qpic.cn/gchatpic_new/38877256/957765668-2255829091-2FBAE655FCB15D93E873F83E3D361B90/0?term=255"

# Known artifact piece names; candidates for fuzzy correction of the OCR'd name.
# Five names per row (presumably one artifact set per row - confirm with the author).
nameList = [
    "角斗士的留恋", "角斗士的归宿", "角斗士的希冀", "角斗士的酣醉", "角斗士的凯旋",
    "乐团的晨光", "琴师的箭羽", "终幕的时计", "吟游者之壶", "指挥的礼帽",
    "饰金胸花", "追忆之风", "坚铜罗盘", "沉波之盏", "酒渍船帽",
    "魔女的炎之花", "魔女常燃之羽", "魔女破灭之时", "魔女的心之火", "焦灼的魔女帽",
    "渡火者的决绝", "渡火者的解脱", "渡火者的煎熬", "渡火者的醒悟", "渡火者的智慧",
    "历经风雪的思念", "摧冰而行的执望", "冰雪故园的终期", "遍结寒霜的傲骨", "破冰踏雪的回音",
    "雷鸟的怜悯", "雷灾的孑遗", "雷霆的时计", "降雷的凶兆", "唤雷的头冠",
    "平雷之心", "平雷之羽", "平雷之刻", "平雷之器", "平雷之冠",
    "磐陀裂生之花", "嵯峨群峰之翼", "星罗圭璧之晷", "巉岩琢塑之樽", "不动玄石之相",
    "夏祭之花", "夏祭终末", "夏祭之刻", "夏祭水玉", "夏祭之面",
    "野花记忆的绿野", "猎人青翠的箭羽", "翠绿猎人的笃定", "翠绿猎人的容器", "翠绿的猎人之冠",
    "远方的少女之心", "少女飘摇的思念", "少女苦短的良辰", "少女片刻的闲暇", "少女易逝的芳颜",
    "宗室之花", "宗室之翎", "宗室时计", "宗室银瓮", "宗室面具",
    "染血的铁之心", "染血的黑之羽", "骑士染血之时", "染血骑士之杯", "染血的铁假面",
    "无垢之花", "贤医之羽", "停摆之刻", "超越之盏", "嗤笑之面",
    "勋绩之花", "昭武翎羽", "金铜时晷", "盟誓金爵", "将帅兜鍪",
    "羁缠之花", "思忆之矢", "朝露之时", "祈望之心", "无常之面",
    "明威之镡", "切落之羽", "雷云之笼", "绯花之壶", "华饰之兜",
]

# The five artifact positions; candidates for fuzzy correction of the OCR'd position.
posList = ["生之花", "死之羽", "时之沙", "空之杯", "理之冠"]

# Entry (stat) names; candidates for fuzzy correction of main/sub entry names.
entryList = [
    "攻击力",
    "防御力",
    "生命值",
    "元素充能效率",
    "元素精通",
    "暴击率",
    "暴击伤害",
    "治疗加成",
    "水元素伤害加成",
    "冰元素伤害加成",
    "火元素伤害加成",
    "雷元素伤害加成",
    "岩元素伤害加成",
    "风元素伤害加成",
    "物理伤害加成",
]

# Entry name -> short type code stored in the "type" field of an entry.
entryType = {
    "攻击力": "atk",
    "防御力": "df",
    "生命值": "hp",
    "元素充能效率": "er",
    "元素精通": "em",
    "暴击率": "cr",
    "暴击伤害": "cd",
    "治疗加成": "heal",
    "水元素伤害加成": "hydro",
    "冰元素伤害加成": "cryo",
    "火元素伤害加成": "pyro",
    "雷元素伤害加成": "elec",
    "岩元素伤害加成": "geo",
    "风元素伤害加成": "anemo",
    "物理伤害加成": "phys",
    # [own scoring] the flat-value codes below exist only for the local score
    # calculation; per the author they are not part of the 可莉特调 data.
    "攻击力固定值": "atks",
    "防御力固定值": "dfs",
    "生命值固定值": "hps",
}

# Template describing the shape of one parsed artifact: a main entry plus
# four sub-entry slots, each a {"type", "name", "value"} record.
atfInfo = {
    "name": "",
    "pos": "",
    "star": 5,
    "level": 20,
    "main_item": {"type": "", "name": "", "value": ""},
    "sub_item": [{"type": "", "name": "", "value": ""} for _ in range(4)],
}
def _closest(text, candidates):
    """Return the candidate most similar to *text*, or *text* itself if none is close enough."""
    matches = difflib.get_close_matches(text, candidates, 1, cutoff=0.6)
    return matches[0] if matches else text


def _entry_type(name, value):
    """Return the type code for an entry, or "" when the entry name is unknown.

    Attack / defence / HP entries whose value carries no "%" are flat values
    and get the "s"-suffixed code (e.g. "atk" -> "atks").
    """
    code = entryType.get(name, "")
    if code and name in ("攻击力", "防御力", "生命值") and "%" not in value:
        code += "s"
    return code


async def artifact(atfInfo, detectedText, confidence, completed):
    """Write one OCR text line into the artifact record.

    Lines are consumed in recognition order; ``completed`` says which field
    the current line belongs to:

        0 name, 1 position, 2 main entry name, 3 main entry value,
        4 level, 5-8 the four sub-entries ("name+value").

    Names detected with confidence below 90 are replaced by the closest
    known candidate (``difflib.get_close_matches``); when no candidate is
    close enough the OCR text is kept instead of raising. Entry names are
    also corrected whenever they are not an exact known entry, because the
    type lookup depends on them.

    Args:
        atfInfo: artifact record to fill; it is mutated in place.
        detectedText: text of the current OCR line.
        confidence: OCR confidence of that line.
        completed: number of fields already filled.

    Returns:
        ``(atfInfo, completed)`` - ``completed`` is advanced by one, except
        when the line is skipped because it does not look like a level
        (step 4) or like a "name+value" sub-entry (steps 5-8).
    """
    # Drop the , and 。 characters that OCR may produce in front of sub-entries.
    text = detectedText.replace(",", "").replace("。", "")
    lowConfidence = confidence < 90

    if completed == 0:
        # 1) artifact name
        atfInfo["name"] = _closest(text, nameList) if lowConfidence else text
    elif completed == 1:
        # 2) artifact position
        atfInfo["pos"] = _closest(text, posList) if lowConfidence else text
    elif completed == 2:
        # 3) main entry name
        if lowConfidence or text not in entryList:
            text = _closest(text, entryList)
        atfInfo["main_item"]["name"] = text
    elif completed == 3:
        # 4) main entry value; the type distinguishes flat from percentage values
        mainItem = atfInfo["main_item"]
        mainItem["value"] = text.replace(",", "")
        mainItem["type"] = _entry_type(mainItem["name"], mainItem["value"])
    elif completed == 4:
        # 5) level, e.g. "+20"; skip lines that are not a level instead of
        # crashing in int()
        levelMatch = re.fullmatch(r"\+?\s*(\d+)", text.strip())
        if not levelMatch:
            return atfInfo, completed
        atfInfo["level"] = int(levelMatch.group(1))
    elif 5 <= completed <= 8:
        # 6) sub-entries, formatted as "name+value"
        subMatch = re.match(r'(.*)\+(.*)', text)
        if not subMatch:
            return atfInfo, completed
        subName = subMatch.group(1)
        # Values such as flat HP can exceed 1000 and contain a thousands separator.
        subValue = subMatch.group(2).replace(",", "")
        if lowConfidence or subName not in entryList:
            subName = _closest(subName, entryList)
        slot = atfInfo["sub_item"][completed - 5]
        slot["name"] = subName
        slot["value"] = subValue
        slot["type"] = _entry_type(subName, subValue)

    # One field handled: advance the progress counter.
    return atfInfo, completed + 1
async def calculate(atfInfo):
    """[own scoring] Score an artifact from its sub-entries.

    Only sub-entries are counted: including the main entry (for example the
    flat HP main stat of a flower) would dominate the result.

    Args:
        atfInfo: artifact record whose ``"sub_item"`` list holds
            ``{"type", "name", "value"}`` entries. Entries with an empty
            ``"type"`` (unfilled slots) are ignored.

    Returns:
        The score formatted with two decimals and a trailing ``%``, or an
        error message string when the entries cannot be evaluated.
    """
    def formula(atf):
        # Formula source: "[心得交流] [原神冒险团] 圣遗物评分体系"
        # https://ngabbs.com/read.php?tid=23802190
        return (
            atf["cr"] * 1.5 + atf["heal"] * 1.3
            + (atf["atk"] + atf["elem"] + atf["hp"]) * 1.0 + atf["er"] * 0.9
            + (atf["phys"] + atf["df"]) * 0.80 + atf["cd"] * 0.75 + atf["em"] * 0.25
            + (1.0 * atf["atks"] / 807) * 100
            + (1.0 * atf["hps"] / 5000) * 100
            + (0.8 * atf["dfs"] / 500) * 100
        )

    elemental = ("hydro", "cryo", "pyro", "elec", "geo", "anemo")
    # Every stat starts at zero; stored numbers never include the "%" sign.
    stats = {
        "atk": 0, "atks": 0, "df": 0, "dfs": 0, "hp": 0, "hps": 0,
        "cr": 0, "cd": 0, "er": 0, "em": 0, "heal": 0, "phys": 0, "elem": 0,
    }
    try:
        for entry in atfInfo["sub_item"]:
            kind = entry["type"]
            if not kind:
                # Unfilled slot (fewer than four sub-entries were recognised).
                continue
            # All elemental damage bonuses share the single "elem" bucket.
            key = "elem" if kind in elemental else kind
            stats[key] = float(entry["value"].replace("%", ""))
        score = formula(stats)
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        return f"副词条评分出错了!\n{str(e)}"
    # Round to two decimals; slicing str(float) truncates and breaks on
    # exponent notation.
    return f"{score:.2f}%"
async def tencentAccurateOCR(secretId, secretKey, imageUrl):
    """Build a signed Tencent Cloud ``GeneralAccurateOCR`` request.

    The signing steps follow the Python sample at
    https://cloud.tencent.com/document/api/866/33519#Python
    (algorithm TC3-HMAC-SHA256). No network I/O happens here; the caller
    sends the request.

    Args:
        secretId: Tencent Cloud SecretId, embedded in the Authorization header.
        secretKey: Tencent Cloud SecretKey, used to derive the signing key.
        imageUrl: URL of the image to recognise (sent as ``ImageUrl``).

    Returns:
        ``(endpoint, headers, payload)``: the URL to POST to, the request
        headers including ``Authorization``, and the JSON request body.
    """
    service = "ocr"                     # service name of the OCR API
    host = "ocr.tencentcloudapi.com"    # request domain of the OCR API
    region = "ap-beijing"
    action = "GeneralAccurateOCR"       # high-precision general print OCR
    version = "2018-11-19"              # API version for that action
    algorithm = "TC3-HMAC-SHA256"       # signature algorithm
    # One value feeds both the signed canonical headers and the sent header,
    # so the two cannot drift apart.
    contentType = "application/json; charset=utf-8"

    timestamp = int(time.time())
    # UTC date of the timestamp; time.gmtime avoids the deprecated
    # datetime.utcfromtimestamp.
    date = time.strftime("%Y-%m-%d", time.gmtime(timestamp))

    # Request parameters:
    # https://cloud.tencent.com/document/product/866/34937#2.-.E8.BE.93.E5.85.A5.E5.8F.82.E6.95.B0
    payload = json.dumps({"ImageUrl": imageUrl})

    ## Step 1: canonical request
    signedHeaders = "content-type;host"   # names of the headers taking part in the signature
    canonicalHeaders = f"content-type:{contentType}\nhost:{host}\n"
    hashedRequestPayload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    canonicalRequest = "\n".join([
        "POST",             # HTTP method
        "/",                # canonical URI, a single slash
        "",                 # query string, empty for POST
        canonicalHeaders,
        signedHeaders,
        hashedRequestPayload,
    ])

    ## Step 2: string to sign
    # Credential scope has the form Date/service/tc3_request.
    credentialScope = f"{date}/{service}/tc3_request"
    hashedCanonicalRequest = hashlib.sha256(canonicalRequest.encode("utf-8")).hexdigest()
    string2Sign = "\n".join([algorithm, str(timestamp), credentialScope, hashedCanonicalRequest])

    ## Step 3: signature, via a chain of HMAC-SHA256 derived keys
    def sign(key, msg):
        return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

    secretDate = sign(("TC3" + secretKey).encode("utf-8"), date)
    secretService = sign(secretDate, service)
    secretSigning = sign(secretService, "tc3_request")
    signature = hmac.new(secretSigning, string2Sign.encode("utf-8"), hashlib.sha256).hexdigest()

    ## Step 4: Authorization header
    authorization = (
        f"{algorithm} Credential={secretId}/{credentialScope}, "
        f"SignedHeaders={signedHeaders}, Signature={signature}"
    )

    ## Assemble what the caller needs to send the request
    endpoint = "https://" + host
    headers = {
        "Authorization": authorization,
        "Content-Type": contentType,
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(timestamp),
        "X-TC-Version": version,
        "X-TC-Region": region,
        "X-TC-Language": "zh-CN"
    }
    return endpoint, headers, payload
async def main():
    """Run the pipeline once: OCR the screenshot, parse the artifact, print its score.

    Prints the raw OCR response, the parsed artifact record and the score.
    On a failed request, an API error response or an empty recognition
    result it prints a failure message and returns early.
    """
    import copy  # local import: only needed here, to keep the atfInfo template pristine

    # Build the signed Tencent Cloud OCR request and send it.
    ocrUrl, postHeaders, postData = await tencentAccurateOCR(secretId, secretKey, imageUrl)
    async with AsyncClient() as client:
        try:
            # The body is a pre-encoded JSON string, so it goes in content=
            # (httpx deprecates passing raw text through data=).
            res = await client.post(ocrUrl, headers=postHeaders, content=postData, timeout=10)
            resJson = res.json()
        except Exception as e:
            print(f"未能成功识别图片!\n{str(e)}")
            # Nothing to parse; continuing would hit an undefined resJson.
            return

    # Raw Tencent Cloud OCR result. Shape seen by the original author:
    # {
    #     'Response': {
    #         'Angel': 359.989990234375,
    #         'RequestId': 'c1545a58-bfd7-4b07-8d76-186088a09012',
    #         'TextDetections': [
    #             {
    #                 'AdvancedInfo': '{"Parag":{"ParagNo":1}}',
    #                 'Confidence': 99,
    #                 'DetectedText': '角斗士的酣醉',
    #                 'ItemPolygon': {'Height': 36, 'Width': 201, 'X': 19, 'Y': 29},
    #                 'Polygon': [{'X': 19, 'Y': 29}, {'X': 220, 'Y': 29}, {'X': 220, 'Y': 65}, {'X': 19, 'Y': 65}],
    #                 'WordCoordPoint': [],
    #                 'Words': []
    #             }, {
    #                 'AdvancedInfo': '{"Parag":{"ParagNo":2}}',
    #                 'Confidence': 99,
    #                 'DetectedText': '空之杯',
    #                 ...
    #             }, ...
    #         ]
    #     }
    # }
    print(resJson)

    response = resJson.get("Response", {}) if isinstance(resJson, dict) else {}
    # NOTE(review): assumes failures are reported under Response.Error, as in
    # Tencent Cloud API 3.0 error responses - confirm against the API docs.
    apiError = response.get("Error")
    if apiError:
        print(f"未能成功识别图片!\n{apiError}")
        return
    detections = response.get("TextDetections") or []
    if not detections:
        print("未能成功识别图片!\nTextDetections is empty")
        return

    # Feed the detected lines, in order, into a fresh copy of the template.
    result = copy.deepcopy(atfInfo)
    completed = 0
    for text in detections:
        result, completed = await artifact(result, text["DetectedText"], text["Confidence"], completed)
        if completed == 9:
            # Name, position, main entry, value, level and four sub-entries are all filled.
            break
    # TODO: handle incomplete recognition (completed < 9) explicitly.

    # Parsed artifact record. Sample from the original author:
    # {
    #     'name': '角斗士的酣醉', 'pos': '空之杯', 'star': 5, 'level': 20,
    #     'main_item': {'type': 'phys', 'name': '物理伤害加成', 'value': '58.3%'},
    #     'sub_item': [
    #         {'type': 'em', 'name': '元素精通', 'value': '19'},
    #         {'type': 'cd', 'name': '暴击伤害', 'value': '33.4%'},
    #         {'type': 'df', 'name': '防御力', 'value': '5.1%'},
    #         {'type': 'atk', 'name': '攻击力', 'value': '8.7%'}
    #     ]
    # }
    print(result)

    score = await calculate(result)
    # Sample output recorded by the original author: 38.44%
    print(score)
# Script entry point: run the OCR -> parse -> score pipeline once.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also triggers the network request.
asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment