Created
August 25, 2021 04:09
-
-
Save nan-wang/4eb5cd9963165af2fc0463823816e6fc to your computer and use it in GitHub Desktop.
check encoder outputs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformer_tf_text_encode import TransformerTFTextEncoder | |
from jina import Document, DocumentArray | |
# Encoder under inspection. It is built from the
# 'hfl/chinese-legal-electra-small-generator' checkpoint and configured with
# the 'cls' pooling strategy.
encoder = TransformerTFTextEncoder(
    pretrained_model_name_or_path='hfl/chinese-legal-electra-small-generator',
    pooling_strategy='cls',
)
# Hand-written fixture for eyeballing encoder output.
#   'query'   -- short query texts that get matched against the candidates.
#   'matches' -- candidate texts to rank for each query. The list contains
#                exact and near-duplicates of the queries (note that two
#                entries are the same string) as well as unrelated titles.
case_1 = {
    'query': [
        '买卖合同纠纷 裁定书',
        '裁定书',
        '买卖合同纠纷',
    ],
    'matches': [
        '买卖合同纠纷一审行政裁定书',
        '买卖合同纠纷二审行政裁定书',
        '买卖合同纠纷二审行政判决书',
        '一审被告金英玉买卖合同纠纷一案',
        '陈日瑛与房屋买卖合同纠纷二审民事裁定书',
        '买卖合同纠纷',
        '买卖合同纠纷裁定书',
        '买卖合同纠纷 裁定书',
        '买卖合同纠纷 裁定书',
        '劳动合同纠纷 裁定书',
        '裁判书',
        '审判书',
    ],
}
# Select which fixture to run; rebind this name to try another case.
target_case = case_1

# Wrap every raw string in a Document so the encoder and matcher can use it.
docs = DocumentArray([Document(text=t) for t in target_case['matches']])
q_docs = DocumentArray([Document(text=t) for t in target_case['query']])

# Encode queries and candidates with the same encoder. The return value is
# ignored, so the encoder presumably writes embeddings onto the documents
# in place -- TODO confirm against the encoder implementation.
encoder.encode(docs=q_docs, parameters={})
encoder.encode(docs=docs, parameters={})

# Attach up to 10 candidates to each query under the 'cosine' metric.
# NOTE(review): the 'cosine' score is presumably a distance (smaller means
# closer) rather than a similarity -- confirm for the installed Jina version.
q_docs.match(docs, metric='cosine', limit=10)

# Print each query followed by its matches and their cosine scores.
for q in q_docs:
    print('-' * 20)
    print(f'query: {q.text}')
    for m in q.matches:
        print(f'match: {m.text}, {m.scores["cosine"].value}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment