Skip to content

Instantly share code, notes, and snippets.

@nan-wang
Created August 25, 2021 04:09
Show Gist options
  • Save nan-wang/4eb5cd9963165af2fc0463823816e6fc to your computer and use it in GitHub Desktop.
Save nan-wang/4eb5cd9963165af2fc0463823816e6fc to your computer and use it in GitHub Desktop.
check encoder outputs
from transformer_tf_text_encode import TransformerTFTextEncoder
from jina import Document, DocumentArray
# Encoder under inspection: built from the Hugging Face checkpoint named
# below, with the 'cls' pooling strategy.
# NOTE(review): presumably 'cls' means the [CLS] token embedding is used as
# the sentence vector — confirm against TransformerTFTextEncoder.
encoder = TransformerTFTextEncoder(
    pooling_strategy='cls',
    pretrained_model_name_or_path='hfl/chinese-legal-electra-small-generator',
)
# Hand-written retrieval fixture: 'query' holds the texts to search with and
# 'matches' holds the candidate texts they are compared against.
case_1 = {
    # Query texts.
    'query': [
        '买卖合同纠纷 裁定书',
        '裁定书',
        '买卖合同纠纷',
    ],
    # Candidate texts (the same spaced phrase appears twice, as in the
    # original fixture).
    'matches': [
        '买卖合同纠纷一审行政裁定书',
        '买卖合同纠纷二审行政裁定书',
        '买卖合同纠纷二审行政判决书',
        '一审被告金英玉买卖合同纠纷一案',
        '陈日瑛与房屋买卖合同纠纷二审民事裁定书',
        '买卖合同纠纷',
        '买卖合同纠纷裁定书',
        '买卖合同纠纷 裁定书',
        '买卖合同纠纷 裁定书',
        '劳动合同纠纷 裁定书',
        '裁判书',
        '审判书',
    ],
}
# Select the fixture to run; rebinding target_case is the single switch point.
target_case = case_1

# Wrap every candidate text and every query text in a Document.
docs = DocumentArray([Document(text=t) for t in target_case['matches']])
q_docs = DocumentArray([Document(text=t) for t in target_case['query']])

# Run the encoder over both collections. The return values are ignored, so
# this presumably writes embeddings onto the documents in place — confirm
# against TransformerTFTextEncoder.encode.
encoder.encode(docs=q_docs, parameters={})
encoder.encode(docs=docs, parameters={})

# Match each query against the candidates using the 'cosine' metric.
# NOTE(review): limit=10 presumably caps the matches kept per query, so with
# 12 candidates not all of them would be printed — confirm.
q_docs.match(docs, metric='cosine', limit=10)

# Print each query followed by its matches and their 'cosine' score values.
# The loop bodies must be indented: with every line at column 0 the script
# fails to compile with an IndentationError.
# NOTE(review): the score may be a cosine distance rather than a similarity
# (i.e. lower = closer) — confirm against the Jina version in use.
for q in q_docs:
    print('-'*20)
    print(f'query: {q.text}')
    for m in q.matches:
        print(f'match: {m.text}, {m.scores["cosine"].value}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment