Last active
June 10, 2024 05:52
-
-
Save mahiya/26cf7eebb563871b2d3592f2dd62515f to your computer and use it in GitHub Desktop.
旧 Azure Document Intelligence SDK の使い方
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install azure-ai-formrecognizer==3.3.0
"""Run OCR on a PDF with the legacy Azure Document Intelligence SDK.

The script sends ``file_path`` to the ``prebuilt-layout`` model and writes
the analysis result to ``ocr_result.json`` as UTF-8 JSON.
"""
import json
import os

from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient

# Endpoint and key of the Azure Document Intelligence resource.
# Environment variables take precedence so the secret key does not have to be
# stored in the source file; the literals are the original placeholders and
# are used unchanged when the variables are not set.
endpoint = os.environ.get(
    "AZURE_FORM_RECOGNIZER_ENDPOINT",
    "https://xxx.cognitiveservices.azure.com/",
)
key = os.environ.get("AZURE_FORM_RECOGNIZER_KEY", "")

# Path of the PDF file to process.
file_path = "sample.pdf"

# Create the client used to talk to Document Intelligence.
client = DocumentAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))

# Run OCR (layout) analysis on the PDF file.
with open(file_path, "rb") as f:
    poller = client.begin_analyze_document("prebuilt-layout", document=f, locale="ja-JP")
    # Wait for the long-running operation while the file is still open, so the
    # stream is guaranteed to be valid for as long as the SDK may need it.
    result = poller.result()

# Save the OCR result as a JSON file; ensure_ascii=False keeps non-ASCII
# (e.g. Japanese) text readable instead of \u-escaped.
with open("ocr_result.json", "w", encoding="utf-8") as f:
    json.dump(result.to_dict(), f, ensure_ascii=False, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment