# mahiya/document_intelligence.py

## document_intelligence.py
# pip install azure-ai-formrecognizer==3.3.0
#
# Runs layout (OCR) analysis on a local PDF with Azure Document Intelligence
# and saves the complete analysis result to a JSON file.
import json
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient

# Endpoint and key of the Azure Document Intelligence resource.
# Both are placeholders: fill them in before running the script.
endpoint = "https://xxx.cognitiveservices.azure.com/"
key = ""

# Path of the PDF file to process.
file_path = "sample.pdf"

# Create the client used to talk to Document Intelligence.
client = DocumentAnalysisClient(endpoint=endpoint, credential=AzureKeyCredential(key))

# Run OCR analysis on the PDF with the prebuilt layout model.
# The file is opened in binary mode and stays open until the result is back.
with open(file_path, "rb") as f:
    poller = client.begin_analyze_document("prebuilt-layout", document=f, locale="ja-JP")
    # Wait for the analysis operation to finish and fetch its result.
    result = poller.result()

# Save the OCR analysis result as a JSON file.
# ensure_ascii=False writes non-ASCII text (e.g. Japanese) as-is instead of
# \uXXXX escapes; the file is explicitly UTF-8 encoded to match.
with open("ocr_result.json", "w", encoding="utf-8") as f:
    json.dump(result.to_dict(), f, ensure_ascii=False, indent=4)