AI搜索開放平臺支持通過SDK的方式調用文檔內容解析服務。
前提條件
已開通AI搜索開放平臺服務，具體操作請參見開通服務。
已通過API-KEY完成身份鑒權，獲取鑒權信息請參見獲取API-KEY。
參數說明
請求體（body）大小不能超過8MB。
更多參數說明請參見文檔內容解析。
import base64
import os
import time
from alibabacloud_tea_openapi.models import Config
from alibabacloud_searchplat20240529.client import Client
from alibabacloud_searchplat20240529.models import (
CreateDocumentAnalyzeTaskRequestDocument,
CreateDocumentAnalyzeTaskRequestOutput,
CreateDocumentAnalyzeTaskRequest,
CreateDocumentAnalyzeTaskResponse,
GetDocumentAnalyzeTaskStatusRequest,
GetDocumentAnalyzeTaskStatusResponse
)
if __name__ == '__main__':
    # Sample flow: create a document-analyze task, then poll its status
    # until it is no longer PENDING and print the outcome.

    # "default" is the workspace name (replace it with your own);
    # "ops-document-analyze-001" is the service ID.
    WORKSPACE_NAME = "default"
    SERVICE_ID = "ops-document-analyze-001"
    # Seconds to wait between status polls while the task is PENDING.
    PENDING_POLL_SECONDS = 5

    sdk_config = Config(
        # Placeholder: replace with your API key.
        bearer_token="替換為您的API-KEY",
        # endpoint: the unified request entry point (placeholder);
        # the "http://" prefix must be removed from the address.
        endpoint="替換API訪問地址",
        # protocol can be configured as HTTPS or HTTP.
        protocol="http",
    )
    search_client = Client(config=sdk_config)

    # URL mode: the document is referenced by its address.
    source_document = CreateDocumentAnalyzeTaskRequestDocument(
        url="http://test.pdf",
        file_type="pdf",
    )
    # Local mode: send the file content base64-encoded; file_name must be
    # specified additionally.
    # file_path = "path/to/xxx.pdf"
    # source_document = CreateDocumentAnalyzeTaskRequestDocument(
    #     content=base64.b64encode(open(file_path, 'rb').read()).decode(),
    #     file_name=os.path.basename(file_path)
    # )
    output_options = CreateDocumentAnalyzeTaskRequestOutput(image_storage="url")
    create_request = CreateDocumentAnalyzeTaskRequest(
        document=source_document,
        output=output_options,
    )

    create_response: CreateDocumentAnalyzeTaskResponse = (
        search_client.create_document_analyze_task(
            WORKSPACE_NAME, SERVICE_ID, create_request
        )
    )
    task_id = create_response.body.result.task_id
    print("task_id: " + task_id)

    # The same status request object is reused for every poll.
    status_request = GetDocumentAnalyzeTaskStatusRequest(task_id=task_id)
    while True:
        status_response: GetDocumentAnalyzeTaskStatusResponse = (
            search_client.get_document_analyze_task_status(
                WORKSPACE_NAME, SERVICE_ID, status_request
            )
        )
        task_result = status_response.body.result
        task_status = task_result.status
        print("status: " + task_status)
        if task_status != "PENDING":
            break
        time.sleep(PENDING_POLL_SECONDS)

    if task_status == "SUCCESS":
        parsed = task_result.data
        usage_info = status_response.body.usage
        # Only the first 1000 characters of the parsed content are printed.
        print("content:\n" + parsed.content[:1000] + "\n")
        print("page count: " + str(parsed.page_num))
        print("usage: " + str(usage_info))
    else:
        # Any status other than PENDING/SUCCESS: dump the raw result and stop.
        print(task_result)
文檔內容是否對您有幫助?