Daily Log - 17/08/2024
MAK KA WAI
Posted on August 17, 2024
Now that my Azure OpenAI Service quota has been increased, I tried using GPT-4o to read PDF and image files. It worked! Here is the script:
import base64
import io
import os
import sys
from openai import AzureOpenAI
from PIL import Image
from pdf2image import convert_from_path
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before
# any of the settings below are read.
load_dotenv()

# Directory passed to pdf2image's convert_from_path() as `poppler_path`.
# It can be overridden with a POPPLER_PATH environment variable (or .env
# entry); the default is the original hard-coded Windows location.
POPPLER_PATH = os.getenv(
    "POPPLER_PATH", r"E:\CustomPrograms\poppler-24.07.0\Library\bin"
)

# Azure OpenAI settings
client = AzureOpenAI(
    # Indexing os.environ makes a missing API key fail fast with KeyError.
    api_key=os.environ['AZURE_OPENAI_API_KEY'],
    api_version=os.getenv('AZURE_OPENAI_API_VERSION'),
    azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
)

# Name of the Azure OpenAI deployment; it is sent as `model=` in chat
# requests. The variable keeps its historical name `development` because
# other functions in this file read it. The previous chained assignment also
# bound a misleading global `azure_endpoint` to this value; that stray
# binding has been dropped.
development = os.getenv('AZURE_OPENAI_DEPLOYMENT')
def encode_image(image):
    """Return the Base64 encoding of an image as a UTF-8 string.

    Args:
        image: Either a filesystem path (``str`` or ``os.PathLike``), whose
            raw bytes are encoded unchanged, or a ``PIL.Image.Image``, which
            is serialized to JPEG in memory before encoding.

    Returns:
        The Base64 text of the image bytes.

    Raises:
        ValueError: If ``image`` is neither a path nor a PIL image.
    """
    if isinstance(image, (str, os.PathLike)):  # file path
        with open(image, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    if isinstance(image, Image.Image):  # PIL Image object
        # The JPEG writer rejects modes such as RGBA, LA and P, so anything
        # it cannot store is flattened to RGB first.
        jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
        if image.mode not in jpeg_modes:
            image = image.convert("RGB")
        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format='JPEG')
        return base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')

    raise ValueError("Unsupported image type")
def analyze_image_content(base64_image, prompt, max_tokens=500):
    """Send one Base64-encoded image plus a text prompt to the chat model.

    The request is made through the module-level ``client`` using the
    deployment name stored in ``development``.

    Args:
        base64_image: Base64 text of the image bytes. It is embedded in a
            data URL that always declares ``image/jpeg``.
        prompt: Instruction text sent alongside the image.
        max_tokens: Upper bound on the length of the reply. Defaults to 500,
            the value that was previously hard-coded.

    Returns:
        The text content of the first choice in the response, or ``None``
        if the request raised any exception (the error is printed).
    """
    # NOTE: `prompt` is already a Python str; the former
    # encode('utf-8').decode('utf-8') round trip was a no-op and was removed.
    try:
        response = client.chat.completions.create(
            model=development,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller carry on (e.g. with the remaining pages of a PDF).
        print(f"API 請求錯誤:{str(e)}")
        return None
def analyze_image(image_path, prompt):
    """Analyze a single image and return the model's reply.

    The image is Base64-encoded with ``encode_image`` and handed, together
    with ``prompt``, to ``analyze_image_content``; whatever that call
    returns is passed straight back to the caller.
    """
    return analyze_image_content(encode_image(image_path), prompt)
def analyze_pdf(pdf_path, prompt):
    """Analyze every page of a PDF and collect the per-page replies.

    The PDF is rendered to page images with ``convert_from_path`` using
    ``POPPLER_PATH``. Each page is encoded and analyzed with ``prompt``
    suffixed by `` (Page N)``.

    Returns:
        ``None`` if the PDF could not be converted (diagnostics are
        printed); otherwise a list of the truthy replies in page order.
        Pages whose analysis returned a falsy value are left out, so a
        list position does not necessarily equal the page number.
    """
    try:
        pages = convert_from_path(pdf_path, poppler_path=POPPLER_PATH)
    except Exception as e:
        print(f"錯誤:無法處理 PDF 文件。錯誤信息:{str(e)}")
        print("請確保已正確安裝 Poppler 並設置了正確的路徑。")
        print(f"當前設置的 Poppler 路徑:{POPPLER_PATH}")
        return None

    # Lazy generator: pages are still encoded and analyzed one at a time,
    # in order, as the list below consumes it.
    replies = (
        analyze_image_content(encode_image(image), f"{prompt} (Page {i+1})")
        for i, image in enumerate(pages)
    )
    return [reply for reply in replies if reply]
def analyze_document(file_path, prompt):
    """Analyze a document (PDF or image) and print the result.

    The file type is chosen from the lower-cased extension of ``file_path``:
    ``.pdf`` goes through ``analyze_pdf`` and each returned entry is printed
    under a "Page N analysis:" heading; ``.jpg``/``.jpeg``/``.png``/``.bmp``/
    ``.gif`` go through ``analyze_image``; anything else prints an
    "unsupported file type" message. Nothing is returned.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix == '.pdf':
        results = analyze_pdf(file_path, prompt)
        if not results:
            # None (conversion failed) or an empty list: nothing to print.
            return
        for i, result in enumerate(results):
            print(f"Page {i+1} analysis:")
            print(result)
            print("---")
        return

    if suffix in image_suffixes:
        result = analyze_image(file_path, prompt)
        print("Image analysis:")
        print(result)
        return

    print("不支持的文件類型。請提供 PDF 或圖像文件。")
def main():
    """Command-line entry point.

    Expects the document path as the first command-line argument; without
    one it prints a usage line and exits with status 1. Otherwise it asks
    for an analysis prompt on stdin and runs ``analyze_document``, printing
    a message instead of a traceback if that call raises.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("使用方法: python app.py <文件路徑>")
        sys.exit(1)

    file_path = cli_args[0]
    prompt = input("請輸入您的分析提示: ")

    try:
        analyze_document(file_path, prompt)
    except FileNotFoundError:
        print(f"錯誤:找不到文件 '{file_path}'。請確保文件路徑正確。")
    except Exception as e:
        print(f"發生錯誤: {e}")


if __name__ == "__main__":
    main()
💖 💪 🙅 🚩
MAK KA WAI
Posted on August 17, 2024
Join Our Newsletter. No Spam, Only the good stuff.
Sign up to receive the latest updates from our blog.
Related
privacy Caught in the Crunch My Journey from Snacks to 2 Million Exposed Users Privacy
November 30, 2024