huhan3000/tools/ai-tools/scripts/openai_image_analyzer.py

82 lines
2.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
使用 OpenAI GPT-4V 分析图片
需要设置 OPENAI_API_KEY 环境变量
"""
import base64
import os
from openai import OpenAI
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``
        using UTF-8.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def _guess_image_mime(image_path, default="image/png"):
    """Return the image MIME type implied by the file extension, else ``default``."""
    import mimetypes  # local import keeps this block self-contained

    try:
        guessed, _ = mimetypes.guess_type(os.fsdecode(image_path))
    except TypeError:
        # Not a str/bytes/PathLike path (e.g. a file descriptor): no name to inspect.
        return default
    if guessed and guessed.startswith("image/"):
        return guessed
    return default


def analyze_image_with_gpt4v(
    image_path,
    prompt="请详细描述这张图片的内容",
    *,
    model="gpt-4o",
    max_tokens=1000,
):
    """Send an image plus a text prompt to an OpenAI chat model and return its reply.

    The image is embedded in the request as a base64 ``data:`` URL. Its MIME
    type is derived from the file extension (falling back to ``image/png``
    when the extension is unknown or not an image type), so JPEG, WebP, etc.
    are no longer mislabelled as PNG.

    Args:
        image_path: Path of the image file to analyze.
        prompt: Text instruction sent alongside the image.
        model: Name of the chat model to call (keyword-only).
        max_tokens: Upper bound on the length of the reply (keyword-only).

    Returns:
        The content of the first choice's message on success. If the
        ``OPENAI_API_KEY`` environment variable is unset or empty, or if the
        API call raises, a human-readable message string is returned instead.

    Raises:
        Errors raised while constructing the client or reading the image
        file happen outside the ``try`` block and therefore propagate to
        the caller.
    """
    # Bail out early when no API key is configured.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        return "请设置 OPENAI_API_KEY 环境变量"

    client = OpenAI(api_key=api_key)

    # Encode the image and work out which MIME type to advertise for it.
    base64_image = encode_image(image_path)
    mime_type = _guess_image_mime(image_path)

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort tool: report API failures as text instead of crashing.
        return f"分析失败: {e}"
# Simple local image-description helper (no API call involved).
def describe_image_locally(image_path):
    """Guess what an image shows from its file name alone.

    The file name is reduced to its stem (directory and final extension
    removed) and looked up in a small table of known stems.

    Args:
        image_path: Path of the image; only its base name is inspected,
            the file itself is never opened.

    Returns:
        The description registered for the file's stem, or a fallback
        message containing the file name when the stem is not in the table.
    """
    filename = os.path.basename(image_path)

    # Known file-name stems and the content they are guessed to hold.
    descriptions = {
        "image-000": "可能是标题页或封面",
        "image-001": "可能是目录或章节导航",
        "image-002": "可能是地图或示意图",
        # Add more entries here as needed.
    }

    # splitext drops only the final extension, whatever its case or type;
    # str.replace('.png', '') would also strip ".png" from the middle of a
    # name and so could produce false matches.
    base_name, _extension = os.path.splitext(filename)
    return descriptions.get(base_name, f"图片 {filename},需要进一步分析")
if __name__ == "__main__":
    # Smoke test against a single sample image; nothing runs if it is absent.
    test_image = "images/0 序令人又敬又畏的_忽里勒台_大会/image-000.png"
    if os.path.exists(test_image):
        local_guess = describe_image_locally(test_image)
        print("本地描述:", local_guess)

        # Call GPT-4V only when an OpenAI API key is configured;
        # otherwise print a hint about the environment variable.
        key_missing = not os.getenv('OPENAI_API_KEY')
        if key_missing:
            print("提示:设置 OPENAI_API_KEY 可使用 GPT-4V 分析")
        else:
            print("GPT-4V 分析:", analyze_image_with_gpt4v(test_image))