82 lines
2.6 KiB
Python
82 lines
2.6 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Analyze images with OpenAI GPT-4V.

Requires the OPENAI_API_KEY environment variable to be set.
|
||
"""
|
||
|
||
import base64
|
||
import os
|
||
from openai import OpenAI
|
||
|
||
def encode_image(image_path):
    """Return the contents of an image file as base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str`` as UTF-8.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")
|
||
|
||
def analyze_image_with_gpt4v(
    image_path,
    prompt="请详细描述这张图片的内容",
    model="gpt-4o",
    max_tokens=1000,
):
    """Ask an OpenAI vision-capable chat model to analyze an image.

    The image is read from disk, base64-encoded, and sent inline as a data
    URL together with the text prompt in a single user message.

    Args:
        image_path: Path of the image file to send.
        prompt: Text instruction that accompanies the image.
        model: Name of the chat model to call. Defaults to "gpt-4o"; the
            original code comment names "gpt-4-vision-preview" as an
            alternative.
        max_tokens: Value passed as ``max_tokens`` to the completion call.

    Returns:
        The content of the first choice's message on success. If the
        OPENAI_API_KEY environment variable is unset or empty, or if the API
        call raises, a human-readable message string is returned instead.

    Raises:
        OSError: If the image file cannot be read. Encoding happens before
            the ``try`` block, so file errors are not converted to a message.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    # Bail out early when no API key is configured.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        return "请设置 OPENAI_API_KEY 环境变量"

    client = OpenAI(api_key=api_key)

    # Encode the image so it can be embedded in the request.
    base64_image = encode_image(image_path)

    # Declare the real media type in the data URL instead of always claiming
    # PNG; fall back to PNG when the extension is unknown or not an image.
    mime_type, _ = mimetypes.guess_type(os.fsdecode(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )

        return response.choices[0].message.content

    except Exception as e:
        # Best-effort boundary: report any API failure as text to the caller.
        return f"分析失败: {e}"
|
||
|
||
# Simple local image-description helper.
def describe_image_locally(image_path):
    """Guess what an image shows from its file name alone.

    The file is never opened; only the final path component is inspected.
    The extension is stripped with ``os.path.splitext`` so that any
    extension (and any letter case) is handled, and only the trailing
    extension is removed.

    Args:
        image_path: Path to the image; only its base name is used.

    Returns:
        The canned description for a known base name, otherwise a message
        containing the file name and saying further analysis is needed.
    """
    filename = os.path.basename(image_path)

    # Known base names and the content they are assumed to show.
    descriptions = {
        "image-000": "可能是标题页或封面",
        "image-001": "可能是目录或章节导航",
        "image-002": "可能是地图或示意图",
        # Add more entries here as needed.
    }

    base_name, _extension = os.path.splitext(filename)
    return descriptions.get(base_name, f"图片 {filename},需要进一步分析")
|
||
|
||
if __name__ == "__main__":
    # Smoke test against a single sample image.
    # NOTE(review): the original indentation was lost; the API-key check is
    # assumed to be nested under the file-existence check — confirm.
    sample_path = "images/0 序:令人又敬又畏的_忽里勒台_大会/image-000.png"
    if os.path.exists(sample_path):
        local_summary = describe_image_locally(sample_path)
        print("本地描述:", local_summary)

        # Only call the remote model when an API key is available.
        has_api_key = bool(os.getenv('OPENAI_API_KEY'))
        if not has_api_key:
            print("提示:设置 OPENAI_API_KEY 可使用 GPT-4V 分析")
        else:
            print("GPT-4V 分析:", analyze_image_with_gpt4v(sample_path))