#!/usr/bin/env python3
"""Analyze images with an OpenAI vision-capable chat model.

The OPENAI_API_KEY environment variable must be set for the remote analysis.
A filename-based local description is available without any API access.
"""

import base64
import mimetypes
import os

try:
    from openai import OpenAI
except ImportError:
    # The SDK is only needed for the remote analysis; keep the local
    # description tool usable when the package is not installed.
    OpenAI = None

# MIME type used in the data URL when the file extension gives no usable hint.
_DEFAULT_IMAGE_MIME = "image/png"


def encode_image(image_path: str) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')


def _guess_image_mime(image_path: str) -> str:
    """Guess the image MIME type from the file extension, defaulting to PNG."""
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type and mime_type.startswith("image/"):
        return mime_type
    return _DEFAULT_IMAGE_MIME


def analyze_image_with_gpt4v(image_path: str, prompt: str = "请详细描述这张图片的内容") -> str:
    """Send the image and *prompt* to the chat completions API.

    Args:
        image_path: Path of the image file to upload (inlined as a data URL).
        prompt: Text instruction sent alongside the image.

    Returns:
        The text content of the first choice on success; otherwise a
        human-readable message (missing API key, missing ``openai`` package,
        or ``"分析失败: ..."`` when the API call raises).

    Raises:
        OSError: if the image file cannot be read; reading happens before the
            API call and is deliberately not swallowed.
    """
    # Check the API key first so a missing key is reported before any I/O.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        return "请设置 OPENAI_API_KEY 环境变量"

    if OpenAI is None:
        return "请先安装 openai 包 (pip install openai)"

    client = OpenAI(api_key=api_key)

    # Encode the image and label it with its real MIME type rather than
    # always claiming PNG.
    base64_image = encode_image(image_path)
    mime_type = _guess_image_mime(image_path)

    try:
        response = client.chat.completions.create(
            model="gpt-4o",  # or "gpt-4-vision-preview"
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=1000,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort tool: report any API/network failure as text.
        return f"分析失败: {e}"


# Simple local image description tool
def describe_image_locally(image_path: str) -> str:
    """Guess what an image shows from its file name alone.

    The file name without its extension is looked up in a small table of
    known names; unknown names yield a generic "needs further analysis"
    message that includes the file name.
    """
    filename = os.path.basename(image_path)

    # Guesses keyed by file name without extension.
    descriptions = {
        "image-000": "可能是标题页或封面",
        "image-001": "可能是目录或章节导航",
        "image-002": "可能是地图或示意图",
        # Add more entries as needed.
    }

    # Strip only the real extension; a plain str.replace would also remove
    # ".png" occurring in the middle of the name.
    base_name, _ = os.path.splitext(filename)

    if base_name in descriptions:
        return descriptions[base_name]
    return f"图片 {filename},需要进一步分析"


def main() -> None:
    """Describe one sample image locally and, if a key is set, via the API."""
    # Test a single image.
    test_image = "images/0 序:令人又敬又畏的_忽里勒台_大会/image-000.png"

    if not os.path.exists(test_image):
        return

    print("本地描述:", describe_image_locally(test_image))

    # Try the remote analysis only when an OpenAI API key is available.
    if os.getenv('OPENAI_API_KEY'):
        print("GPT-4V 分析:", analyze_image_with_gpt4v(test_image))
    else:
        print("提示:设置 OPENAI_API_KEY 可使用 GPT-4V 分析")


if __name__ == "__main__":
    main()