82 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			82 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
| #!/usr/bin/env python3
 | ||
| """
 | ||
| 使用 OpenAI GPT-4V 分析图片
 | ||
| 需要设置 OPENAI_API_KEY 环境变量
 | ||
| """
 | ||
| 
 | ||
| import base64
 | ||
| import os
 | ||
| from openai import OpenAI
 | ||
| 
 | ||
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode, base64-encoded, and the resulting
    bytes are decoded as UTF-8 so that a ``str`` is returned.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
 | ||
| 
 | ||
def analyze_image_with_gpt4v(image_path, prompt="请详细描述这张图片的内容",
                             model="gpt-4o", max_tokens=1000):
    """Send an image plus a text prompt to an OpenAI chat model.

    Args:
        image_path: Path of the image file to upload. It is read and
            base64-encoded via ``encode_image``.
        prompt: Text instruction sent alongside the image.
        model: Chat-completions model name. Defaults to ``"gpt-4o"``
            (``"gpt-4-vision-preview"`` was the original alternative).
        max_tokens: Upper bound on tokens in the reply.

    Returns:
        The text content of the first choice in the response. If the
        ``OPENAI_API_KEY`` environment variable is unset or empty, or if the
        API call raises, a human-readable message string is returned instead.

    Raises:
        Errors raised while reading the image are not caught here, because
        the file is encoded before the ``try`` block.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    # Without a key there is nothing to call; report it instead of failing.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        return "请设置 OPENAI_API_KEY 环境变量"

    client = OpenAI(api_key=api_key)

    # Encode the image so it can be embedded in a data URL.
    base64_image = encode_image(image_path)

    # Declare the real media type rather than always claiming PNG, so that
    # JPEG/WebP/GIF files are labelled correctly. Fall back to PNG (the
    # previous fixed value) when the type cannot be determined.
    mime_type = None
    if isinstance(image_path, (str, bytes, os.PathLike)):
        mime_type = mimetypes.guess_type(os.fsdecode(image_path))[0]
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"

    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=max_tokens,
        )

        return response.choices[0].message.content

    except Exception as e:
        # Best-effort helper: surface any API failure as a message string.
        return f"分析失败: {e}"
 | ||
| 
 | ||
# Simple local image description tool
def describe_image_locally(image_path):
    """Guess an image's content from its file name alone.

    Args:
        image_path: Path to the image; only its final path component is used.

    Returns:
        A canned description when the file name, with a trailing ``.png``
        extension removed (case-insensitive), is one of the known names.
        Otherwise a message containing the file name and saying that further
        analysis is needed.
    """
    filename = os.path.basename(image_path)

    # Known file-name patterns mapped to a guessed description.
    descriptions = {
        "image-000": "可能是标题页或封面",
        "image-001": "可能是目录或章节导航",
        "image-002": "可能是地图或示意图",
        # Add more entries here as needed.
    }

    # Strip only a real trailing ".png" extension. A plain str.replace would
    # also delete ".png" from the middle of a name (e.g. "image.png-001")
    # and produce a false match.
    stem, extension = os.path.splitext(filename)
    base_name = stem if extension.lower() == ".png" else filename

    if base_name in descriptions:
        return descriptions[base_name]
    return f"图片 {filename},需要进一步分析"
 | ||
| 
 | ||
if __name__ == "__main__":
    # Smoke test against a single sample image; does nothing if it is absent.
    test_image = "images/0 序:令人又敬又畏的_忽里勒台_大会/image-000.png"
    if os.path.exists(test_image):
        local_guess = describe_image_locally(test_image)
        print("本地描述:", local_guess)

        # Only call the remote model when an API key is configured;
        # otherwise print a hint about how to enable it.
        if not os.getenv('OPENAI_API_KEY'):
            print("提示:设置 OPENAI_API_KEY 可使用 GPT-4V 分析")
        else:
            print("GPT-4V 分析:", analyze_image_with_gpt4v(test_image))