173 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			173 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
| #!/usr/bin/env python3
 | ||
| """
 | ||
| 大型图像分析工具
 | ||
| 用于分析《三体》项目的复杂图表结构
 | ||
| """
 | ||
| 
 | ||
| import os
 | ||
| import sys
 | ||
| from PIL import Image, ImageDraw, ImageFont
 | ||
| import numpy as np
 | ||
| from collections import Counter
 | ||
| import json
 | ||
| 
 | ||
def analyze_image_basic_info(image_path):
    """Print basic facts about an image file and return the loaded image.

    Prints the format, mode, dimensions, total pixel count and on-disk file
    size of the image.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The opened Pillow image with its pixel data already read into
        memory, or ``None`` if opening or reading the file failed (the error
        is printed rather than raised).
    """
    print(f"正在分析图像: {image_path}")

    try:
        # Remove Pillow's pixel-count limit so very large diagrams can be
        # opened. This assigns a module attribute, so it affects every later
        # Image.open call in the process.
        Image.MAX_IMAGE_PIXELS = None

        img = Image.open(image_path)
        try:
            print(f"图像格式: {img.format}")
            print(f"图像模式: {img.mode}")
            print(f"图像尺寸: {img.size} (宽 x 高)")
            print(f"总像素数: {img.size[0] * img.size[1]:,}")

            # Size of the file on disk, reported in MiB.
            file_size = os.path.getsize(image_path)
            print(f"文件大小: {file_size / (1024*1024):.1f} MB")

            # Image.open only reads the header. Returning the image from
            # inside a `with` block would close the file before any pixel
            # data was read, leaving the caller with an unusable object, so
            # force the pixel data into memory before handing it back.
            img.load()
        except Exception:
            # Do not leak the file handle when inspection or decoding fails.
            img.close()
            raise
        return img
    except Exception as e:
        # Best-effort tool: report the problem and let the caller bail out.
        print(f"打开图像时出错: {e}")
        return None
 | ||
| 
 | ||
def analyze_image_colors(img, sample_size=1000):
    """Print the ten most frequent colors among (sampled) pixels of an image.

    Args:
        img: Pillow image to analyze; converted to RGB first if needed.
        sample_size: When the image has more than ``sample_size ** 2``
            pixels, pixels are sampled at a uniform stride instead of all
            being counted.

    Returns:
        None. Results are printed.
    """
    print("\n=== 颜色分析 ===")

    # Work in RGB so every pixel is a 3-component color.
    if img.mode != 'RGB':
        img = img.convert('RGB')

    # View the image as a flat (pixel_count, 3) array. Sampling this array
    # with a stride avoids building one Python tuple per pixel of the full
    # image, which is prohibitively memory-hungry for very large images.
    pixels = np.asarray(img).reshape(-1, 3)
    max_samples = sample_size * sample_size
    if len(pixels) > max_samples:
        # Uniform sampling: keep every `step`-th pixel in row-major order.
        step = len(pixels) // max_samples
        pixels = pixels[::step]

    # Only the sampled pixels are converted to hashable tuples of ints.
    color_counter = Counter(map(tuple, pixels.tolist()))
    print(f"采样像素数: {len(pixels):,}")
    print("主要颜色 (RGB值, 出现次数):")

    for color, count in color_counter.most_common(10):
        percentage = (count / len(pixels)) * 100
        print(f"  RGB{color}: {count:,} 次 ({percentage:.1f}%)")
 | ||
| 
 | ||
def detect_content_regions(img, threshold=240):
    """Find the bounding box of all pixels darker than ``threshold``.

    The image is converted to grayscale and every pixel whose value is below
    ``threshold`` counts as content (this assumes a light background).

    Args:
        img: Pillow image to scan.
        threshold: Grayscale value below which a pixel counts as content.

    Returns:
        ``(min_row, max_row, min_col, max_col)`` with inclusive bounds, or
        ``None`` when no pixel is below the threshold.
    """
    print("\n=== 内容区域检测 ===")

    # Grayscale pixel matrix, indexed as [row, col].
    gray = np.asarray(img.convert('L'))
    non_white = gray < threshold

    # Project the mask onto each axis instead of listing the coordinates of
    # every content pixel: the bounding box only needs to know which rows and
    # which columns contain content, so this keeps memory use proportional to
    # height + width rather than to the number of content pixels.
    content_rows = np.flatnonzero(non_white.any(axis=1))
    if content_rows.size == 0:
        print("未检测到明显的内容区域")
        return None
    content_cols = np.flatnonzero(non_white.any(axis=0))

    # flatnonzero returns sorted indices, so the ends are the extremes.
    min_row, max_row = content_rows[0], content_rows[-1]
    min_col, max_col = content_cols[0], content_cols[-1]

    print("内容区域边界:")
    print(f"  行范围: {min_row} - {max_row} (高度: {max_row - min_row + 1})")
    print(f"  列范围: {min_col} - {max_col} (宽度: {max_col - min_col + 1})")

    return (min_row, max_row, min_col, max_col)
 | ||
| 
 | ||
def extract_text_regions(img, region_bounds=None):
    """Mark pixels with strong edges as a rough indicator of text or line art.

    Computes the Sobel gradient magnitude of the grayscale image and flags
    the pixels whose magnitude exceeds the 90th percentile.

    Args:
        img: Pillow image to analyze.
        region_bounds: Optional ``(min_row, max_row, min_col, max_col)``
            tuple with inclusive bounds; when given, only that region is
            analyzed.

    Returns:
        Boolean NumPy array, the same shape as the analyzed region, that is
        True where the edge magnitude is above the 90th percentile.
    """
    print("\n=== 文本区域分析 ===")

    # Restrict the analysis to the requested region, if any.
    if region_bounds:
        min_row, max_row, min_col, max_col = region_bounds
        # The bounds are inclusive, while the right and lower edges of a
        # Pillow crop box are exclusive, so add 1 to keep the last row and
        # column. Plain ints keep the box independent of NumPy scalar types.
        img_cropped = img.crop(
            (int(min_col), int(min_row), int(max_col) + 1, int(max_row) + 1)
        )
    else:
        img_cropped = img

    # Use floating point for the gradient computation: ndimage.sobel returns
    # the input dtype by default, so on uint8 data negative gradients wrap
    # around and squaring overflows, corrupting the edge magnitudes.
    pixels = np.asarray(img_cropped.convert('L'), dtype=np.float32)

    # Imported here so SciPy is only required when this analysis runs.
    from scipy import ndimage

    # Horizontal and vertical Sobel gradients, combined into a magnitude.
    sobel_x = ndimage.sobel(pixels, axis=1)
    sobel_y = ndimage.sobel(pixels, axis=0)
    edges = np.hypot(sobel_x, sobel_y)

    # Pixels in the top 10% of edge magnitude count as "high edge density".
    edge_threshold = np.percentile(edges, 90)
    high_edge_regions = edges > edge_threshold

    high_edge_pixels = int(np.count_nonzero(high_edge_regions))
    total_pixels = pixels.size

    print(f"高边缘密度像素: {high_edge_pixels:,} / {total_pixels:,} ({high_edge_pixels/total_pixels*100:.1f}%)")

    return high_edge_regions
 | ||
| 
 | ||
def create_overview_image(img, output_path="overview.png", max_dimension=2000):
    """Save a downscaled overview (thumbnail) of an image and return it.

    The image is shrunk, preserving aspect ratio, so that neither side
    exceeds ``max_dimension``. Images that already fit are copied unchanged.

    Args:
        img: Pillow image to summarize.
        output_path: Where the overview image is written.
        max_dimension: Largest allowed width or height of the overview.

    Returns:
        The overview image that was saved.
    """
    print("\n=== 创建概览图像 ===")

    width, height = img.size

    # Never upscale: the factor is capped at 1.0.
    scale = min(max_dimension / width, max_dimension / height, 1.0)

    if scale >= 1.0:
        # Already small enough; work on a copy so the input stays untouched.
        overview = img.copy()
    else:
        new_size = (int(width * scale), int(height * scale))
        print(f"缩放到: {new_size}")
        overview = img.resize(new_size, Image.Resampling.LANCZOS)

    # Write the overview to disk.
    overview.save(output_path)
    print(f"概览图已保存: {output_path}")

    return overview
 | ||
| 
 | ||
def main(argv=None):
    """Run the full analysis pipeline on one image.

    Prints basic information, the color distribution, the content bounding
    box and the edge statistics, then writes an overview image.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``argv[0]`` is the image path and ``argv[1]`` the overview
            output path. Defaults to ``sys.argv[1:]``. When no image path is
            given the project's default diagram is analyzed; when no output
            path is given the overview is written as ``overview.png`` next to
            the image.
    """
    args = sys.argv[1:] if argv is None else list(argv)

    if args:
        image_path = args[0]
    else:
        image_path = "/home/ben/code/huhan3000/3body/三体结构3.drawio.png"

    if len(args) > 1:
        output_path = args[1]
    else:
        # For the default image this resolves to the same overview path the
        # tool has always used.
        output_path = os.path.join(os.path.dirname(image_path), "overview.png")

    print("=" * 50)
    print("《三体》项目大型图像分析工具")
    print("=" * 50)

    # Basic information; stop if the image cannot be opened.
    img = analyze_image_basic_info(image_path)
    if img is None:
        return

    try:
        # Color distribution.
        analyze_image_colors(img)

        # Bounding box of the non-background content.
        regions = detect_content_regions(img)

        # Edge-density analysis, limited to the content box when one exists.
        extract_text_regions(img, regions)

        # Downscaled overview image.
        create_overview_image(img, output_path)
    finally:
        # Release the image's resources even if a step fails.
        img.close()

    print("\n" + "=" * 50)
    print("分析完成!")
    print("=" * 50)


if __name__ == "__main__":
    main()