重构程序文件目录结构并更新相关路径引用
- 创建新的目录结构:research/、tools/(含子目录)和apps/
- 移动核心理论文件到research/core-theory/
- 移动天山理论文件到research/specialized/
- 重组tools/目录为多个子目录:content-generation/、data-processing/等
- 更新所有文档中的路径引用,包括README.md、项目结构说明.md等
- 更新工作流文件和脚本中的路径引用
- 更新文档索引文件中的路径引用
This commit is contained in:
173
tools/data-processing/image-processing/analyze_large_image.py
Normal file
173
tools/data-processing/image-processing/analyze_large_image.py
Normal file
@@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
大型图像分析工具
|
||||
用于分析《三体》项目的复杂图表结构
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
import numpy as np
|
||||
from collections import Counter
|
||||
import json
|
||||
|
||||
def analyze_image_basic_info(image_path):
    """Print basic facts about an image file and return the decoded image.

    Reports the format, mode, dimensions, total pixel count and on-disk
    size of the file.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        The PIL image with its pixel data already loaded into memory, or
        ``None`` if the file could not be opened or decoded (the error is
        printed rather than raised).
    """
    print(f"正在分析图像: {image_path}")

    try:
        # NOTE: this is a process-wide Pillow setting. It disables the
        # decompression-bomb guard so that very large diagrams can be
        # opened; only run this tool on trusted files.
        Image.MAX_IMAGE_PIXELS = None

        with Image.open(image_path) as img:
            width, height = img.size
            print(f"图像格式: {img.format}")
            print(f"图像模式: {img.mode}")
            print(f"图像尺寸: {img.size} (宽 x 高)")
            print(f"总像素数: {width * height:,}")

            # Size of the encoded file on disk, reported in MiB.
            file_size = os.path.getsize(image_path)
            print(f"文件大小: {file_size / (1024*1024):.1f} MB")

            # Image.open() is lazy and leaving the with-block closes the
            # underlying file. Decode the pixels now, otherwise the image
            # handed back to the caller is closed and unusable.
            img.load()

        return img
    except Exception as e:
        # Deliberate best-effort boundary: any failure to open or decode is
        # reported and signalled to the caller with None.
        print(f"打开图像时出错: {e}")
        return None
|
||||
|
||||
def analyze_image_colors(img, sample_size=1000):
    """Print the ten most frequent colors among a uniform sample of pixels.

    Args:
        img: Image-like object exposing ``mode``, ``convert()`` and
            ``getdata()``. It is converted to RGB first if it is in
            another mode.
        sample_size: The sampling stride is chosen so that roughly
            ``sample_size * sample_size`` pixels are inspected; images with
            no more pixels than that are inspected in full.

    Returns:
        None. The statistics are printed to stdout.
    """
    print("\n=== 颜色分析 ===")

    # Color tuples are only comparable once everything is in RGB.
    if img.mode != 'RGB':
        img = img.convert('RGB')

    data = img.getdata()
    total = len(data)
    budget = sample_size * sample_size

    if total > budget:
        # Sample by index instead of materializing every pixel as a Python
        # tuple first: on very large images the full list is what exhausts
        # memory, and only every `step`-th pixel is needed anyway.
        step = total // budget
        pixels = [data[i] for i in range(0, total, step)]
    else:
        pixels = list(data)

    color_counter = Counter(pixels)
    sampled = len(pixels)
    print(f"采样像素数: {sampled:,}")
    print("主要颜色 (RGB值, 出现次数):")

    for color, count in color_counter.most_common(10):
        percentage = (count / sampled) * 100
        print(f"  RGB{color}: {count:,} 次 ({percentage:.1f}%)")
|
||||
|
||||
def detect_content_regions(img, threshold=240):
    """Find the bounding box of everything darker than the background.

    The image is converted to grayscale and every pixel whose value is
    below ``threshold`` counts as content (a white background is assumed).

    Args:
        img: Image-like object exposing ``convert('L')``.
        threshold: Gray level (0-255) below which a pixel is content.

    Returns:
        ``(min_row, max_row, min_col, max_col)`` as plain ints, with both
        ends inclusive, or ``None`` when no pixel is below the threshold.
    """
    print("\n=== 内容区域检测 ===")

    gray = img.convert('L')
    pixels = np.asarray(gray)

    non_white = pixels < threshold

    # Project the mask onto each axis instead of calling np.where() on the
    # full mask: np.where allocates two index arrays with one entry per
    # content pixel, which is very large for big diagrams.
    row_hits = np.flatnonzero(non_white.any(axis=1))
    if row_hits.size == 0:
        print("未检测到明显的内容区域")
        return None
    col_hits = np.flatnonzero(non_white.any(axis=0))

    # Plain ints keep the result independent of NumPy scalar types.
    min_row, max_row = int(row_hits[0]), int(row_hits[-1])
    min_col, max_col = int(col_hits[0]), int(col_hits[-1])

    print("内容区域边界:")
    print(f"  行范围: {min_row} - {max_row} (高度: {max_row - min_row + 1})")
    print(f"  列范围: {min_col} - {max_col} (宽度: {max_col - min_col + 1})")

    return (min_row, max_row, min_col, max_col)
|
||||
|
||||
def extract_text_regions(img, region_bounds=None):
    """Mark pixels with strong edges as a rough proxy for text and line art.

    The Sobel gradient magnitude is computed on the grayscale image, and the
    pixels whose magnitude exceeds the 90th percentile are flagged.

    Args:
        img: Image-like object exposing ``crop()`` and ``convert('L')``.
        region_bounds: Optional ``(min_row, max_row, min_col, max_col)``
            with inclusive ends, as returned by ``detect_content_regions``.
            When given, only that region is analyzed.

    Returns:
        Boolean NumPy array with the shape of the analyzed region; ``True``
        marks a high-edge pixel.
    """
    print("\n=== 文本区域分析 ===")

    if region_bounds is not None:
        min_row, max_row, min_col, max_col = region_bounds
        # crop() takes (left, upper, right, lower) with right/lower
        # exclusive, while the bounds are inclusive: add 1 so the last
        # content row and column are not cut off.
        box = (int(min_col), int(min_row), int(max_col) + 1, int(max_row) + 1)
        img_cropped = img.crop(box)
    else:
        img_cropped = img

    gray = img_cropped.convert('L')
    # ndimage.sobel() returns the input dtype by default; on uint8 data the
    # gradients (and their squares) wrap around modulo 256. Work in float32,
    # which is exact for these values and half the memory of float64.
    pixels = np.asarray(gray, dtype=np.float32)

    # Imported locally so the rest of the tool works without SciPy.
    from scipy import ndimage

    # Gradient magnitude from the horizontal and vertical Sobel responses.
    sobel_x = ndimage.sobel(pixels, axis=1)
    sobel_y = ndimage.sobel(pixels, axis=0)
    edges = np.hypot(sobel_x, sobel_y)

    # Keep only pixels strictly above the 90th percentile of edge strength.
    edge_threshold = np.percentile(edges, 90)
    high_edge_regions = edges > edge_threshold

    high_edge_pixels = int(np.count_nonzero(high_edge_regions))
    total_pixels = pixels.size

    print(f"高边缘密度像素: {high_edge_pixels:,} / {total_pixels:,} ({high_edge_pixels/total_pixels*100:.1f}%)")

    return high_edge_regions
|
||||
|
||||
def create_overview_image(img, output_path="overview.png", max_dimension=2000):
    """Save a downscaled overview (thumbnail) of the image.

    The image is shrunk, preserving aspect ratio, so that neither side
    exceeds ``max_dimension``. Images that already fit are copied
    unchanged; nothing is ever upscaled.

    Args:
        img: Image-like object exposing ``size``, ``resize()``, ``copy()``;
            the result must expose ``save()``.
        output_path: Where the overview is written.
        max_dimension: Maximum width and height of the overview, in pixels.

    Returns:
        The overview image that was saved.

    Raises:
        ValueError: If ``max_dimension`` is not positive.
    """
    if max_dimension <= 0:
        raise ValueError("max_dimension must be positive")

    print("\n=== 创建概览图像 ===")

    width, height = img.size
    # Cap the factor at 1.0 so small images are never enlarged.
    scale = min(max_dimension / width, max_dimension / height, 1.0)

    if scale < 1.0:
        # For extreme aspect ratios the short side can truncate to 0, which
        # resize() rejects; keep every side at least one pixel.
        new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        print(f"缩放到: {new_size}")
        overview = img.resize(new_size, Image.Resampling.LANCZOS)
    else:
        overview = img.copy()

    overview.save(output_path)
    print(f"概览图已保存: {output_path}")

    return overview
|
||||
|
||||
def main(image_path="/home/ben/code/huhan3000/3body/三体结构3.drawio.png",
         overview_path="/home/ben/code/huhan3000/3body/overview.png"):
    """Run the full analysis pipeline on one image.

    Prints basic file information, the color distribution, the content
    bounding box and an edge-density summary, then writes a downscaled
    overview image.

    Args:
        image_path: Image to analyze. Defaults to the project diagram this
            tool was originally written for.
        overview_path: Where the overview image is written.
    """
    print("=" * 50)
    print("《三体》项目大型图像分析工具")
    print("=" * 50)

    img = analyze_image_basic_info(image_path)
    if img is None:
        # The failure has already been reported by the loader.
        return

    try:
        analyze_image_colors(img)

        # The content bounding box restricts the edge analysis below.
        regions = detect_content_regions(img)
        extract_text_regions(img, regions)

        create_overview_image(img, overview_path)
    finally:
        # Release the image's resources even if an analysis step fails.
        img.close()

    print("\n" + "=" * 50)
    print("分析完成!")
    print("=" * 50)


if __name__ == "__main__":
    # Usage: analyze_large_image.py [IMAGE_PATH [OVERVIEW_PATH]]
    # With no arguments the original default paths are used.
    main(*sys.argv[1:3])
|
||||
Reference in New Issue
Block a user