huhan3000/academic-papers/scripts/query_papers.py

260 lines
9.8 KiB
Python
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
学术论文库查询工具
Academic Papers Query Tool
Usage:
python query_papers.py --list-all
python query_papers.py --topic "K音"
python query_papers.py --civilization "贵霜"
python query_papers.py --keyword "Jade"
python query_papers.py --phonetic "M"
"""
import argparse
import json
from pathlib import Path
from typing import Any, Dict, List, Optional
class PaperQuery:
    """Read-only query helper over the paper-library metadata JSON.

    The metadata document is loaded once at construction time and kept in
    ``self.data``. The methods below read the top-level keys ``metadata``,
    ``categories``, ``civilizations``, ``phonetic_systems``,
    ``core_concepts``, ``reading_paths`` and (optionally) ``papers``.

    Every public method prints a human-readable report to stdout; the
    ``query_*`` methods additionally return the matching paper file names.
    """

    # Category key -> display name used in listings.
    _CATEGORY_NAMES: Dict[str, str] = {
        "core_theory": "核心理论框架",
        "k_sound": "K音系统研究",
        "m_sound": "M音系统研究",
        "yj_sound": "Y/J音系统研究",
        "s_sound": "S音系统研究",
        "jerusalem_western": "J/Jerusalem系统研究",
        "kushan_central_asia": "贵霜/中亚研究",
        "northern_dynasty": "北朝/辽金元研究",
        "literature_culture": "文学/文化分析",
    }

    # User-facing topic name -> category key. Several names may share one
    # category (for example Y and J belong to the same sound system).
    _TOPIC_KEYS: Dict[str, str] = {
        "K音": "k_sound",
        "M音": "m_sound",
        "Y音": "yj_sound",
        "J音": "yj_sound",
        "Y/J音": "yj_sound",
        "S音": "s_sound",
        "核心理论": "core_theory",
        "贵霜": "kushan_central_asia",
        "中亚": "kushan_central_asia",
        "北朝": "northern_dynasty",
        "辽金元": "northern_dynasty",
        "昆仑": "core_theory",
        "西方": "jerusalem_western",
        "文学": "literature_culture",
    }

    # Chinese civilization name -> key used in the ``civilizations`` table.
    _CIVILIZATION_ALIASES: Dict[str, str] = {
        "昆仑": "Kunlun",
        "北朝": "Northern Dynasty",
        "贵霜": "Kushan",
        "汉人": "Han",
        "粟特": "Sogdian",
        "西方": "Jerusalem",
    }

    # Civilization key -> category holding its papers. This is a simplified
    # mapping: Han and Sogdian have no dedicated category, so they yield no
    # papers.
    _CIVILIZATION_CATEGORIES: Dict[str, str] = {
        "Kunlun": "core_theory",
        "Northern Dynasty": "northern_dynasty",
        "Kushan": "kushan_central_asia",
        "Jerusalem": "jerusalem_western",
    }

    # Spellings that all denote the combined Y/J sound system.
    _YJ_ALIASES = frozenset({"Y", "J", "Y/J", "YJ", "Y_J"})
    # The "<letter>_J" fallback in the lookup suggests the combined entry is
    # stored under this key -- TODO confirm against the metadata file.
    _YJ_SYSTEM_KEY = "Y_J"

    def __init__(self, metadata_path: str = "../metadata/papers-metadata.json"):
        """Load the metadata file.

        Args:
            metadata_path: Location of the metadata JSON, resolved relative
                to the directory containing this script.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        self.metadata_path = Path(__file__).parent / metadata_path
        self.data = self._load_metadata()

    def _load_metadata(self) -> Dict[str, Any]:
        """Read and parse the metadata JSON file as UTF-8."""
        with open(self.metadata_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def list_all_papers(self) -> None:
        """Print every category with a numbered list of its papers."""
        rule = "=" * 80
        print(f"\n{rule}")
        print(f"学术论文库总览 - 共 {self.data['metadata']['total_papers']}")
        print(f"{rule}\n")
        for category, papers in self.data['categories'].items():
            category_name = self._translate_category(category)
            print(f"\n【{category_name}】 ({len(papers)} 篇)")
            print("-" * 80)
            for i, paper in enumerate(papers, 1):
                print(f" {i}. {paper}")

    def query_by_topic(self, topic: str) -> List[str]:
        """Print and return the papers filed under a topic name.

        Args:
            topic: A user-facing topic name such as ``"K音"`` or ``"贵霜"``.

        Returns:
            The paper list of the matching category, or an empty list when
            the topic is unknown or its category is absent from the data.
        """
        categories = self.data['categories']
        topic_key = self._get_topic_key(topic)
        if not topic_key or topic_key not in categories:
            # Only advertise topic names that would actually resolve with
            # the loaded data, so following the hint never fails.
            available = [
                name for name, key in self._TOPIC_KEYS.items()
                if key in categories
            ]
            print(f"\n未找到主题:{topic}")
            print("可用主题:" + ", ".join(available))
            return []

        papers = categories[topic_key]
        print(f"\n【{topic}】相关论文 ({len(papers)} 篇)")
        print("-" * 80)
        for i, paper in enumerate(papers, 1):
            print(f" {i}. papers/{paper}")
        return papers

    def query_by_civilization(self, civ: str) -> List[str]:
        """Print a civilization's summary and return its papers.

        Args:
            civ: Either a key of the ``civilizations`` table (for example
                ``"Kushan"``) or one of the Chinese aliases (``"贵霜"``).

        Returns:
            Papers of the category mapped to the civilization; empty when the
            civilization is unknown or has no mapped category.
        """
        civilizations = self.data['civilizations']
        civ_data = civilizations.get(civ)
        if not civ_data:
            # Fall back to the Chinese alias table.
            civ = self._CIVILIZATION_ALIASES.get(civ, civ)
            civ_data = civilizations.get(civ)

        if not civ_data:
            print(f"\n未找到文明系统:{civ}")
            print("可用系统:Kunlun(昆仑), Northern Dynasty(北朝), Kushan(贵霜), Han(汉人), Sogdian(粟特), Jerusalem(西方)")
            return []

        print(f"\n【{civ}】文明系统")
        print(f"角色:{civ_data['role']}")
        print(f"论文数:{civ_data['paper_count']}")
        print(f"核心概念:{', '.join(civ_data['key_concepts'])}")
        print("-" * 80)
        papers = self._find_papers_by_civilization(civ)
        for i, paper in enumerate(papers, 1):
            print(f" {i}. papers/{paper}")
        return papers

    @staticmethod
    def _keywords_of(paper: Dict[str, Any]) -> List[str]:
        """Return a paper record's keywords as a list of strings."""
        raw = paper.get('keywords') or []
        if isinstance(raw, str):
            return [raw]
        return [str(item) for item in raw]

    def query_by_keyword(self, keyword: str) -> List[str]:
        """Case-insensitive substring search over paper records.

        A paper matches when ``keyword`` occurs in its title, its file name
        or any single one of its keywords. Matching is done per keyword, not
        against the printed form of the keyword list, so list punctuation
        never produces a hit. Missing fields are treated as empty. An empty
        ``keyword`` matches every paper.

        Args:
            keyword: Text to search for.

        Returns:
            File names of the matching papers, in metadata order.
        """
        needle = keyword.lower()
        print(f"\n搜索关键词:{keyword}")
        print("-" * 80)

        results = []
        for paper in self.data.get('papers', []):
            searchable = [
                str(paper.get('title') or ''),
                str(paper.get('filename') or ''),
                *self._keywords_of(paper),
            ]
            if any(needle in text.lower() for text in searchable):
                results.append(paper)

        if results:
            print(f"找到 {len(results)} 篇相关论文:\n")
            for i, paper in enumerate(results, 1):
                print(f" {i}. {paper.get('filename', '')}")
                print(f" 标题:{paper.get('title', '')}")
                print(f" 关键词:{', '.join(self._keywords_of(paper)[:5])}")
                print()
        else:
            print(f"未找到包含关键词 '{keyword}' 的论文")
        return [p.get('filename', '') for p in results]

    def query_by_phonetic(self, phonetic: str) -> List[str]:
        """Print a phonetic system's summary and return its papers.

        Args:
            phonetic: System letter, case-insensitive. ``Y``, ``J``, ``Y/J``,
                ``YJ`` and ``Y_J`` all select the combined Y/J system.

        Returns:
            Papers of the ``<letter>_sound`` category (``yj_sound`` for the
            Y/J system); empty when the system is unknown.
        """
        phonetic = phonetic.upper()
        systems = self.data['phonetic_systems']
        is_yj = phonetic in self._YJ_ALIASES

        # Try the literal key first, then the "<letter>_J" fallback, and for
        # any Y/J spelling also the shared entry.
        candidates = [phonetic, f"{phonetic}_J"]
        if is_yj:
            candidates.append(self._YJ_SYSTEM_KEY)
        phonetic_data = next(
            (systems[key] for key in candidates if systems.get(key)), None
        )

        if not phonetic_data:
            print(f"\n未找到音韵系统:{phonetic}")
            print("可用系统:K, M, Y/J, S, Z")
            return []

        print(f"\n【{phonetic}音】系统")
        print(f"几何形态:{phonetic_data['geometry']}")
        print(f"本质:{phonetic_data['nature']}")
        print(f"核心概念:{', '.join(phonetic_data['concepts'][:8])}")
        print(f"论文数:{phonetic_data['paper_count']}")
        print("-" * 80)

        category_key = "yj_sound" if is_yj else f"{phonetic.lower()}_sound"
        papers = self.data['categories'].get(category_key, [])
        for i, paper in enumerate(papers, 1):
            print(f" {i}. papers/{paper}")
        return papers

    def show_core_concepts(self) -> None:
        """Print each core concept with its description and key papers."""
        rule = "=" * 80
        print(f"\n{rule}")
        print("核心理论框架")
        print(f"{rule}\n")
        for concept, data in self.data['core_concepts'].items():
            print(f"{concept} ({data['english']})")
            print(f" 描述:{data['description']}")
            print(" 核心论文:")
            for paper in data['papers']:
                print(f" - papers/{paper}")
            print()

    def show_reading_path(self, path_type: str = "beginner") -> None:
        """Print a recommended reading path.

        Args:
            path_type: ``"beginner"`` for the introductory list or
                ``"phonetic"`` for one paper per phonetic system. Any other
                value prints the list of valid choices.
        """
        paths = self.data['reading_paths']
        rule = "=" * 80
        if path_type == "beginner":
            print(f"\n{rule}")
            print("推荐阅读路径 - 入门")
            print(f"{rule}\n")
            for i, paper in enumerate(paths['beginner'], 1):
                print(f" {i}. papers/{paper}")
        elif path_type == "phonetic":
            print(f"\n{rule}")
            print("推荐阅读路径 - 按音韵系统")
            print(f"{rule}\n")
            for phonetic, paper in paths['by_phonetics'].items():
                print(f" {phonetic}:papers/{paper}")
        else:
            print("\n可用路径:beginner, phonetic")

    def _translate_category(self, category: str) -> str:
        """Return the display name for a category key, or the key itself."""
        return self._CATEGORY_NAMES.get(category, category)

    def _get_topic_key(self, topic: str) -> Optional[str]:
        """Return the category key for a topic name, or ``None`` if unknown."""
        return self._TOPIC_KEYS.get(topic)

    def _find_papers_by_civilization(self, civ: str) -> List[str]:
        """Return the papers of the category mapped to a civilization key.

        Returns an empty list when the civilization has no mapped category or
        the category is absent from the loaded data.
        """
        category = self._CIVILIZATION_CATEGORIES.get(civ)
        if not category:
            return []
        return self.data['categories'].get(category, [])
def main():
    """Parse command-line options and run the first selected query.

    Options are checked in a fixed priority order (``--list-all``,
    ``--topic``, ``--civilization``, ``--keyword``, ``--phonetic``,
    ``--concepts``, ``--path``) and only the first one supplied is executed.
    When no option is given the help text is printed, without touching the
    metadata file. If the metadata file cannot be read or parsed, the
    program exits with status 1 and a short message instead of a traceback.
    """
    parser = argparse.ArgumentParser(description="学术论文库查询工具")
    parser.add_argument("--list-all", action="store_true", help="列出所有论文")
    parser.add_argument("--topic", type=str, help="按主题查询")
    parser.add_argument("--civilization", type=str, help="按文明系统查询")
    parser.add_argument("--keyword", type=str, help="按关键词查询")
    parser.add_argument("--phonetic", type=str, help="按音韵系统查询 (K/M/Y/J/S/Z)")
    parser.add_argument("--concepts", action="store_true", help="显示核心概念")
    parser.add_argument("--path", type=str, choices=["beginner", "phonetic"], help="显示推荐阅读路径")
    args = parser.parse_args()

    # (option value, action) pairs in priority order. The actions take the
    # query object as an argument so it is only built once one is chosen.
    actions = (
        (args.list_all, lambda q: q.list_all_papers()),
        (args.topic, lambda q: q.query_by_topic(args.topic)),
        (args.civilization, lambda q: q.query_by_civilization(args.civilization)),
        (args.keyword, lambda q: q.query_by_keyword(args.keyword)),
        (args.phonetic, lambda q: q.query_by_phonetic(args.phonetic)),
        (args.concepts, lambda q: q.show_core_concepts()),
        (args.path, lambda q: q.show_reading_path(args.path)),
    )
    action = next((run for selected, run in actions if selected), None)
    if action is None:
        # Nothing requested: show usage without requiring the metadata file.
        parser.print_help()
        return

    try:
        query = PaperQuery()
    except (OSError, json.JSONDecodeError) as exc:
        parser.exit(1, f"无法加载元数据:{exc}\n")
    action(query)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()