#!/usr/bin/env python3
"""
学术论文库查询工具
Academic Papers Query Tool

Usage:
    python query_papers.py --list-all
    python query_papers.py --topic "K音"
    python query_papers.py --civilization "贵霜"
    python query_papers.py --keyword "Jade"
    python query_papers.py --phonetic "M"
"""

import argparse
import json
from pathlib import Path
from typing import Any, Dict, List, Optional

class PaperQuery:
    """Query helper over the paper library's JSON metadata.

    The metadata document is loaded once at construction time and kept in
    ``self.data``. The methods below read these top-level keys from it:
    ``metadata`` (``total_papers``), ``categories``, ``civilizations``,
    ``phonetic_systems``, ``papers``, ``core_concepts`` and
    ``reading_paths``.

    Every query method prints a human-readable report to stdout and, where
    it has a result, also returns the matching paper file names.
    """

    # Category key -> display name shown by ``list_all_papers``.
    _CATEGORY_NAMES: Dict[str, str] = {
        "core_theory": "核心理论框架",
        "k_sound": "K音系统研究",
        "m_sound": "M音系统研究",
        "yj_sound": "Y/J音系统研究",
        "s_sound": "S音系统研究",
        "jerusalem_western": "J/Jerusalem系统研究",
        "kushan_central_asia": "贵霜/中亚研究",
        "northern_dynasty": "北朝/辽金元研究",
        "literature_culture": "文学/文化分析",
    }

    # User-facing topic name -> category key. "Y/J音" and "西方" are included
    # because the not-found hint has always advertised them.
    _TOPIC_KEYS: Dict[str, str] = {
        "K音": "k_sound",
        "M音": "m_sound",
        "Y音": "yj_sound",
        "J音": "yj_sound",
        "Y/J音": "yj_sound",
        "S音": "s_sound",
        "核心理论": "core_theory",
        "贵霜": "kushan_central_asia",
        "中亚": "kushan_central_asia",
        "北朝": "northern_dynasty",
        "辽金元": "northern_dynasty",
        "昆仑": "core_theory",
        "西方": "jerusalem_western",
        "文学": "literature_culture",
    }

    # Chinese civilization name -> key used in ``data['civilizations']``.
    _CIVILIZATION_ALIASES: Dict[str, str] = {
        "昆仑": "Kunlun",
        "北朝": "Northern Dynasty",
        "贵霜": "Kushan",
        "汉人": "Han",
        "粟特": "Sogdian",
        "西方": "Jerusalem",
    }

    # Civilization key -> category key. Simplified mapping: civilizations
    # without an entry here (e.g. Han, Sogdian) yield no paper list.
    _CIVILIZATION_CATEGORIES: Dict[str, str] = {
        "Kunlun": "core_theory",
        "Northern Dynasty": "northern_dynasty",
        "Kushan": "kushan_central_asia",
        "Jerusalem": "jerusalem_western",
    }

    # Spellings that all refer to the combined Y/J phonetic system.
    _YJ_ALIASES = frozenset({"Y", "J", "Y/J", "YJ", "Y_J"})

    def __init__(self, metadata_path: str = "../metadata/papers-metadata.json"):
        """Load the metadata file.

        Args:
            metadata_path: Path to the metadata JSON, resolved relative to
                the directory containing this script.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        self.metadata_path = Path(__file__).parent / metadata_path
        self.data = self._load_metadata()

    def _load_metadata(self) -> Dict[str, Any]:
        """Read and parse the metadata JSON file as UTF-8."""
        with open(self.metadata_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def list_all_papers(self) -> None:
        """Print every category with a numbered list of its papers."""
        print(f"\n{'='*80}")
        print(f"学术论文库总览 - 共 {self.data['metadata']['total_papers']} 篇")
        print(f"{'='*80}\n")

        for category, papers in self.data['categories'].items():
            category_name = self._translate_category(category)
            print(f"\n【{category_name}】 ({len(papers)} 篇)")
            print("-" * 80)
            for i, paper in enumerate(papers, 1):
                print(f" {i}. {paper}")

    def query_by_topic(self, topic: str) -> List[str]:
        """Print and return the papers filed under a topic name.

        Args:
            topic: A user-facing topic name such as ``"K音"`` or ``"贵霜"``.

        Returns:
            The category's paper list, or ``[]`` when the topic is unknown
            or its category is absent from the metadata.
        """
        topic_key = self._get_topic_key(topic)
        if not topic_key or topic_key not in self.data['categories']:
            print(f"\n未找到主题:{topic}")
            # List exactly the names the lookup accepts, so the hint can
            # never advertise a topic that does not resolve.
            print(f"可用主题:{', '.join(self._TOPIC_KEYS)}")
            return []

        papers = self.data['categories'][topic_key]
        print(f"\n【{topic}】相关论文 ({len(papers)} 篇):")
        print("-" * 80)
        for i, paper in enumerate(papers, 1):
            print(f" {i}. papers/{paper}")
        return papers

    def query_by_civilization(self, civ: str) -> List[str]:
        """Print a civilization system's summary and return its papers.

        Args:
            civ: Either a key of ``data['civilizations']`` or one of the
                Chinese aliases (e.g. ``"贵霜"`` for ``"Kushan"``).

        Returns:
            Papers of the category mapped to the civilization, or ``[]``
            when the civilization is unknown or has no mapped category.
        """
        systems = self.data['civilizations']
        civ_data = systems.get(civ)
        if not civ_data:
            # Fall back to the Chinese-name alias table.
            civ = self._CIVILIZATION_ALIASES.get(civ, civ)
            civ_data = systems.get(civ)

        if not civ_data:
            print(f"\n未找到文明系统:{civ}")
            print("可用系统:Kunlun(昆仑), Northern Dynasty(北朝), Kushan(贵霜), Han(汉人), Sogdian(粟特), Jerusalem(西方)")
            return []

        print(f"\n【{civ}】文明系统")
        print(f"角色:{civ_data['role']}")
        print(f"论文数:{civ_data['paper_count']}")
        print(f"核心概念:{', '.join(civ_data['key_concepts'])}")
        print("-" * 80)
        # Papers come from the single category mapped to this civilization.
        papers = self._find_papers_by_civilization(civ)
        for i, paper in enumerate(papers, 1):
            print(f" {i}. papers/{paper}")
        return papers

    @staticmethod
    def _keywords_of(paper: Dict[str, Any]) -> List[str]:
        """Return a paper's keywords as a list of strings (may be empty)."""
        keywords = paper.get('keywords') or []
        if isinstance(keywords, str):
            # A lone string would otherwise be iterated character by character.
            keywords = [keywords]
        return [str(k) for k in keywords]

    def query_by_keyword(self, keyword: str) -> List[str]:
        """Case-insensitively search keywords, titles and file names.

        Args:
            keyword: Substring to look for.

        Returns:
            The ``filename`` of every matching entry in ``data['papers']``
            (an empty string for an entry that has no ``filename``).
        """
        needle = keyword.lower()
        print(f"\n搜索关键词:{keyword}")
        print("-" * 80)

        results = []
        for paper in self.data.get('papers', []):
            # Match each keyword individually rather than the list's repr,
            # so quotes, commas and brackets do not match every paper.
            haystacks = self._keywords_of(paper)
            haystacks.append(str(paper.get('title', '')))
            haystacks.append(str(paper.get('filename', '')))
            if any(needle in text.lower() for text in haystacks):
                results.append(paper)

        if results:
            print(f"找到 {len(results)} 篇相关论文:\n")
            for i, paper in enumerate(results, 1):
                # Use the same tolerant access as the matching step: a paper
                # matched by title alone may have no keywords at all.
                print(f" {i}. {paper.get('filename', '')}")
                print(f" 标题:{paper.get('title', '')}")
                print(f" 关键词:{', '.join(self._keywords_of(paper)[:5])}")
                print()
        else:
            print(f"未找到包含关键词 '{keyword}' 的论文")

        return [p.get('filename', '') for p in results]

    def query_by_phonetic(self, phonetic: str) -> List[str]:
        """Print a phonetic system's summary and return its papers.

        Args:
            phonetic: System letter, case-insensitive. ``Y``, ``J``,
                ``Y/J``, ``YJ`` and ``Y_J`` all select the combined Y/J
                system.

        Returns:
            Papers of the ``<letter>_sound`` category (``yj_sound`` for the
            Y/J system), or ``[]`` when the system is unknown.
        """
        phonetic = phonetic.strip().upper()
        is_yj = phonetic in self._YJ_ALIASES
        systems = self.data['phonetic_systems']

        # Try the letter itself, then the "<letter>_J" form, and for any Y/J
        # spelling finally the combined "Y_J" key.
        candidates = [phonetic, f"{phonetic}_J"]
        if is_yj:
            candidates.append("Y_J")
        phonetic_data = next(
            (systems[key] for key in candidates if systems.get(key)), None
        )

        if not phonetic_data:
            print(f"\n未找到音韵系统:{phonetic}")
            print("可用系统:K, M, Y/J, S, Z")
            return []

        print(f"\n【{phonetic}音】系统")
        print(f"几何形态:{phonetic_data['geometry']}")
        print(f"本质:{phonetic_data['nature']}")
        print(f"核心概念:{', '.join(phonetic_data['concepts'][:8])}")
        print(f"论文数:{phonetic_data['paper_count']}")
        print("-" * 80)

        # Locate the related papers via the matching category.
        category_key = "yj_sound" if is_yj else f"{phonetic.lower()}_sound"
        papers = self.data['categories'].get(category_key, [])
        for i, paper in enumerate(papers, 1):
            print(f" {i}. papers/{paper}")
        return papers

    def show_core_concepts(self) -> None:
        """Print each core concept with its description and key papers."""
        print(f"\n{'='*80}")
        print("核心理论框架")
        print(f"{'='*80}\n")

        for concept, data in self.data['core_concepts'].items():
            print(f"【{concept}】{data['english']}")
            print(f" 描述:{data['description']}")
            print(" 核心论文:")
            for paper in data['papers']:
                print(f" - papers/{paper}")
            print()

    def show_reading_path(self, path_type: str = "beginner") -> None:
        """Print a recommended reading path.

        Args:
            path_type: ``"beginner"`` for the introductory list or
                ``"phonetic"`` for one paper per phonetic system. Any other
                value prints the list of available paths.
        """
        paths = self.data['reading_paths']

        if path_type == "beginner":
            print(f"\n{'='*80}")
            print("推荐阅读路径 - 入门")
            print(f"{'='*80}\n")
            for i, paper in enumerate(paths['beginner'], 1):
                print(f" {i}. papers/{paper}")

        elif path_type == "phonetic":
            print(f"\n{'='*80}")
            print("推荐阅读路径 - 按音韵系统")
            print(f"{'='*80}\n")
            for phonetic, paper in paths['by_phonetics'].items():
                print(f" {phonetic}音:papers/{paper}")

        else:
            print("\n可用路径:beginner, phonetic")

    def _translate_category(self, category: str) -> str:
        """Return the display name for a category key (the key if unknown)."""
        return self._CATEGORY_NAMES.get(category, category)

    def _get_topic_key(self, topic: str) -> Optional[str]:
        """Map a user-facing topic name to its category key.

        Returns ``None`` for an unknown topic. The lookup is retried with
        surrounding whitespace removed and Latin letters upper-cased, so
        ``"k音"`` resolves like ``"K音"``.
        """
        key = self._TOPIC_KEYS.get(topic)
        if key is None and isinstance(topic, str):
            key = self._TOPIC_KEYS.get(topic.strip().upper())
        return key

    def _find_papers_by_civilization(self, civ: str) -> List[str]:
        """Return the papers of the category mapped to a civilization key.

        Returns ``[]`` when the civilization has no mapped category or the
        category is absent from the metadata.
        """
        category = self._CIVILIZATION_CATEGORIES.get(civ)
        if category:
            return self.data['categories'].get(category, [])
        return []


def main() -> None:
    """Parse the command line and run the first requested query.

    The options are checked in a fixed order (``--list-all``, ``--topic``,
    ``--civilization``, ``--keyword``, ``--phonetic``, ``--concepts``,
    ``--path``) and only the first one supplied is executed. With no
    option, the help text is printed without touching the metadata file.
    If the metadata cannot be loaded, the parser reports the error and
    exits instead of showing a traceback.
    """
    parser = argparse.ArgumentParser(description="学术论文库查询工具")
    parser.add_argument("--list-all", action="store_true", help="列出所有论文")
    parser.add_argument("--topic", type=str, help="按主题查询")
    parser.add_argument("--civilization", type=str, help="按文明系统查询")
    parser.add_argument("--keyword", type=str, help="按关键词查询")
    parser.add_argument("--phonetic", type=str, help="按音韵系统查询 (K/M/Y/J/S/Z)")
    parser.add_argument("--concepts", action="store_true", help="显示核心概念")
    parser.add_argument("--path", type=str, choices=["beginner", "phonetic"], help="显示推荐阅读路径")

    args = parser.parse_args()

    # Nothing requested (every option unset or empty): show help before
    # loading anything, so help works even when the metadata is missing.
    if not any(vars(args).values()):
        parser.print_help()
        return

    try:
        query = PaperQuery()
    except (OSError, ValueError) as exc:
        # OSError: file missing or unreadable. ValueError: covers
        # json.JSONDecodeError and UnicodeDecodeError. parser.error exits.
        parser.error(f"无法加载论文元数据:{exc}")

    if args.list_all:
        query.list_all_papers()
    elif args.topic:
        query.query_by_topic(args.topic)
    elif args.civilization:
        query.query_by_civilization(args.civilization)
    elif args.keyword:
        query.query_by_keyword(args.keyword)
    elif args.phonetic:
        query.query_by_phonetic(args.phonetic)
    elif args.concepts:
        query.show_core_concepts()
    elif args.path:
        query.show_reading_path(args.path)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()