如果你正处在下面几种状态,很有可能需要一套 “AI 论文秘书”:
这篇文章,我们不谈「怎么读论文」,
而是带你 从零搭一个论文精读 Multi-Agent 系统,让 LangGraph 来做这些事:
帮你筛论文:从一堆候选中选出“值得精读 / 有复现价值”的; 帮你结构化做笔记:任务定义、数据集、方法框架、实验指标; 帮你生成实验 TODO:哪些可以在你现有代码库上快速复现,哪些适合作为你下篇论文的 idea 种子。
整套系统的定位是:
「科研狗专属 论文调研流水线」 —— 你写 prompt 定方向,它帮你把文献研究拆成「可执行任务」。

我们先把「论文精读」拆到 Multi-Agent 的粒度:
外层用 LangGraph 把这些 Agent 串成一张有状态的图:
START → PaperCollector → PaperTagger → PaperSummarizer → ExperimentPlanner → END
中间 State 里,挂着「论文清单、标签、总结、实验 TODO」。
先定义一个 PaperResearchState,所有 Agent 都在上面读写信息。
代码语言:python AI代码解释:定义 State 与类型,方便类型检查工具做静态检查、LangGraph 做状态管理。
# state.py
from typing import TypedDict, List, Optional
from typing_extensions import Annotated
import operator
class PaperMeta(TypedDict):
id: str
title: str
authors: str
year: int
source: str # arXiv / CVPR / NeurIPS ...
url: str
class PaperTag(TypedDict):
id: str
task_type: str # detection / generation / ...
method_type: str # architecture / loss / augmentation / ...
has_code: bool
importance: str # low / medium / high
class PaperSummary(TypedDict):
id: str
markdown: str
class ExperimentIdea(TypedDict):
id: str
title: str
idea_type: str # reproduction / extension / ablation
description: str
class PaperResearchState(TypedDict):
# 用户输入
topic: str # 研究主题,如 "generalizable deepfake detection"
my_research_field: str # 用户当前方向,如 "deepfake detection"
# 1) 候选论文
papers: Annotated[List[PaperMeta], operator.add]
# 2) 标签结果
tags: Annotated[List[PaperTag], operator.add]
# 3) 精读总结
summaries: Annotated[List[PaperSummary], operator.add]
# 4) 实验 TODO
experiments: Annotated[List[ExperimentIdea], operator.add]
# 日志
logs: Annotated[List[str], operator.add]为了让代码离线也能跑,我们先造几个 mock 论文(都编的):
代码语言:python AI代码解释:模拟 arXiv / CVPR 论文元数据。
# mock_papers.py
PAPER_DB = [
{
"id": "P001",
"title": "Latent Space Data Augmentation for Generalizable Deepfake Detection",
"authors": "Alice et al.",
"year": 2024,
"source": "CVPR",
"url": "https://example.com/lsda",
},
{
"id": "P002",
"title": "Energy-Guided Representation Learning for Face Forgery Detection",
"authors": "Bob et al.",
"year": 2025,
"source": "ICCV",
"url": "https://example.com/energy-forgery",
},
{
"id": "P003",
"title": "Multi-Agent Reinforcement Learning for LLM-based Research Planning",
"authors": "Carol et al.",
"year": 2025,
"source": "NeurIPS",
"url": "https://example.com/marl-llm",
},
]真实工程里,你可以把这一块换成:
先来一个最简单的「基于标题关键词」筛选:
代码语言:python AI代码解释:根据 topic 关键词,在 mock 数据中做过滤。
# nodes.py
from typing import List
from state import PaperResearchState, PaperMeta
from mock_papers import PAPER_DB
def paper_collector_node(state: PaperResearchState) -> PaperResearchState:
topic = state["topic"].lower()
selected: List[PaperMeta] = []
for p in PAPER_DB:
title_lower = p["title"].lower()
if any(k in title_lower for k in topic.split()):
selected.append(PaperMeta(**p))
# 如果一个都没选到,就全部给它(避免空集)
if not selected:
selected = [PaperMeta(**p) for p in PAPER_DB]
state["papers"].extend(selected)
state["logs"].append(
f"[PaperCollector] topic={state['topic']}, selected={len(selected)} papers."
)
return state代码语言:python AI代码解释:统一封装一个 LLM 构造,方便以后换模型/参数。
# llm_config.py
from langchain_openai import ChatOpenAI
def build_llm():
# 这里按你自己的环境来填 key / base / model
return ChatOpenAI(
model="gpt-4o-mini", # 或者你自己的部署
temperature=0.2,
)papers代码语言:python AI代码解释:用 PromptTemplate + LLMChain,对每篇论文做结构化标签。
# tagger.py
from typing import List
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from state import PaperResearchState, PaperTag
from llm_config import build_llm
import json
llm = build_llm()
tag_prompt = PromptTemplate(
input_variables=["topic", "papers"],
template=(
"你是一个科研助手,当前研究主题是:{topic}\n"
"下面是候选论文列表(JSON 数组,每个元素包含 id, title, authors, year, source):\n"
"{papers}\n\n"
"请你为每篇论文打上以下标签,并返回 JSON 数组(不要解释):\n"
"- id: 论文 id\n"
"- task_type: 任务类型(例如 detection / generation / representation / RL / multi-agent / other)\n"
"- method_type: 方法类型(例如 architecture / loss / data_augmentation / training_strategy / system / other)\n"
"- has_code: 是否倾向认为有开源代码(true/false)\n"
"- importance: 对当前主题的重要程度(low / medium / high)\n"
),
)
tag_chain = LLMChain(llm=llm, prompt=tag_prompt)
def paper_tagger_node(state: PaperResearchState) -> PaperResearchState:
if not state["papers"]:
state["logs"].append("[PaperTagger] no papers to tag.")
return state
papers_json = json.dumps(state["papers"], ensure_ascii=False)
raw = tag_chain.run(topic=state["topic"], papers=papers_json)
try:
parsed = json.loads(raw)
except Exception:
# LLM 偶尔乱输出,做个兜底
state["logs"].append("[PaperTagger] failed to parse LLM output, skip tagging.")
return state
tags: List[PaperTag] = []
for item in parsed:
tag = PaperTag(
id=item.get("id", ""),
task_type=item.get("task_type", "other"),
method_type=item.get("method_type", "other"),
has_code=bool(item.get("has_code", False)),
importance=item.get("importance", "medium"),
)
tags.append(tag)
state["tags"].extend(tags)
state["logs"].append(f"[PaperTagger] tagged {len(tags)} papers.")
return state我们只对 importance == "high" 的论文做结构化总结:
代码语言:python AI代码解释:生成 Markdown 格式的论文精读笔记。
# summarizer.py
from typing import List
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from state import PaperResearchState, PaperSummary
from llm_config import build_llm
import json
llm = build_llm()
summary_prompt = PromptTemplate(
input_variables=["paper", "topic"],
template=(
"你是一个论文精读助手,当前研究主题是:{topic}\n"
"下面是论文的元信息(JSON 格式):\n{paper}\n\n"
"请用 Markdown 结构化地总结这篇论文,要求:\n"
"1. 用中文回答,但保留必要的英文术语和公式符号。\n"
"2. 结构包含:研究问题、方法概述、关键技术点(可以分条)、实验设置与指标、主要结论与局限。\n"
"3. 每一节前用 emoji 做小标题,比如:🧩 方法、📊 实验。\n"
"4. 假设读者是已经有深度学习基础的研究生,不用讲太基础的概念。\n"
),
)
summary_chain = LLMChain(llm=llm, prompt=summary_prompt)
def paper_summarizer_node(state: PaperResearchState) -> PaperResearchState:
# 找出 importance=high 的论文
important_ids = {
t["id"] for t in state["tags"] if t["importance"] == "high"
}
id2paper = {p["id"]: p for p in state["papers"]}
summaries: List[PaperSummary] = []
for pid in important_ids:
paper = id2paper.get(pid)
if not paper:
continue
paper_json = json.dumps(paper, ensure_ascii=False, indent=2)
md = summary_chain.run(paper=paper_json, topic=state["topic"])
summaries.append(PaperSummary(id=pid, markdown=md))
state["summaries"].extend(summaries)
state["logs"].append(
f"[PaperSummarizer] summarized {len(summaries)} important papers."
)
return state目标是让它输出一份「对你当前研究方向有用的实验清单」。
代码语言:python AI代码解释:结合用户 research_field、论文标签与摘要,生成实验 idea。
# planner.py
from typing import List
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from state import PaperResearchState, ExperimentIdea
from llm_config import build_llm
import json
llm = build_llm()
plan_prompt = PromptTemplate(
input_variables=["research_field", "topic", "papers", "tags", "summaries"],
template=(
"你是一个科研助理,用户当前的研究方向是:{research_field}\n"
"本次文献调研的主题是:{topic}\n\n"
"候选论文元信息(JSON 数组):\n{papers}\n\n"
"论文标签信息(JSON 数组):\n{tags}\n\n"
"重要论文的精读总结(Markdown 嵌入 JSON 数组):\n{summaries}\n\n"
"请你基于这些信息,设计一份实验 TODO 列表,并以 JSON 数组返回,每个元素包含:\n"
"- id: 对应的论文 id(如果是综合 idea,可以写 'MIX')\n"
"- title: 实验标题(简短)\n"
"- idea_type: reproduction / extension / ablation 三选一\n"
"- description: 实验设计简要描述,包含:要用的数据集、baseline、大致评估指标。\n"
"注意:只返回 JSON,不要多余解释。\n"
),
)
plan_chain = LLMChain(llm=llm, prompt=plan_prompt)
def experiment_planner_node(state: PaperResearchState) -> PaperResearchState:
import json
if not state["papers"]:
state["logs"].append("[ExperimentPlanner] no papers, skip.")
return state
papers_json = json.dumps(state["papers"], ensure_ascii=False)
tags_json = json.dumps(state["tags"], ensure_ascii=False)
summaries_json = json.dumps(state["summaries"], ensure_ascii=False)
raw = plan_chain.run(
research_field=state["my_research_field"],
topic=state["topic"],
papers=papers_json,
tags=tags_json,
summaries=summaries_json,
)
try:
parsed = json.loads(raw)
except Exception:
state["logs"].append("[ExperimentPlanner] failed to parse ideas JSON.")
return state
ideas: List[ExperimentIdea] = []
for item in parsed:
ideas.append(
ExperimentIdea(
id=item.get("id", "MIX"),
title=item.get("title", "Untitled Experiment"),
idea_type=item.get("idea_type", "extension"),
description=item.get("description", ""),
)
)
state["experiments"].extend(ideas)
state["logs"].append(
f"[ExperimentPlanner] generated {len(ideas)} experiment ideas."
)
return state代码语言:python AI代码解释:声明节点、定义有向边、编译 Graph。
# graph_build.py
from langgraph.graph import StateGraph, START, END
from state import PaperResearchState
from nodes import paper_collector_node
from tagger import paper_tagger_node
from summarizer import paper_summarizer_node
from planner import experiment_planner_node
def build_paper_graph():
builder = StateGraph(PaperResearchState)
# 注册节点
builder.add_node("collector", paper_collector_node)
builder.add_node("tagger", paper_tagger_node)
builder.add_node("summarizer", paper_summarizer_node)
builder.add_node("planner", experiment_planner_node)
# 定义流程:START -> collector -> tagger -> summarizer -> planner -> END
builder.add_edge(START, "collector")
builder.add_edge("collector", "tagger")
builder.add_edge("tagger", "summarizer")
builder.add_edge("summarizer", "planner")
builder.add_edge("planner", END)
graph = builder.compile()
return graph跑一个完整 demo:
代码语言:python AI代码解释:构造初始 state,调用 graph,打印日志 & 输出结果。
# main.py
from graph_build import build_paper_graph
def main():
graph = build_paper_graph()
init_state = {
"topic": "deepfake detection latent augmentation",
"my_research_field": "face forgery detection",
"papers": [],
"tags": [],
"summaries": [],
"experiments": [],
"logs": [],
}
final_state = graph.invoke(init_state)
print("==== Logs ====")
for log in final_state["logs"]:
print(log)
print("\n==== Papers ====")
for p in final_state["papers"]:
print(f"- [{p['id']}] {p['title']} ({p['year']} {p['source']})")
print("\n==== Experiment TODOs ====")
for e in final_state["experiments"]:
print(f"[{e['idea_type']}] {e['title']} <- from {e['id']}")
print(f" {e['description']}\n")
# 如果你想把精读摘要 + 实验 TODO 直接写到一个 Markdown 文件
if final_state["summaries"]:
with open("paper_research_report.md", "w", encoding="utf-8") as f:
f.write("# 论文调研报告(自动生成)\n\n")
for s in final_state["summaries"]:
f.write(f"## 论文 {s['id']} 精读\n\n")
f.write(s["markdown"])
f.write("\n\n---\n\n")
f.write("## 实验 TODO 列表\n\n")
for e in final_state["experiments"]:
f.write(f"- **[{e['idea_type']}] {e['title']}** (来自 {e['id']})\n")
f.write(f" - {e['description']}\n")
if __name__ == "__main__":
main()原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。