An overview of LangChain's main concepts and how they relate to each other.
The relationships among the relevant classes are as follows:
class BaseLanguageModel(BaseModel, ABC):
    @abstractmethod
    def generate_prompt(
        self, prompts: List[PromptValue], stop: Optional[List[str]] = None
    ) -> LLMResult:
        """Take in a list of prompt values and return an LLMResult."""

    def get_num_tokens(self, text: str) -> int:
        """Get the number of tokens present in the text."""

    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
        """Get the number of tokens in the message."""
class BaseLLM(BaseLanguageModel, BaseModel, ABC):
    """LLM wrapper should take in a prompt and return a string."""

    cache: Optional[bool] = None
    verbose: bool = Field(default_factory=_get_verbosity)
    """Whether to print out response text."""
    callback_manager: BaseCallbackManager = Field(default_factory=get_callback_manager)

    def generate(
        self, prompts: List[str], stop: Optional[List[str]] = None
    ) -> LLMResult:
        """Run the LLM on the given prompt and input."""

    def dict(self, **kwargs: Any) -> Dict:
        """Return a dictionary of the LLM."""
        starter_dict = dict(self._identifying_params)
        starter_dict["_type"] = self._llm_type
        return starter_dict

    def save(self, file_path: Union[Path, str]) -> None:
        """Save the LLM."""
The default model is text-davinci-003.
class BaseOpenAI(BaseLLM, BaseModel):
    """Wrapper around OpenAI large language models."""

    client: Any  #: :meta private:
    model_name: str = "text-davinci-003"
    """Model name to use."""
    temperature: float = 0.7
    """What sampling temperature to use."""
    max_tokens: int = 256
    """The maximum number of tokens to generate in the completion.
    -1 returns as many tokens as possible given the prompt and
    the models maximal context size."""
    top_p: float = 1
    """Total probability mass of tokens to consider at each step."""
    frequency_penalty: float = 0
    """Penalizes repeated tokens according to frequency."""
    presence_penalty: float = 0
    """Penalizes repeated tokens."""
    n: int = 1
    """How many completions to generate for each prompt."""
    best_of: int = 1
    """Generates best_of completions server-side and returns the "best"."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
    openai_api_key: Optional[str] = None
    batch_size: int = 20
    """Batch size to use when passing multiple documents to generate."""
    request_timeout: Optional[Union[float, Tuple[float, float]]] = None
    """Timeout for requests to OpenAI completion API. Default is 600 seconds."""
    logit_bias: Optional[Dict[str, float]] = Field(default_factory=dict)
    """Adjust the probability of specific tokens being generated."""
    max_retries: int = 6
    """Maximum number of retries to make when generating."""
    streaming: bool = False
    """Whether to stream the results or not."""

    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""

    def get_num_tokens(self, text: str) -> int:
        """Calculate num tokens with tiktoken package."""

    def modelname_to_contextsize(self, modelname: str) -> int:
        """Calculate the maximum number of tokens possible to generate for a model.

        text-davinci-003: 4,097 tokens
        text-curie-001: 2,048 tokens
        text-babbage-001: 2,048 tokens
        text-ada-001: 2,048 tokens
        code-davinci-002: 8,000 tokens
        code-cushman-001: 2,048 tokens

        Args:
            modelname: The modelname we want to know the context size for.

        Returns:
            The maximum context size
        """

    def max_tokens_for_prompt(self, prompt: str) -> int:
        """Calculate the maximum number of tokens possible to generate for a prompt."""
class OpenAI(BaseOpenAI):
    """Generic OpenAI class that uses model name."""

    @property
    def _invocation_params(self) -> Dict[str, Any]:
        return {**{"model": self.model_name}, **super()._invocation_params}
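A minimal usage sketch (assuming a 2023-era langchain install with the openai package and OPENAI_API_KEY set in the environment):

from langchain.llms import OpenAI

# Defaults to model_name="text-davinci-003"; temperature and max_tokens map
# to the BaseOpenAI fields shown above.
llm = OpenAI(temperature=0.7, max_tokens=256)
print(llm("Say hello in French."))  # __call__ wraps generate() and returns a string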
class BaseChatModel(BaseLanguageModel, BaseModel, ABC):
    verbose: bool = Field(default_factory=_get_verbosity)
    """Whether to print out response text."""
    callback_manager: BaseCallbackManager = Field(default_factory=get_callback_manager)
The default model is gpt-3.5-turbo, a model that has been tuned for dialogue.
class ChatOpenAI(BaseChatModel, BaseModel):
    """Wrapper around OpenAI Chat large language models."""
The relationships among the relevant classes are as follows:
BaseModel comes from pydantic, a Python library for defining, validating, and managing data interfaces. pydantic enforces type hints at runtime and raises friendly errors when data is invalid. Reference: https://www.cnblogs.com/dyl0/articles/16896330.html
class Chain(BaseModel, ABC):
    """Base interface that all chains should implement."""

    memory: Optional[BaseMemory] = None
    callback_manager: BaseCallbackManager = Field(
        default_factory=get_callback_manager, exclude=True
    )
    verbose: bool = Field(
        default_factory=_get_verbosity
    )  # Whether to print the response text

    @property
    @abstractmethod
    def input_keys(self) -> List[str]:
        """Input keys this chain expects."""

    @property
    @abstractmethod
    def output_keys(self) -> List[str]:
        """Output keys this chain expects."""

    def apply(self, input_list: List[Dict[str, Any]]) -> List[Dict[str, str]]:
        """Call the chain on all inputs in the list."""

    def run(self, *args: str, **kwargs: str) -> str:
        """Run the chain as text in, text out or multiple variables, text out."""

    def dict(self, **kwargs: Any) -> Dict:
        """Return dictionary representation of chain."""

    def save(self, file_path: Union[Path, str]) -> None:
        """Save the chain."""
class LLMChain(Chain, BaseModel):
    """Chain to run queries against LLMs."""

    prompt: BasePromptTemplate
    """Prompt object to use."""
    llm: BaseLanguageModel
    output_key: str = "text"  #: :meta private:

    def generate(self, input_list: List[Dict[str, Any]]) -> LLMResult:
        """Generate LLM result from inputs."""

    def apply(self, input_list: List[Dict[str, Any]]) -> List[Dict[str, str]]:
        """Utilize the LLM generate method for speed gains."""

    def predict(self, **kwargs: Any) -> str:
        """Format prompt with kwargs and pass to LLM."""

    def from_string(cls, llm: BaseLanguageModel, template: str) -> Chain:
        """Create LLMChain from LLM and template."""
ConversationChain inherits from LLMChain:
class ConversationChain(LLMChain, BaseModel):
    """Chain to have a conversation and load context from memory.

    Example:
        .. code-block:: python

            from langchain import ConversationChain, OpenAI
            conversation = ConversationChain(llm=OpenAI())
    """

    memory: BaseMemory = Field(default_factory=ConversationBufferMemory)
    """Default memory store."""
    prompt: BasePromptTemplate = PROMPT
    """Default conversation prompt to use."""
    input_key: str = "input"  #: :meta private:
    output_key: str = "response"  #: :meta private:
The relationships among the relevant classes are as follows:
class Agent(BaseModel):
    """Class responsible for calling the language model and deciding the action.

    This is driven by an LLMChain. The prompt in the LLMChain MUST include
    a variable called "agent_scratchpad" where the agent can put its
    intermediary work.
    """

    llm_chain: LLMChain
    allowed_tools: Optional[List[str]] = None
    return_values: List[str] = ["output"]

    @property
    def _stop(self) -> List[str]:
        ...

    def _construct_scratchpad(
        self, intermediate_steps: List[Tuple[AgentAction, str]]
    ) -> Union[str, List[BaseMessage]]:
        """Construct the scratchpad that lets the agent continue its thought process."""

    def _get_next_action(self, full_inputs: Dict[str, str]) -> AgentAction:
        ...

    def plan(
        self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
    ) -> Union[AgentAction, AgentFinish]:
        """Given input, decided what to do."""

    @property
    def finish_tool_name(self) -> str:
        """Name of the tool to use to finish the chain."""
        return "Final Answer"

    @root_validator()
    def validate_prompt(cls, values: Dict) -> Dict:
        """Validate that prompt matches format."""

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLLM,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        **kwargs: Any,
    ) -> Agent:
        """Construct an agent from an LLM and tools."""

    def return_stopped_response(
        self,
        early_stopping_method: str,
        intermediate_steps: List[Tuple[AgentAction, str]],
        **kwargs: Any,
    ) -> AgentFinish:
        """Return response when agent has been stopped due to max iterations."""

    def save(self, file_path: Union[Path, str]) -> None:
        """Save the agent."""
class ZeroShotAgent(Agent):
    """Agent for the MRKL chain."""

    @property
    def _agent_type(self) -> str:
        """Return Identifier of agent type."""
        return "zero-shot-react-description"

    @property
    def observation_prefix(self) -> str:
        """Prefix to append the observation with."""
        return "Observation: "

    @property
    def llm_prefix(self) -> str:
        """Prefix to append the llm call with."""
        return "Thought:"

    @classmethod
    def create_prompt(
        cls,
        tools: Sequence[BaseTool],
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
        input_variables: Optional[List[str]] = None,
    ) -> PromptTemplate:
        """Create prompt in the style of the zero shot agent."""

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLLM,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
        input_variables: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Agent:
        """Construct an agent from an LLM and tools."""
AgentExecutor inherits from Chain, so it has the run() method; it also holds an Agent object as an attribute.
class AgentExecutor(Chain, BaseModel):
    """Consists of an agent using tools."""

    agent: Agent
    tools: Sequence[BaseTool]
    return_intermediate_steps: bool = False
    max_iterations: Optional[int] = 15
    early_stopping_method: str = "force"

    def from_agent_and_tools(
        cls,
        agent: Agent,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        **kwargs: Any,
    ) -> AgentExecutor:
        """Create from agent and tools."""

    def save_agent(self, file_path: Union[Path, str]) -> None:
        """Save the underlying agent."""

    def input_keys(self) -> List[str]:
        """Return the input keys."""

    def output_keys(self) -> List[str]:
        """Return the singular output key."""
def initialize_agent(
    tools: Sequence[BaseTool],
    llm: BaseLLM,
    agent: Optional[str] = None,
    callback_manager: Optional[BaseCallbackManager] = None,
    agent_path: Optional[str] = None,
    agent_kwargs: Optional[dict] = None,
    **kwargs: Any,
) -> AgentExecutor:
    """Load an agent executor given tools and LLM.

    Args:
        tools: List of tools this agent has access to.
        llm: Language model to use as the agent.
        agent: A string that specified the agent type to use. Valid options are:
            `zero-shot-react-description`
            `react-docstore`
            `self-ask-with-search`
            `conversational-react-description`
            `chat-zero-shot-react-description`
            `chat-conversational-react-description`
            If None and agent_path is also None, will default to
            `zero-shot-react-description`.
        callback_manager: CallbackManager to use. Global callback manager is used if
            not provided. Defaults to None.
        agent_path: Path to serialized agent to use.
        agent_kwargs: Additional key word arguments to pass to the underlying agent
        **kwargs: Additional key word arguments passed to the agent executor

    Returns:
        An agent executor
    """
The relationships among the relevant classes are as follows:
class BaseMemory(BaseModel, ABC):
    """Base interface for memory in chains."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    @abstractmethod
    def memory_variables(self) -> List[str]:
        """Input keys this memory class will load dynamically."""

    @abstractmethod
    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return key-value pairs given the text input to the chain.

        If None, return all memories
        """

    @abstractmethod
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Save the context of this model run to memory."""

    @abstractmethod
    def clear(self) -> None:
        """Clear memory contents."""
class BaseChatMemory(BaseMemory, ABC):
    chat_memory: ChatMessageHistory = Field(default_factory=ChatMessageHistory)
    output_key: Optional[str] = None
    input_key: Optional[str] = None
    return_messages: bool = False

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Save context from this conversation to buffer."""

    def clear(self) -> None:
        """Clear memory contents."""
class ChatMessageHistory(BaseModel):
    messages: List[BaseMessage] = Field(default_factory=list)

    def add_user_message(self, message: str) -> None:
        self.messages.append(HumanMessage(content=message))

    def add_ai_message(self, message: str) -> None:
        self.messages.append(AIMessage(content=message))

    def clear(self) -> None:
        self.messages = []
The buffer property returns the messages of the ChatMessageHistory held by BaseChatMemory. Similar memory classes include ConversationBufferWindowMemory, ConversationSummaryBufferMemory, and others.
class ConversationBufferMemory(BaseChatMemory, BaseModel):
    """Buffer for storing conversation memory."""

    human_prefix: str = "Human"
    ai_prefix: str = "AI"
    memory_key: str = "history"  #: :meta private:

    @property
    def buffer(self) -> Any:
        """String buffer of memory."""

    @property
    def memory_variables(self) -> List[str]:
        """Will always return list of memory variables.

        :meta private:
        """

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return history buffer."""
class ConversationBufferWindowMemory(BaseChatMemory, BaseModel):
    """Buffer for storing conversation memory."""

    human_prefix: str = "Human"
    ai_prefix: str = "AI"
    memory_key: str = "history"  #: :meta private:
    k: int = 5

    @property
    def buffer(self) -> List[BaseMessage]:
        """String buffer of memory."""

    @property
    def memory_variables(self) -> List[str]:
        """Will always return list of memory variables."""

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """Return history buffer."""
class ConversationSummaryBufferMemory(BaseChatMemory, SummarizerMixin, BaseModel):
    """Buffer with summarizer for storing conversation memory."""

    max_token_limit: int = 2000
    moving_summary_buffer: str = ""
    memory_key: str = "history"

    @property
    def buffer(self) -> List[BaseMessage]:
        return self.chat_memory.messages

    @property
    def memory_variables(self) -> List[str]:
        """Will always return list of memory variables.

        :meta private:
        """
        return [self.memory_key]

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return history buffer."""
class ConversationSummaryMemory(BaseChatMemory, SummarizerMixin, BaseModel):
    """Conversation summarizer to memory."""

    buffer: str = ""
    memory_key: str = "history"  #: :meta private:

    @property
    def memory_variables(self) -> List[str]:
        """Will always return list of memory variables."""

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Return history buffer."""
The relationships among the relevant classes are as follows:
class BasePromptTemplate(BaseModel, ABC):
    """Base class for all prompt templates, returning a prompt."""

    input_variables: List[str]
    """A list of the names of the variables the prompt template expects."""
    output_parser: Optional[BaseOutputParser] = None
    """How to parse the output of calling an LLM on this formatted prompt."""
    partial_variables: Mapping[str, Union[str, Callable[[], str]]] = Field(
        default_factory=dict
    )

    @abstractmethod
    def format_prompt(self, **kwargs: Any) -> PromptValue:
        """Create Chat Messages."""

    @abstractmethod
    def format(self, **kwargs: Any) -> str:
        """Format the prompt with the inputs."""

    def dict(self, **kwargs: Any) -> Dict:
        """Return dictionary representation of prompt."""

    def save(self, file_path: Union[Path, str]) -> None:
        """Save the prompt."""
class StringPromptTemplate(BasePromptTemplate, ABC):
    """String prompt should expose the format method, returning a prompt."""

    def format_prompt(self, **kwargs: Any) -> PromptValue:
        """Create Chat Messages."""
        return StringPromptValue(text=self.format(**kwargs))
class PromptTemplate(StringPromptTemplate, BaseModel):
    """Schema to represent a prompt for an LLM."""

    input_variables: List[str]
    """A list of the names of the variables the prompt template expects."""
    template: str
    """The prompt template."""
    template_format: str = "f-string"
    """The format of the prompt template. Options are: 'f-string', 'jinja2'."""
    validate_template: bool = True
    """Whether or not to try validating the template."""

    def format(self, **kwargs: Any) -> str:
        """Format the prompt with the inputs."""

    def from_file(
        cls, template_file: Union[str, Path], input_variables: List[str]
    ) -> PromptTemplate:
        """Load a prompt from a file."""

    def from_template(cls, template: str) -> PromptTemplate:
        """Load a prompt template from a template."""
class ChatPromptTemplate(BasePromptTemplate, ABC):
    input_variables: List[str]
    messages: List[Union[BaseMessagePromptTemplate, BaseMessage]]

    def from_role_strings(
        cls, string_messages: List[Tuple[str, str]]
    ) -> ChatPromptTemplate:
        ...

    def from_strings(
        cls, string_messages: List[Tuple[Type[BaseMessagePromptTemplate], str]]
    ) -> ChatPromptTemplate:
        ...

    def from_messages(
        cls, messages: Sequence[Union[BaseMessagePromptTemplate, BaseMessage]]
    ) -> ChatPromptTemplate:
        ...

    def format(self, **kwargs: Any) -> str:
        return self.format_prompt(**kwargs).to_string()

    def format_prompt(self, **kwargs: Any) -> PromptValue:
        ...
class BaseMessagePromptTemplate(BaseModel, ABC):
    @abstractmethod
    def format_messages(self, **kwargs: Any) -> List[BaseMessage]:
        """To messages."""

    @property
    @abstractmethod
    def input_variables(self) -> List[str]:
        """Input variables for this prompt template."""
BaseStringMessagePromptTemplate holds a StringPromptTemplate field:
class BaseStringMessagePromptTemplate(BaseMessagePromptTemplate, ABC):
    prompt: StringPromptTemplate
    additional_kwargs: dict = Field(default_factory=dict)

    @classmethod
    def from_template(cls, template: str, **kwargs: Any) -> BaseMessagePromptTemplate:
        prompt = PromptTemplate.from_template(template)
        return cls(prompt=prompt, **kwargs)

    @abstractmethod
    def format(self, **kwargs: Any) -> BaseMessage:
        """To a BaseMessage."""

    def format_messages(self, **kwargs: Any) -> List[BaseMessage]:
        return [self.format(**kwargs)]

    @property
    def input_variables(self) -> List[str]:
        return self.prompt.input_variables
class ChatMessagePromptTemplate(BaseStringMessagePromptTemplate):
    role: str

    def format(self, **kwargs: Any) -> BaseMessage:
        text = self.prompt.format(**kwargs)
        return ChatMessage(
            content=text, role=self.role, additional_kwargs=self.additional_kwargs
        )

class HumanMessagePromptTemplate(BaseStringMessagePromptTemplate):
    def format(self, **kwargs: Any) -> BaseMessage:
        text = self.prompt.format(**kwargs)
        return HumanMessage(content=text, additional_kwargs=self.additional_kwargs)

class AIMessagePromptTemplate(BaseStringMessagePromptTemplate):
    def format(self, **kwargs: Any) -> BaseMessage:
        text = self.prompt.format(**kwargs)
        return AIMessage(content=text, additional_kwargs=self.additional_kwargs)

class SystemMessagePromptTemplate(BaseStringMessagePromptTemplate):
    def format(self, **kwargs: Any) -> BaseMessage:
        text = self.prompt.format(**kwargs)
        return SystemMessage(content=text, additional_kwargs=self.additional_kwargs)
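Putting the message templates together, a minimal chat-prompt sketch (no API key needed; format_prompt returns a PromptValue that can be handed to a chat model):

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

system = SystemMessagePromptTemplate.from_template(
    "You are a translator from {input_language} to {output_language}."
)
human = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages([system, human])

value = chat_prompt.format_prompt(
    input_language="English", output_language="French", text="I love programming."
)
print(value.to_messages())  # [SystemMessage(...), HumanMessage(...)]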
The relationships among the relevant classes are as follows:
class BaseMessage(BaseModel):
    """Message object."""

    content: str
    additional_kwargs: dict = Field(default_factory=dict)

    def format_chatml(self) -> str:
        raise NotImplementedError()

    @property
    @abstractmethod
    def type(self) -> str:
        """Type of the message, used for serialization."""
class HumanMessage(BaseMessage):
    """Type of message that is spoken by the human."""

    def format_chatml(self) -> str:
        return f"<|im_start|>user\n{self.content}\n<|im_end|>"

    @property
    def type(self) -> str:
        """Type of the message, used for serialization."""
        return "human"

class AIMessage(BaseMessage):
    """Type of message that is spoken by the AI."""

    def format_chatml(self) -> str:
        return f"<|im_start|>assistant\n{self.content}\n<|im_end|>"

    @property
    def type(self) -> str:
        """Type of the message, used for serialization."""
        return "ai"

class SystemMessage(BaseMessage):
    """Type of message that is a system message."""

    def format_chatml(self) -> str:
        return f"<|im_start|>system\n{self.content}\n<|im_end|>"

    @property
    def type(self) -> str:
        """Type of the message, used for serialization."""
        return "system"

class ChatMessage(BaseMessage):
    """Type of message with arbitrary speaker."""

    role: str

    def format_chatml(self) -> str:
        return f"<|im_start|>{self.role}\n{self.content}\n<|im_end|>"

    @property
    def type(self) -> str:
        """Type of the message, used for serialization."""
        return "chat"
class Tool(BaseTool):
    """Tool that takes in function or coroutine directly."""

    description: str = ""
    func: Callable[[str], str]
    coroutine: Optional[Callable[[str], Awaitable[str]]] = None

    def _run(self, tool_input: str) -> str:
        """Use the tool."""
        return self.func(tool_input)

    async def _arun(self, tool_input: str) -> str:
        """Use the tool asynchronously."""
        if self.coroutine:
            return await self.coroutine(tool_input)
        raise NotImplementedError("Tool does not support async")

    # TODO: this is for backwards compatibility, remove in future
    def __init__(
        self, name: str, func: Callable[[str], str], description: str, **kwargs: Any
    ) -> None:
        """Initialize tool."""
        super(Tool, self).__init__(
            name=name, func=func, description=description, **kwargs
        )
def load_tools(
    tool_names: List[str],
    llm: Optional[BaseLLM] = None,
    callback_manager: Optional[BaseCallbackManager] = None,
    **kwargs: Any,
) -> List[BaseTool]:
    """Load tools based on their name."""
It supports connecting to common databases such as SQLite, MySQL, and PostgreSQL, for example: db = SQLDatabase.from_uri("sqlite:///./notebooks/Chinook.db").
class SQLDatabase:
    """SQLAlchemy wrapper around a database."""

    def __init__(
        self,
        engine: Engine,
        schema: Optional[str] = None,
        metadata: Optional[MetaData] = None,
        ignore_tables: Optional[List[str]] = None,
        include_tables: Optional[List[str]] = None,
        sample_rows_in_table_info: int = 3,
        custom_table_info: Optional[dict] = None,
    ):
        """Create engine from database URI."""

    def from_uri(cls, database_uri: str, **kwargs: Any) -> SQLDatabase:
        """Construct a SQLAlchemy engine from URI."""
        return cls(create_engine(database_uri), **kwargs)
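A minimal sketch combining SQLDatabase with SQLDatabaseChain, its companion chain (not excerpted above); it assumes the Chinook sample database exists at the given path and the OpenAI environment from earlier:

from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

db = SQLDatabase.from_uri("sqlite:///./notebooks/Chinook.db")
db_chain = SQLDatabaseChain(llm=OpenAI(temperature=0), database=db, verbose=True)
db_chain.run("How many employees are there?")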