系列文章索引
J-LangChain 入门
介绍
j-langchain是一个Java版的LangChain开发框架,旨在简化和加速各类大模型应用在Java平台的落地开发。它提供了一组实用的工具和类,使得开发人员能够更轻松地构建类似于LangChain的Java应用程序。
github: https://github.com/flower-trees/j-langchain
依赖
Maven
<dependency>
<groupId>io.github.flower-trees</groupId>
<artifactId>j-langchain</artifactId>
<version>1.0.4-preview</version>
</dependency>
Gradle
implementation 'io.github.flower-trees:j-langchain:1.0.4-preview'
配置
/**
 * Application entry point that imports the J-LangChain Spring configuration.
 */
@Import(JLangchainConfig.class)
public class YourApplication {

    /**
     * Starts the Spring application, forwarding the command-line arguments unchanged.
     *
     * @param cliArgs arguments received from the command line
     */
    public static void main(String[] cliArgs) {
        SpringApplication.run(YourApplication.class, cliArgs);
    }
}
export ALIYUN_KEY=xxx-xxx-xxx-xxx
export CHATGPT_KEY=xxx-xxx-xxx-xxx
export DOUBAO_KEY=xxx-xxx-xxx-xxx
export MOONSHOT_KEY=xxx-xxx-xxx-xxx
💡 Notes:
- 系统基于salt-function-flow流程编排框架开发,具体语法可参考该框架的文档。
流式运行可执行项
使用流
LLM是大多数应用中最重要的瓶颈,生成完整响应可能需要几秒钟。为了让用户感知到更快的响应,我们可以采用流式方式逐块输出结果。
LangChain实现
from langchain_ollama import OllamaLLM
model = OllamaLLM(model="llama3:8b")
# sync
chunks = []
for chunk in model.stream("what color is the sky?"):
chunks.append(chunk)
print(chunk, end="|", flush=True)
# async
async def model_astream():
chunks = []
async for chunk in model.astream("what color is the sky?"):
chunks.append(chunk)
print(chunk, end="|", flush=True)
await model_astream()
J-LangChain实现
/**
 * Demo component showing how to stream output directly from a chat model.
 */
@Component
public class ChainExtDemo {

    @Autowired
    ChainActor chainActor;

    /**
     * Streams the model's answer and, after every received piece, prints the text
     * accumulated so far with a "|" appended behind each piece.
     *
     * @throws TimeoutException     declared by the streaming API
     * @throws InterruptedException declared by the streaming API
     */
    public void StreamDemo() throws TimeoutException, InterruptedException {
        ChatOllama llm = ChatOllama.builder()
                .model("llama3:8b")
                .build();
        AIMessageChunk chunk = llm.stream("what color is the sky?");

        StringBuilder sb = new StringBuilder();
        while (chunk.getIterator().hasNext()) {
            var piece = chunk.getIterator().next().getContent();
            sb.append(piece);
            sb.append("|");
            // Prints the whole buffer each time, so the output grows line by line.
            System.out.println(sb);
        }
    }
}
输出
The|
The| sky|
The| sky| is|
The| sky| is| blue|
The| sky| is| blue|.|
The| sky| is| blue|.||
💡 Notes:
- 由于语言特性问题,J-LangChain只支持异步方式stream。
链流式执行
在实际应用中,除了调用LLM本身,还可能涉及多个处理步骤。J-LangChain支持构建复杂链条,自动实现流式处理。
LangChain实现
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
model = OllamaLLM(model="llama3:8b")
prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")
parser = StrOutputParser()
chain = prompt | model | parser
async def chain_astream():
async for chunk in chain.astream({"topic": "parrot"}):
print(chunk, end="|", flush=True)
await chain_astream()
J-LangChain实现
/**
 * Builds a prompt -> model -> parser chain, streams it for the topic "parrot" and,
 * after every received piece, prints the accumulated output with "|" separators.
 *
 * @throws TimeoutException     declared by the streaming API
 * @throws InterruptedException declared by the streaming API
 */
public void ChainStreamDemo() throws TimeoutException, InterruptedException {
    BaseRunnable<StringPromptValue, ?> prompt = PromptTemplate.fromTemplate("tell me a joke about ${topic}");
    ChatOllama llm = ChatOllama.builder()
            .model("llama3:8b")
            .build();
    StrOutputParser parser = new StrOutputParser();

    FlowInstance chain = chainActor.builder()
            .next(prompt)
            .next(llm)
            .next(parser)
            .build();

    ChatGenerationChunk chunk = chainActor.stream(chain, Map.of("topic", "parrot"));

    StringBuilder sb = new StringBuilder();
    while (chunk.getIterator().hasNext()) {
        var piece = chunk.getIterator().next();
        sb.append(piece);
        sb.append("|");
        System.out.println(sb);
    }
}
处理输入流
某些场景下需要处理部分数据流,例如流式传输JSON内容。在这些情况下,解析器可以操作输入流,尝试将部分数据补全为有效状态。
LangChain实现
from langchain_core.output_parsers import JsonOutputParser
model = OllamaLLM(model="llama3:8b")
async def input_astream():
chain = model | JsonOutputParser()
async for text in chain.astream(
"output a list of countries and their populations in JSON format. limit 3 countries."
):
print(text, flush=True)
await input_astream()
J-LangChain实现
/**
 * Streams a model -> JSON parser chain and prints every value the chain emits.
 *
 * @throws TimeoutException     declared by the streaming API
 * @throws InterruptedException declared by the streaming API
 */
public void InputDemo() throws TimeoutException, InterruptedException {
    ChatOllama model = ChatOllama.builder()
            .model("llama3:8b")
            .build();

    FlowInstance chain = chainActor.builder()
            .next(model)
            .next(new JsonOutputParser())
            .build();

    String question = "output a list of countries and their populations in JSON format. limit 3 countries.";
    ChatGenerationChunk chunk = chainActor.stream(chain, question);

    while (chunk.getIterator().hasNext()) {
        var partial = chunk.getIterator().next();
        System.out.println(partial);
    }
}
输出
[]
[]
[{}]
[{}]
[{"country":""}]
[{"country":"China"}]
[{"country":"China","population":143}]
[{"country":"China","population":143932}]
[{"country":"China","population":143932377}]
[{"country":"China","population":1439323776}]
[{"country":"China","population":1439323776}]
[{"country":"China","population":1439323776}]
[{"country":"China","population":1439323776},{}]
......
生成器函数
生成器函数可以操作输入流以支持复杂的流式处理逻辑。
LangChain实现
model = OllamaLLM(model="llama3:8b")
async def _extract_country_names_streaming(input_stream):
    """Yield each country name the first time it appears in the input stream.

    Args:
        input_stream: Async iterable of parsed values. Only dict items whose
            ``"countries"`` entry is a list are inspected; everything else is
            skipped.

    Yields:
        The truthy ``"name"`` value of every dict element of ``countries`` that
        has not been yielded before.
    """
    country_names_so_far = set()
    # ``chunk`` rather than ``input`` so the builtin is not shadowed.
    async for chunk in input_stream:
        if not isinstance(chunk, dict):
            continue
        countries = chunk.get("countries")
        if not isinstance(countries, list):
            continue
        for country in countries:
            if not isinstance(country, dict):
                continue
            name = country.get("name")
            if name and name not in country_names_so_far:
                yield name
                country_names_so_far.add(name)
chain = model | JsonOutputParser() | _extract_country_names_streaming
async def function_astream():
async for text in chain.astream(
"output a list of the countries france, spain and japan and their populations in JSON format. "
'Use a dict with an outer key of "countries" which contains a list of countries. '
"Each country should have the key `name` and `population`"):
print(text, end="|", flush=True)
await function_astream()
J-LangChain实现
/**
 * Streams a model -> JSON parser -> custom function chain and prints the non-empty
 * pieces it emits, accumulated and separated by "|".
 *
 * @throws TimeoutException     declared by the streaming API
 * @throws InterruptedException declared by the streaming API
 */
public void OutputFunctionDemo() throws TimeoutException, InterruptedException {
    ChatOllama llm = ChatOllama.builder().model("llama3:8b").build();

    FlowInstance chain = chainActor.builder()
            .next(llm)
            .next(new JsonOutputParser())
            .next(new FunctionOutputParser(this::extractCountryNamesStreaming))
            .build();

    // The prompt was ported from concatenated Python string literals; the quote
    // characters that delimited those literals are not part of the request, so
    // they are no longer embedded in the text block.
    ChatGenerationChunk chunk = chainActor.stream(chain, """
            output a list of the countries france, spain and japan and their populations in JSON format.
            Use a dict with an outer key of "countries" which contains a list of countries.
            Each country should have the key `name` and `population`""");

    StringBuilder sb = new StringBuilder();
    while (chunk.getIterator().hasNext()) {
        ChatGenerationChunk piece = chunk.getIterator().next();
        // The function step returns "" when it has nothing new; skip those pieces.
        if (StringUtils.isNotEmpty(piece.getText())) {
            // Append the text that was just checked instead of relying on the
            // chunk object's toString().
            sb.append(piece.getText()).append("|");
            System.out.println(sb);
        }
    }
}
// Names already returned by extractCountryNamesStreaming; lives on the instance,
// so it persists across calls.
Set<Object> set = new HashSet<>();

/**
 * Returns the first country name in {@code chunk} that has not been returned before.
 *
 * <p>Two shapes of the {@code "countries"} entry are accepted: a list of objects
 * carrying a {@code "name"} key (the shape the demo prompt asks for), and an object
 * keyed by country name (the only shape handled previously; a list used to fail
 * with a {@code ClassCastException}).
 *
 * @param chunk text that may or may not be valid JSON
 * @return a newly seen country name, or {@code ""} when the chunk is not valid JSON,
 *         has no {@code "countries"} entry, or contains no new name
 */
private String extractCountryNamesStreaming(String chunk) {
    if (!JsonUtil.isValidJson(chunk)) {
        return "";
    }
    Map<?, ?> chunkMap = JsonUtil.fromJson(chunk, Map.class);
    if (chunkMap == null) {
        return "";
    }

    Object countries = chunkMap.get("countries");
    if (countries instanceof List) {
        for (Object country : (List<?>) countries) {
            if (!(country instanceof Map)) {
                continue;
            }
            Object name = ((Map<?, ?>) country).get("name");
            // Set.add returns false for names that were already emitted.
            if (name instanceof String && !((String) name).isEmpty() && set.add(name)) {
                return (String) name;
            }
        }
    } else if (countries instanceof Map) {
        for (Object name : ((Map<?, ?>) countries).keySet()) {
            if (name instanceof String && set.add(name)) {
                return (String) name;
            }
        }
    }
    return "";
}
输出
France|
France|Spain|
France|Spain|Japan|
使用流事件
J-LangChain提供了事件流式处理的API(streamEvent),支持流式监控中间步骤。
LangChain实现
model = OllamaLLM(model="llama3:8b")
events = []
async def event_astream():
async for event in model.astream_events("hello", version="v2"):
events.append(event)
await event_astream()
print(events[:3])
J-LangChain实现
/**
 * Collects every event emitted while the model answers "hello" and prints the
 * last three of them (or all of them when fewer than three arrived) as JSON.
 *
 * @throws TimeoutException declared by the streaming API
 */
public void EventDemo() throws TimeoutException {
    ChatOllama model = ChatOllama.builder().model("llama3:8b").build();

    List<EventMessageChunk> events = new ArrayList<>();
    EventMessageChunk chunk = model.streamEvent("hello");
    while (chunk.getIterator().hasNext()) {
        events.add(chunk.getIterator().next());
    }

    // Clamp the start index: subList(size - 3, size) throws when size < 3.
    int from = Math.max(0, events.size() - 3);
    events.subList(from, events.size()).forEach(event -> System.out.println(event.toJson()));
}
输出
{"event":"on_llm_stream","data":{"chunk":{"role":"ai","content":".","last":false}},"name":"ChatOllama","parentIds":[],"metadata":{"ls_model_name":"gpt-4","ls_provider":"chatgpt","ls_model_type":"llm"},"tags":[]}
{"event":"on_llm_stream","data":{"chunk":{"role":"ai","content":"","finishReason":"stop","last":true}},"name":"ChatOllama","parentIds":[],"metadata":{"ls_model_name":"gpt-4","ls_provider":"chatgpt","ls_model_type":"llm"},"tags":[]}
{"event":"on_llm_end","data":{"output":"Hello! How can I help you today? Let me know if you have any questions or need assistance with anything else."},"name":"ChatOllama","parentIds":[],"metadata":{},"tags":[]}
链流事件
LangChain实现
model = OllamaLLM(model="llama3:8b")
chain = model | JsonOutputParser()
)
async def event_name_astream():
async for event in chain.astream_events(
"Generate JSON data.",
include_names=["my_parser"],
version="v2"
):
print(event)
await event_name_astream()
J-LangChain实现
public void EventChainDemo() throws TimeoutException {
BaseRunnable<StringPromptValue, ?> prompt = PromptTemplate.fromTemplate("tell me a joke about ${topic}");
ChatOllama oll = ChatOllama.builder().model("llama3:8b"").build();
FlowInstance chain = chainActor.builder().next(prompt).next(oll).next(new StrOutputParser()).build();
EventMessageChunk chunk = chainActor.streamEvent(chain, Map.of("topic", "dog"));
while (chunk.getIterator().hasNext()) {
System.out.println(chunk.getIterator().next().toJson());
}
}
输出
{"event":"on_chain_start","data":{"input":{"topic":"dog"}},"name":"ChainActor","runId":"9fbfb04d3101465daa9e762716a59ecd","parentIds":[],"metadata":{},"tags":[]}
{"event":"on_prompt_start","data":{"input":{"topic":"dog"}},"name":"PromptTemplate","runId":"6f0a038fe9a847768922ce2c559f06ec","parentIds":["9fbfb04d3101465daa9e762716a59ecd"],"metadata":{},"tags":[]}
{"event":"on_prompt_end","data":{"output":{"text":"tell me a joke about dog"}},"name":"PromptTemplate","runId":"6f0a038fe9a847768922ce2c559f06ec","parentIds":["9fbfb04d3101465daa9e762716a59ecd"],"metadata":{},"tags":[]}
{"event":"on_llm_start","data":{"input":{"text":"tell me a joke about dog"}},"name":"ChatOllama","runId":"0e21f6ad4fc84c16a50b3db3d3d54193","parentIds":["6f0a038fe9a847768922ce2c559f06ec"],"metadata":{"ls_model_type":"llm","ls_provider":"chatgpt","ls_model_name":"gpt-4"},"tags":[]}
{"event":"on_parser_start","data":{"input":{"role":"ai","last":false}},"name":"StrOutputParser","runId":"d1d0efcadd904b2090f4d202003d4c04","parentIds":["0e21f6ad4fc84c16a50b3db3d3d54193"],"metadata":{},"tags":[]}
{"event":"on_llm_stream","data":{"chunk":{"role":"ai","content":"Why","last":false}},"name":"ChatOllama","runId":"0e21f6ad4fc84c16a50b3db3d3d54193","parentIds":["6f0a038fe9a847768922ce2c559f06ec"],"metadata":{"ls_model_type":"llm","ls_provider":"chatgpt","ls_model_name":"gpt-4"},"tags":[]}
{"event":"on_parser_stream","data":{"chunk":{"text":"Why","message":{"role":"ai","content":"Why"},"last":false}},"name":"StrOutputParser","runId":"d1d0efcadd904b2090f4d202003d4c04","parentIds":["0e21f6ad4fc84c16a50b3db3d3d54193"],"metadata":{},"tags":[]}
{"event":"on_chain_stream","data":{"chunk":{"text":"Why","message":{"role":"ai","content":"Why"},"last":false}},"name":"ChainActor","runId":"d1d0efcadd904b2090f4d202003d4c04","parentIds":["d1d0efcadd904b2090f4d202003d4c04"],"metadata":{},"tags":[]}
......
过滤事件
可以根据组件的名称、类型或标签过滤事件:
LangChain实现
model = OllamaLLM(model="llama3:8b")

# Give each step a run name (and the parser a tag) so events can be filtered on them.
named_model = model.with_config({"run_name": "model"})
tagged_parser = JsonOutputParser().with_config(
    {"run_name": "my_parser", "tags": ["my_chain"]}
)
chain = named_model | tagged_parser
按名称
async def event_name_astream():
async for event in chain.astream_events(
"Generate JSON data.",
include_names=["my_parser"],
version="v2"
):
print(event)
await event_name_astream()
按类型
async def event_type_astream():
async for event in chain.astream_events(
"Generate JSON data.",
include_types=["llm"],
version="v2"
):
print(event)
await event_type_astream()
按标签
async def event_tag_astream():
async for event in chain.astream_events(
"Generate JSON data.",
include_tags=["my_chain"],
version="v2"
):
print(event)
await event_type_astream()
J-LangChain实现
/**
 * Streams the events of a model -> JSON parser chain four times: unfiltered, then
 * filtered by component name, by event type and by tag. Each run prints its events
 * as JSON, with a separator line between runs.
 *
 * @throws TimeoutException declared by the streaming API
 */
public void EventFilterDemo() throws TimeoutException {
    ChatOllama model = ChatOllama.builder().model("llama3:8b").build();

    FlowInstance chain = chainActor.builder()
            .next(model.withConfig(Map.of("run_name", "model")))
            .next((new JsonOutputParser()).withConfig(Map.of("run_name", "my_parser", "tags", List.of("my_chain"))))
            .build();

    // Unfiltered: every event of the chain.
    printEvents(chainActor.streamEvent(chain, "Generate JSON data."));

    System.out.println("\n----------------\n");

    // By name. Constant-first equals() is null-safe, whereas List.of(..).contains(null)
    // throws a NullPointerException.
    printEvents(chainActor.streamEvent(chain, "Generate JSON data.",
            event -> "my_parser".equals(event.getName())));

    System.out.println("\n----------------\n");

    // By type, with the same null-safe comparison.
    printEvents(chainActor.streamEvent(chain, "Generate JSON data.",
            event -> "llm".equals(event.getType())));

    System.out.println("\n----------------\n");

    // By tag; events without a tag collection simply do not match.
    printEvents(chainActor.streamEvent(chain, "Generate JSON data.",
            event -> event.getTags() != null && event.getTags().contains("my_chain")));
}

/** Drains the given event stream, printing each event as JSON. */
private void printEvents(EventMessageChunk chunk) throws TimeoutException {
    while (chunk.getIterator().hasNext()) {
        System.out.println(chunk.getIterator().next().toJson());
    }
}