generate `runningSummary` on each turn, and include it with the main completion

master
Avraham Sakal 3 months ago
parent d4018bdc8e
commit 3ba60b999c

@ -103,7 +103,9 @@ export default function LayoutDefault({
size={16}
stroke={1.5}
className="border-on-hover"
onClick={() => {
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
trpc.chat.createConversation.mutate().then((res) => {
if (!res?.id) return;
addConversation(res);

@ -2,6 +2,31 @@
"$schema": "https://opencode.ai/config.json",
"theme": "tokyonight",
"model": "openrouter/google/gemini-2.5-flash-preview-05-20",
"provider": {
"openrouter": {
"options": {
"name": "OpenRouter"
},
"models": {
"moonshotai/kimi-k2": {
"name": "Kimi K2",
"options": {
"OpenRouter": {
"provider": {
"order": [
"moonshotai",
"chutes/fp8",
"baseten/fp8",
"together/fp8"
],
"allow_fallbacks": false
}
}
}
}
}
}
},
"mode": {
"build": {
"model": "openrouter/moonshotai/kimi-k2"

@ -19,7 +19,7 @@
"@mantine/hooks": "^8.1.1",
"@openrouter/ai-sdk-provider": "^0.7.2",
"@sinclair/typebox": "^0.34.37",
"@tabler/icons-react": "^3.34.0",
"@tabler/icons-react": "^3.34.1",
"@trpc/client": "^11.4.2",
"@trpc/server": "^11.4.2",
"@universal-middleware/core": "^0.4.8",
@ -34,6 +34,7 @@
"pg": "^8.16.3",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-markdown": "^10.1.0",
"vike": "^0.4.235",
"vike-cloudflare": "^0.1.7",
"vike-react": "^0.6.4",

@ -1,6 +1,13 @@
import { JsonInput, Tabs, Textarea } from "@mantine/core";
import {
Box,
Group,
JsonInput,
Stack,
Tabs,
Textarea,
useMantineTheme,
} from "@mantine/core";
import { trpc } from "../../../trpc/client";
import type { Message as UIMessage } from "ai";
import { useEffect } from "react";
import {
defaultParameters,
@ -11,6 +18,8 @@ import { usePageContext } from "vike-react/usePageContext";
import { useData } from "vike-react/useData";
import type { Data } from "./+data";
import type { ConversationsId } from "../../../database/generated/public/Conversations";
import type { CommittedMessage, DraftMessage } from "../../../types";
import Markdown from "react-markdown";
export default function ChatPage() {
const pageContext = usePageContext();
@ -31,7 +40,7 @@ export default function ChatPage() {
const setParameters = useStore((state) => state.setParameters);
const setLoading = useStore((state) => state.setLoading);
const conversation = useData<Data>();
const { conversation, messages: initialMessages } = useData<Data>();
useEffect(() => {
setConversationId(conversationId);
@ -49,6 +58,10 @@ export default function ChatPage() {
setConversationTitle,
]);
useEffect(() => {
setMessages(initialMessages);
}, [initialMessages, setMessages]);
return (
<>
<div>
@ -86,18 +99,36 @@ export default function ChatPage() {
e.preventDefault();
const messagesWithNewUserMessage = [
...messages,
{ role: "user" as const, content: message } as UIMessage,
{ role: "user" as const, content: message } as DraftMessage,
];
setMessages(messagesWithNewUserMessage);
setLoading(true);
const response = await trpc.chat.sendMessage.mutate({
conversationId,
messages: messagesWithNewUserMessage,
systemPrompt,
parameters,
});
const messagesWithAssistantMessage = [
...messagesWithNewUserMessage,
{ role: "assistant", content: response.text } as UIMessage,
...messages,
{
id: response.insertedUserMessage?.id,
conversationId,
role: "user" as const,
content: message,
index: response.insertedUserMessage?.index,
runningSummary: undefined,
} as CommittedMessage,
{
id: response.insertedAssistantMessage?.id,
conversationId,
role: "assistant" as const,
content: response.insertedAssistantMessage?.content,
index: response.insertedAssistantMessage?.index,
runningSummary:
response.insertedAssistantMessage?.running_summary ||
undefined,
} as CommittedMessage,
];
setMessages(messagesWithAssistantMessage);
setMessage("");
@ -131,14 +162,41 @@ export default function ChatPage() {
function Messages({
messages,
}: {
messages: Array<UIMessage>;
messages: Array<DraftMessage | CommittedMessage>;
}) {
const theme = useMantineTheme();
console.log("messages", messages);
return (
<div>
<Stack gap="md" justify="flex-start">
{messages.map((message, index) => (
// biome-ignore lint/suspicious/noArrayIndexKey: <explanation>
<div key={index}>{message.content}</div>
<Group
// biome-ignore lint/suspicious/noArrayIndexKey: draft messages have no stable id until committed to the database
key={index}
justify={message.role === "user" ? "flex-end" : "flex-start"}
>
<Box
w="75%"
bg={
message.role === "user"
? theme.colors.gray[2]
: theme.colors.blue[2]
}
p="md"
bdrs="md"
>
<div>
{"index" in message ? message.index : ""}
{message.role}
</div>
<Markdown>{message.content}</Markdown>
{"runningSummary" in message && (
<div>
<strong>Running Summary:</strong> {message.runningSummary}
</div>
)}
</Box>
</Group>
))}
</div>
</Stack>
);
}

@ -9,5 +9,8 @@ export const data = async (pageContext: PageContextServer) => {
const conversation = await caller.fetchConversation({
id: Number(id),
});
return conversation;
const messages = await caller.fetchMessages({
conversationId: Number(id),
});
return { conversation, messages };
};

@ -2,13 +2,15 @@ import {
router,
publicProcedure,
createCallerFactory,
Validator,
// Validator
} from "../../trpc/server";
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
import { generateText } from "ai";
import type { Message as UIMessage } from "ai";
import type { OtherParameters } from "../../types.js";
import type {
OtherParameters,
CommittedMessage,
DraftMessage,
} from "../../types.js";
import { env } from "../../server/env.js";
// import { client } from "../../database/milvus";
// import {
@ -55,7 +57,7 @@ export const chat = router({
.mutation(async ({ input: { id } }) => {
const result = await db
.deleteFrom("conversations")
.where("id", "=", id as ConversationsId)
.where("id", "=", Number(id) as ConversationsId)
.execute();
return result;
}),
@ -71,32 +73,200 @@ export const chat = router({
const result = await db
.updateTable("conversations")
.set({ title })
.where("id", "=", id as ConversationsId)
.where("id", "=", Number(id) as ConversationsId)
.execute();
return result[0];
}),
fetchMessages: publicProcedure
.input((x) => x as { conversationId: number })
.query(async ({ input: { conversationId } }) => {
const rows = await db
.selectFrom("messages")
.selectAll()
.where("conversation_id", "=", conversationId as ConversationsId)
.execute();
return rows.map((row) => ({
...row,
conversationId: conversationId as ConversationsId,
runningSummary: row.running_summary,
})) as Array<CommittedMessage>;
}),
sendMessage: publicProcedure
.input(
(x) =>
x as {
messages: Array<UIMessage>;
conversationId: number;
messages: Array<DraftMessage | CommittedMessage>;
systemPrompt: string;
parameters: OtherParameters;
},
)
.mutation(async ({ input: { messages, systemPrompt, parameters } }) => {
const response = await generateText({
model: openrouter("mistralai/mistral-nemo"),
messages: [
{ role: "system" as const, content: systemPrompt },
...messages,
],
maxSteps: 3,
tools: undefined,
...parameters,
});
return response;
}),
.mutation(
async ({
input: { conversationId, messages, systemPrompt, parameters },
}) => {
/** TODO: Save all unsaved messages (i.e. those without an `id`) to the
* database. Is this dangerous? Can an attacker just send a bunch of
* messages, omitting the ids, causing me to save a bunch of them to the
* database? I guess it's no worse than starting new conversations, which
* anyone can freely do. */
const previousRunningSummaryIndex = messages.findLastIndex(
(message) =>
typeof (message as CommittedMessage).runningSummary !== "undefined",
);
const previousRunningSummary =
previousRunningSummaryIndex >= 0
? ((messages[previousRunningSummaryIndex] as CommittedMessage)
.runningSummary as string)
: "";
/** Save the incoming message to the database. */
const insertedUserMessage = await db
.insertInto("messages")
.values({
conversation_id: conversationId as ConversationsId,
content: messages[messages.length - 1].content,
role: "user" as const,
index: messages.length - 1,
created_at: new Date().toISOString(),
})
.returning(["id", "index"])
.executeTakeFirst();
/** Generate a new message from the model, but hold-off on adding it to
* the database until we produce the associated running-summary, below.
* Hold off on the insert until then.
* The model should be given the conversation summary thus far, and of
* course the user's latest message, unmodified. Invite the model to
* create any tools it needs. The tool needs to be implemented in a
* language which this system can execute; usually an interpreted
* language like Python or JavaScript. */
const mainResponse = await generateText({
model: openrouter("mistralai/mistral-nemo"),
messages: [
previousRunningSummary === ""
? { role: "system" as const, content: systemPrompt }
: {
role: "system" as const,
content: `${systemPrompt}
This is a summary of the conversation so far, from your point-of-view (so "I" and "me" refer to you):
<running_summary>
${previousRunningSummary}
</running_summary>
`,
},
...messages.slice(previousRunningSummaryIndex + 1),
],
maxSteps: 3,
tools: undefined,
...parameters,
});
console.log("sent", [
previousRunningSummary === ""
? { role: "system" as const, content: systemPrompt }
: {
role: "system" as const,
content: `${systemPrompt}
This is a summary of the conversation so far, from your point-of-view (so "I" and "me" refer to you):
<running_summary>
${previousRunningSummary}
</running_summary>
`,
},
...messages.slice(previousRunningSummaryIndex + 1),
]);
/** Extract Facts from the user's message, and add them to the database,
* linking the Facts with the messages they came from. (Yes, this should
* be done *after* the model response, not before; because when we run a
* query to find Facts to inject into the context sent to the model, we
* don't want Facts from the user's current message to be candidates for
* injection, because we're sending the user's message unadulterated to
* the model; there's no reason to inject the same Facts that the model is
* already using to generate its response.) */
/** Extract Facts from the model's response, and add them to the database,
* linking the Facts with the messages they came from. */
/** For each Fact produced in the two fact-extraction steps, generate
* FactTriggers and add them to the database, linking the FactTriggers
* with the Facts they came from. A FactTrigger is a natural language
* phrase that describes a situation in which it would be useful to invoke
* the Fact. (e.g., "When food preferences are discussed"). */
/** Produce a running summary of the conversation, and save that along
* with the model's response to the database. The new running summary is
* based on the previous running summary combined with the all messages
* since that summary was produced. */
const runningSummaryResponse = previousRunningSummary
? await generateText({
model: openrouter("mistralai/mistral-nemo"),
messages: [
{
role: "system" as const,
content: `Given the following summary of a conversation, coupled with the messages exchanged since that summary was produced, produce a new summary of the conversation.
<running_summary>
${previousRunningSummary}
</running_summary>
`,
},
...messages.slice(previousRunningSummaryIndex + 1),
{
role: "assistant" as const,
content: mainResponse.text,
} as UIMessage,
/** I might need this next message, because models are trained to
* respond when the final message in `messages` is from the `user`,
* but in our case it's an `assistant` message, so I'm artificially
* adding a `user` message to the end of the conversation. */
{
role: "user" as const,
content: "What is the new summary of the conversation?",
} as UIMessage,
],
maxSteps: 3,
tools: undefined,
...parameters,
})
: await generateText({
model: openrouter("mistralai/mistral-nemo"),
messages: [
{
role: "system" as const,
content:
"Given the following messages of a conversation, produce a summary of the conversation.",
},
...messages,
{
role: "assistant" as const,
content: mainResponse.text,
} as UIMessage,
/** I might need this next message, because models are trained to
* respond when the final message in `messages` is from the `user`,
* but in our case it's an `assistant` message, so I'm artificially
* adding a `user` message to the end of the conversation. */
{
role: "user" as const,
content: "What is the new summary of the conversation?",
} as UIMessage,
],
maxSteps: 3,
tools: undefined,
...parameters,
});
const insertedAssistantMessage = await db
.insertInto("messages")
.values({
conversation_id: conversationId as ConversationsId,
content: mainResponse.text,
running_summary: runningSummaryResponse.text,
role: "assistant" as const,
index: messages.length,
created_at: new Date().toISOString(),
})
.returningAll()
.executeTakeFirst();
/** TODO: notify the caller, somehow, that some messages were saved to
* the database and/or were outfitted with runningSummaries, so the
* caller can update its UI state. */
return { insertedAssistantMessage, insertedUserMessage };
},
),
});
export const createCaller = createCallerFactory(chat);

File diff suppressed because it is too large Load Diff

@ -17,7 +17,7 @@ export type Store = {
* overflows the JS integer anyway. */
selectedConversationId: ConversationsId;
conversations: Array<ConversationUI>;
messages: Array<UIMessage>;
messages: Array<DraftMessage | CommittedMessage>;
message: string;
systemPrompt: string;
parameters: OtherParameters;
@ -27,9 +27,19 @@ export type Store = {
setConversations: (conversations: Array<ConversationUI>) => void;
addConversation: (conversation: ConversationUI) => void;
removeConversation: (conversationId: ConversationsId) => void;
setMessages: (messages: Array<UIMessage>) => void;
setMessages: (messages: Array<DraftMessage | CommittedMessage>) => void;
setMessage: (message: string) => void;
setSystemPrompt: (systemPrompt: string) => void;
setParameters: (parameters: OtherParameters) => void;
setLoading: (loading: boolean) => void;
};
/** A message while it is being composed/sent from the input box.
 * It has no database-assigned `id` yet (the ai SDK's `id` is omitted so a
 * draft cannot be mistaken for a persisted row). */
export type DraftMessage = Omit<UIMessage, "id">;
/** A message after it has been saved to the database.
 * Extends DraftMessage with the columns the server returns on insert. */
export type CommittedMessage = DraftMessage & {
  /** Database row id assigned on insert (numeric; NOTE(review): re-declared
   * here after being omitted from UIMessage — presumably the SDK's id type
   * differs; confirm against the `messages` table schema). */
  id: number;
  /** Conversation this message belongs to (`conversation_id` column). */
  conversationId: ConversationsId;
  /** Position of the message within its conversation (`index` column). */
  index: number;
  /** Summary of the conversation up to this message, read from the
   * `running_summary` column; only present on assistant messages that
   * produced one — user rows are inserted without it. */
  runningSummary?: string;
};

Loading…
Cancel
Save