// context-efficient-ai/pages/chat/trpc.ts

import {
  router,
  publicProcedure,
  createCallerFactory,
} from "../../trpc/server.js";
import { generateObject, generateText, jsonSchema } from "ai";
import type {
  OtherParameters,
  CommittedMessage,
  DraftMessage,
} from "../../types.js";
// import { client } from "../../database/milvus";
// import {
//   ConsistencyLevelEnum,
//   type NumberArrayId,
// } from "@zilliz/milvus2-sdk-node";
import { db, type FactTrigger, type Fact } from "../../database/lowdb.js";
import { nanoid } from "nanoid";
import { conversations } from "./conversations.js";
import { messages } from "./messages.js";
import { facts, createCaller as createCallerFacts } from "./facts.js";
import { openrouter } from "./provider.js";

/** Caller for the `facts` router's procedures (used below for
 * `extractFromNewMessages`), created by passing an empty object to the
 * `createCaller` exported from `./facts.js` — presumably the tRPC context;
 * confirm against that module. */
const factsCaller = createCallerFacts({});

/**
 * Builds the system prompt for the main chat completion: the caller's own
 * system prompt followed by the running summary wrapped in a
 * `<running_summary>` tag.
 *
 * @param args.systemPrompt - The caller-supplied system prompt, emitted first.
 * @param args.previousRunningSummary - The running summary to embed.
 * @returns The combined prompt text, terminated by a newline.
 */
const mainSystemPrompt = (args: {
  systemPrompt: string;
  previousRunningSummary: string;
}): string =>
  [
    args.systemPrompt,
    "",
    'This is a summary of the conversation so far, from your point-of-view (so "I" and "me" refer to you):',
    "<running_summary>",
    args.previousRunningSummary,
    "</running_summary>",
    // Trailing empty entry yields the final newline of the prompt.
    "",
  ].join("\n");

/**
 * Builds the system prompt used when asking the model to produce triggers
 * (situations in which a fact is useful) for a single extracted fact.
 *
 * The prompt embeds the previous running summary, every message exchanged
 * since that summary (one `<role_message>` element per line), and the
 * assistant's latest response.
 *
 * NOTE(review): the prompt text itself is left untouched. It contains the typo
 * "idenitfying", and its first two bullets talk about extracting facts / "the
 * fact text" rather than about triggers — worth revisiting separately, since
 * changing the wording changes what the model sees.
 *
 * @param previousRunningSummary - Summary of the conversation so far.
 * @param messagesSincePreviousRunningSummary - Messages exchanged after that
 *   summary was produced; only `role` and `content` are read.
 * @param mainResponseContent - Text of the assistant's latest response.
 * @returns The system prompt text.
 */
const factTriggersSystemPrompt = ({
  previousRunningSummary,
  messagesSincePreviousRunningSummary,
  mainResponseContent,
}: {
  previousRunningSummary: string;
  messagesSincePreviousRunningSummary: Array<DraftMessage>;
  mainResponseContent: string;
}): string => {
  // Join explicitly: interpolating an array into a template literal
  // stringifies it with "," between elements, which would leak stray commas
  // between the message tags.
  const transcript = messagesSincePreviousRunningSummary
    .map(
      (message) =>
        `<${message.role}_message>${message.content}</${message.role}_message>`,
    )
    .join("\n");

  return `You are an expert at idenitfying situations when facts are useful.

You will be given a summary of a conversation, and the messages exchanged since that summary was produced.

Then you will be given a fact that was extracted from that conversation, and you will need to identify a natural language phrase that describes a situation in which it would be useful to invoke the fact.

The facts will be used to enrich context of conversations with AI assistants: upon each turn, a semantic database will be searched to see whether the current situation in the conversation matches any situations that are deemed to render the fact useful, and the fact will be injected into the context of the conversation.

Your task is to produce a list of triggers for the fact.

* You should not extract any facts that are already in the summary.
* The user should be referred to as "the user" in the fact text.
* The user's pronouns should be either he or she, NOT "they" or "them", because these triggers will be read by an AI assistant to give it context; and excessive use of "they" or "them" will make what they refer to unclear or ambiguous.
* The assistant should be referred to as "I" or "me", because these triggers will be read by an AI assistant to give it context.

<running_summary>
${previousRunningSummary}
</running_summary>

${transcript}
<assistant_response>
${mainResponseContent}
</assistant_response>
`;
};

/**
 * Builds the user prompt that presents one fact (wrapped in a
 * `<fact_content>` tag) and asks for the situations in which it is useful.
 *
 * @param args.factContent - Text of the fact to generate triggers for.
 * @returns The user prompt text.
 */
const factTriggersUserPrompt = (args: { factContent: string }): string => {
  const factBlock = ["<fact_content>", args.factContent, "</fact_content>"].join(
    "\n",
  );
  const request = "Generate a list of situations in which the fact is useful.";
  // A blank line separates the tagged fact from the request.
  return factBlock + "\n\n" + request;
};

/**
 * Builds the system prompt for the summarization call: fixed instructions on
 * how to write the new summary, followed by the previous running summary
 * wrapped in a `<running_summary>` tag.
 *
 * @param args.previousRunningSummary - The summary produced on an earlier
 *   turn (may be empty).
 * @returns The system prompt text, terminated by a newline.
 */
const runningSummarySystemPrompt = (args: {
  previousRunningSummary: string;
}): string => {
  // Static part of the prompt; contains no interpolation.
  const instructions = `You are an expert at summarizing conversations.

You will be given a summary of a conversation, and the messages exchanged since that summary was produced.

Your task is to produce a new summary of the conversation.

* The user should be referred to as "the user" in the summary.
* The user's pronouns should be either he or she, NOT "they" or "them", because this summary will be read by an AI assistant to give it context; and excessive use of "they" or "them" will make what they refer to unclear or ambiguous.
* The assistant should be referred to as "I" or "me", because this summary will be read by an AI assistant to give it context.
* The new summary may omit details present in the old summary, provided that the messages that were exchanged since that summary was produced indictae that those details are becoming less relevant to the continuation of the conversation.`;

  // The summary line keeps its two-space indent inside the tag.
  const summaryBlock = [
    "<running_summary>",
    `  ${args.previousRunningSummary}`,
    "</running_summary>",
  ].join("\n");

  return `${instructions}\n\n${summaryBlock}\n`;
};

/**
 * Builds the user prompt for the summarization call: every message exchanged
 * since the previous running summary (one `<role_message>` element per line),
 * then the assistant's latest response, then the request for a new summary.
 *
 * @param messagesSincePreviousRunningSummary - Messages exchanged after the
 *   previous summary was produced; only `role` and `content` are read.
 * @param mainResponseContent - Text of the assistant's latest response.
 * @returns The user prompt text.
 */
const runningSummaryUserPrompt = ({
  messagesSincePreviousRunningSummary,
  mainResponseContent,
}: {
  messagesSincePreviousRunningSummary: Array<DraftMessage>;
  mainResponseContent: string;
}): string => {
  // Join explicitly: interpolating an array into a template literal
  // stringifies it with "," between elements, which would leak stray commas
  // between the message tags.
  const transcript = messagesSincePreviousRunningSummary
    .map(
      (message) =>
        `<${message.role}_message>${message.content}</${message.role}_message>`,
    )
    .join("\n");

  return `${transcript}
<assistant_response>
${mainResponseContent}
</assistant_response>

Generate a new running summary of the conversation.`;
};

/**
 * Builds the `Fact` records for a batch of extracted fact strings, all
 * attributed to the message they were extracted from.
 */
const toFactRecords = (
  factContents: Array<Fact["content"]>,
  sourceMessageId: Fact["sourceMessageId"],
): Array<Fact> =>
  factContents.map((content) => ({
    id: nanoid(),
    // The user id is hard-coded for now.
    userId: "1",
    sourceMessageId,
    content,
    createdAt: new Date().toISOString(),
  }));

/**
 * The chat router: re-exports the `conversations`, `messages` and `facts`
 * sub-routers and adds the `sendMessage` mutation.
 */
export const chat = router({
  conversations,
  messages,
  facts,
  /**
   * Handles one chat turn.
   *
   * 1. Generates the assistant's reply from the system prompt, the previous
   *    running summary (if any) and the messages since that summary.
   * 2. Extracts facts from the messages since the summary and from the reply.
   * 3. Generates a new running summary, stored on the assistant message.
   * 4. Generates triggers for every extracted fact.
   * 5. Adds the user message, assistant message, facts and triggers to the
   *    database and writes it.
   *
   * Nothing is added to the in-memory database until every model call has
   * succeeded, so a failed call cannot leave a half-recorded turn behind for
   * a later `db.write()` to persist.
   *
   * @returns The inserted assistant message, the inserted user message and
   *   all inserted facts.
   * @throws Error when `messages` is empty or not an array; errors from the
   *   model calls and from `db.write()` propagate unchanged.
   */
  sendMessage: publicProcedure
    .input(
      // NOTE(review): this only asserts the input type; nothing is validated
      // at runtime.
      (x) =>
        x as {
          conversationId: string;
          messages: Array<DraftMessage | CommittedMessage>;
          systemPrompt: string;
          parameters: OtherParameters;
        },
    )
    .mutation(
      async ({
        input: { conversationId, messages, systemPrompt, parameters },
      }) => {
        /** TODO: Save all unsaved messages (i.e. those without an `id`) to the
         * database. Is this dangerous? Can an attacker just send a bunch of
         * messages, omitting the ids, causing me to save a bunch of them to the
         * database? I guess it's no worse than starting new conversations,
         * which anyone can freely do. */

        // The input is not validated, so fail with a clear message instead of
        // an opaque TypeError when there is no latest message to respond to.
        const latestMessage = Array.isArray(messages)
          ? messages[messages.length - 1]
          : undefined;
        if (latestMessage === undefined) {
          throw new Error("sendMessage requires at least one message.");
        }

        // The most recent message carrying a running summary marks where the
        // summarized part of the conversation ends.
        const previousRunningSummaryIndex = messages.findLastIndex(
          (message) =>
            typeof (message as CommittedMessage).runningSummary !== "undefined",
        );
        const previousRunningSummary =
          previousRunningSummaryIndex >= 0
            ? ((messages[previousRunningSummaryIndex] as CommittedMessage)
                .runningSummary as string)
            : "";
        // When no summary exists the index is -1, so this is every message.
        const messagesSincePreviousRunningSummary = messages.slice(
          previousRunningSummaryIndex + 1,
        );

        /** The incoming message, recorded with the role "user". It is added to
         * the database at the end, together with everything else. */
        const insertedUserMessage: CommittedMessage = {
          id: nanoid(),
          conversationId,
          content: latestMessage.content,
          role: "user" as const,
          index: messages.length - 1,
          createdAt: new Date().toISOString(),
        };

        /** Generate a new message from the model, but hold-off on adding it to
         * the database until we produce the associated running-summary, below.
         * The model should be given the conversation summary thus far, and of
         * course the user's latest message, unmodified. Invite the model to
         * create any tools it needs. The tool needs to be implemented in a
         * language which this system can execute; usually an interpreted
         * language like Python or JavaScript. (No tools are passed yet.) */
        const systemMessageContent =
          previousRunningSummary === ""
            ? systemPrompt
            : mainSystemPrompt({ systemPrompt, previousRunningSummary });
        // NOTE(review): `parameters` comes from the client and is spread last
        // in every model call below, so its keys override the options written
        // before it — confirm that is intended.
        const mainResponse = await generateText({
          model: openrouter("mistralai/mistral-nemo"),
          messages: [
            { role: "system" as const, content: systemMessageContent },
            ...messagesSincePreviousRunningSummary,
          ],
          maxSteps: 3,
          tools: undefined,
          ...parameters,
        });

        /** Extract Facts from the user's message, linking the Facts with the
         * message they came from. (Yes, this should be done *after* the model
         * response, not before; because when we run a query to find Facts to
         * inject into the context sent to the model, we don't want Facts from
         * the user's current message to be candidates for injection, because
         * we're sending the user's message unadulterated to the model; there's
         * no reason to inject the same Facts that the model is already using
         * to generate its response.) All messages since the previous summary
         * are passed as the new messages here. */
        const factsFromUserMessageResponse =
          await factsCaller.extractFromNewMessages({
            previousRunningSummary,
            messagesSincePreviousRunningSummary: [],
            newMessages: messagesSincePreviousRunningSummary,
          });
        const insertedFactsFromUserMessage = toFactRecords(
          factsFromUserMessageResponse.object.facts,
          insertedUserMessage.id,
        );

        /** Produce a running summary of the conversation, to be saved along
         * with the model's response. The new running summary is based on the
         * previous running summary combined with all the messages since that
         * summary was produced. */
        const runningSummaryResponse = await generateText({
          model: openrouter("mistralai/mistral-nemo"),
          messages: [
            {
              role: "system" as const,
              content: runningSummarySystemPrompt({
                previousRunningSummary,
              }),
            },
            {
              role: "user" as const,
              content: runningSummaryUserPrompt({
                messagesSincePreviousRunningSummary,
                mainResponseContent: mainResponse.text,
              }),
            },
          ],
          maxSteps: 3,
          tools: undefined,
          ...parameters,
        });
        const insertedAssistantMessage: CommittedMessage = {
          id: nanoid(),
          conversationId,
          content: mainResponse.text,
          runningSummary: runningSummaryResponse.text,
          role: "assistant" as const,
          index: messages.length,
          createdAt: new Date().toISOString(),
        };

        /** Extract Facts from the model's response, linking the Facts with the
         * message they came from. */
        const factsFromAssistantMessageResponse =
          await factsCaller.extractFromNewMessages({
            previousRunningSummary,
            messagesSincePreviousRunningSummary,
            newMessages: [
              {
                role: "assistant" as const,
                content: mainResponse.text,
              },
            ],
          });
        const insertedFactsFromAssistantMessage = toFactRecords(
          factsFromAssistantMessageResponse.object.facts,
          insertedAssistantMessage.id,
        );

        const insertedFacts = [
          ...insertedFactsFromUserMessage,
          ...insertedFactsFromAssistantMessage,
        ];

        /** For each Fact produced in the two fact-extraction steps, generate
         * FactTriggers, linking the FactTriggers with the Facts they came
         * from. A FactTrigger is a natural language phrase that describes a
         * situation in which it would be useful to invoke the Fact. (e.g.,
         * "When food preferences are discussed"). */
        // The system prompt is the same for every fact, so build it once.
        const triggersSystemPrompt = factTriggersSystemPrompt({
          previousRunningSummary,
          messagesSincePreviousRunningSummary,
          mainResponseContent: mainResponse.text,
        });
        const insertedFactTriggers: Array<FactTrigger> = [];
        // NOTE(review): the iterations are independent of one another and
        // could run concurrently; kept sequential to preserve the existing
        // request pattern.
        for (const fact of insertedFacts) {
          const factTriggers = await generateObject<{
            factTriggers: Array<string>;
          }>({
            model: openrouter("mistralai/mistral-nemo"),
            messages: [
              {
                role: "system" as const,
                content: triggersSystemPrompt,
              },
              {
                role: "user" as const,
                content: factTriggersUserPrompt({
                  factContent: fact.content,
                }),
              },
            ],
            schema: jsonSchema({
              type: "object",
              properties: {
                factTriggers: {
                  type: "array",
                  items: {
                    type: "string",
                  },
                },
              },
              // Tell the model the list itself is mandatory.
              required: ["factTriggers"],
            }),
            maxSteps: 3,
            tools: undefined,
            ...parameters,
          });
          // Be defensive anyway: treat a missing or malformed list as "no
          // triggers" instead of crashing the whole turn.
          const triggerContents = Array.isArray(
            factTriggers.object.factTriggers,
          )
            ? factTriggers.object.factTriggers
            : [];
          for (const triggerContent of triggerContents) {
            insertedFactTriggers.push({
              id: nanoid(),
              sourceFactId: fact.id,
              content: triggerContent,
              priorityMultiplier: 1,
              priorityMultiplierReason: "",
              scopeConversationId: conversationId,
              createdAt: new Date().toISOString(),
            });
          }
        }

        // Every model call succeeded: record the whole turn in one go, then
        // write the database once.
        db.data.messages.push(insertedUserMessage, insertedAssistantMessage);
        db.data.facts.push(...insertedFacts);
        db.data.factTriggers.push(...insertedFactTriggers);
        await db.write();

        return {
          insertedAssistantMessage,
          insertedUserMessage,
          insertedFacts,
        };
      },
    ),
});

/** Caller factory for the `chat` router, produced by `createCallerFactory`
 * (the counterpart of the `createCaller` this file imports from
 * `./facts.js`). */
export const createCaller = createCallerFactory(chat);