diff --git a/README.md b/README.md index ba113ac..79e1469 100644 --- a/README.md +++ b/README.md @@ -31,38 +31,38 @@ npm install --save @vectara/stream-query-client Then use it in your application like this: ```js -import { - streamQuery, - StreamUpdate, - StreamQueryConfig, -} from "@vectara/stream-query-client"; +import { streamQueryV2, ApiV2 } from "@vectara/stream-query-client"; const sendQuery = async () => { - const configurationOptions: StreamQueryConfig = { + const configurationOptions: ApiV2.StreamQueryConfig = { // Required fields. customerId: "customerIdValue", - corpusIds: ["2"], apiKey: "zqt_apiKeyValue", - - // Optional fields. - queryValue: "Who put the ram in the ramalamadingdong?", - summaryNumResults: 5, - language: "eng", - debug: true, - enableFactualConsistencyScore: true, - summaryPromptName: "summary-prompt-name", + corpusKey: "corpora_1", + query: "How many coconuts can an African swallow carry?", + search: { + offset: 0, + limit: 5, + metadataFilter: "", + }, + generation: { + maxUsedSearchResults: 5, + responseLanguage: "eng", + enableFactualConsistencyScore: true, + promptName: "summary-prompt-name", + }, }; - const onStreamUpdate = (update: StreamUpdate) => { + const onStreamUpdate = (update: ApiV2.StreamUpdate) => { // Perform operations on returned data, e.g. update state. console.log(update.updatedText); }; - streamQuery(configurationOptions, onStreamUpdate); + streamQueryV2(configurationOptions, onStreamUpdate); }; ``` -For more information on configuration options and callback types, see [the type definitions](src/types.ts) and our [Query API documentation](https://docs.vectara.com/docs/api-reference/search-apis/stream-query). +For more information on configuration options and callback types, see [the type definitions](src/apiV2/types.ts) and our [Query API documentation](https://docs.vectara.com/docs/rest-api/query). 
## License diff --git a/docs/package-lock.json b/docs/package-lock.json index 229f69d..1142950 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -14,7 +14,7 @@ }, "..": { "name": "@vectara/stream-query-client", - "version": "2.0.2", + "version": "2.1.0", "license": "Apache-2.0", "devDependencies": { "@types/jest": "^29.5.11", diff --git a/docs/src/index.tsx b/docs/src/index.tsx index 50ac312..5d3d6aa 100644 --- a/docs/src/index.tsx +++ b/docs/src/index.tsx @@ -1,68 +1,152 @@ import { useState } from "react"; import ReactDOM from "react-dom"; import { - streamQuery, - StreamUpdate, - StreamQueryConfig, + streamQueryV1, + ApiV1, + streamQueryV2, + ApiV2, } from "@vectara/stream-query-client"; +// TODO: Switch back to prod values before merging +const CUSTOMER_ID = "3099635174"; +const API_KEY = "zqt_uMCt5uGR7CXARu7QHg7GDYNG5Q5v58HOpvQO0A"; +const CORPUS_NAME = "markdown"; +const CORPUS_ID = "203"; + +// const CUSTOMER_ID = "1526022105"; +// const API_KEY = "zqt_WvU_2ewh7ZGRwq8LdL2SV8B9RJmVGyUm1VAuOw"; +// const CORPUS_NAME = "ofer-bm-moma-docs"; +// const CORPUS_ID = "232"; + const App = () => { - const [question, setQuestion] = useState(""); - const [answer, setAnswer] = useState(); - const [conversationId, setConversationId] = useState(); + const [question, setQuestion] = useState("markdown"); + const [answerV1, setAnswerV1] = useState(); + const [resultsV1, setResultsV1] = useState(); + const [answerV2, setAnswerV2] = useState(); + const [resultsV2, setResultsV2] = useState(); + const [conversationIdV1, setConversationIdV1] = useState(); + const [conversationIdV2, setConversationIdV2] = useState(); - const sendQuery = async () => { - const configurationOptions: StreamQueryConfig = { + const sendQueryV1 = async () => { + const configurationOptions: ApiV1.StreamQueryConfig = { // Required fields. 
- customerId: "1366999410", - corpusIds: ["1"], - apiKey: "zqt_UXrBcnI2UXINZkrv4g1tQPhzj02vfdtqYJIDiA", + customerId: CUSTOMER_ID, + corpusIds: [CORPUS_ID], + apiKey: API_KEY, // Optional fields. queryValue: question, - summaryNumResults: 5, + summaryNumResults: 2, language: "eng", chat: { store: true, - conversationId, + conversationId: conversationIdV1, }, debug: true, enableFactualConsistencyScore: true, summaryPromptName: "vectara-experimental-summary-ext-2023-12-11-large", }; - const onStreamUpdate = (update: StreamUpdate) => { - console.log(update); - const { updatedText, details } = update; - if (details.chat) { - setConversationId(details.chat.conversationId); + const onStreamUpdate = (update: ApiV1.StreamUpdate) => { + // console.log("v1", update); + const { updatedText, details, references } = update; + + if (details?.chat) { + setConversationIdV1(details.chat.conversationId); + } + + setAnswerV1(updatedText); + + if (references) { + setResultsV1(JSON.stringify(references)); } - setAnswer(updatedText); }; - streamQuery(configurationOptions, onStreamUpdate); + streamQueryV1(configurationOptions, onStreamUpdate); + }; + + const sendQueryV2 = async () => { + const configurationOptions: ApiV2.StreamQueryConfig = { + customerId: CUSTOMER_ID, + apiKey: API_KEY, + query: question, + corpusKey: `${CORPUS_NAME}_${CORPUS_ID}`, + search: { + offset: 0, + metadataFilter: "", + limit: 2, + lexicalInterpolation: 0, + contextConfiguration: { + sentencesBefore: 2, + sentencesAfter: 2, + }, + }, + generation: { + maxUsedSearchResults: 5, + responseLanguage: "eng", + enableFactualConsistencyScore: true, + promptName: "vectara-experimental-summary-ext-2023-12-11-large", + }, + chat: { + store: true, + conversationId: conversationIdV2, + }, + }; + + const onStreamEvent = (event: ApiV2.StreamEvent) => { + const { updatedText, chatId, searchResults } = event; + if (chatId) { + setConversationIdV2(chatId); + } + + if (updatedText) { + setAnswerV2(updatedText); + } + + if 
(searchResults) { + setResultsV2(JSON.stringify(searchResults)); + } + + if (event.type === "error") { + console.log("Error", event.messages); + } + }; + + streamQueryV2(configurationOptions, onStreamEvent); }; return ( <> -

Stream Query Client

-

Question

setQuestion(e.target.value)} /> -

Answer

+
+
+

Stream Query Client v1 answer

+

{resultsV1}

+

{answerV1}

+
-

{answer}

+
+

Stream Query Client v2 answer

+

{resultsV2}

+

{answerV2}

+
+
); }; diff --git a/src/index.mocks.ts b/src/apiV1/client.mocks.ts similarity index 100% rename from src/index.mocks.ts rename to src/apiV1/client.mocks.ts diff --git a/src/index.test.ts b/src/apiV1/client.test.ts similarity index 83% rename from src/index.test.ts rename to src/apiV1/client.test.ts index 24795dd..9eea40e 100644 --- a/src/index.test.ts +++ b/src/apiV1/client.test.ts @@ -1,13 +1,22 @@ import { SetupServerApi } from "msw/node"; -import { streamQuery, StreamUpdate, StreamQueryConfig } from "./index"; -import { createStreamingServer } from "./createStreamingServer"; -import { chunks } from "./index.mocks"; +import { streamQueryV1 } from "./client"; +import { StreamQueryConfig, StreamUpdate } from "./types"; +import { createTestStreamingServer } from "../common/createTestStreamingServer"; +import { chunks } from "./client.mocks"; -describe("stream-query-client", () => { +const encoder = new TextEncoder(); + +describe("stream-query-client API v1", () => { let server: SetupServerApi; beforeAll(async () => { - server = createStreamingServer(chunks); + server = createTestStreamingServer( + "/v1/stream-query", + chunks, + (json: any) => { + return encoder.encode(JSON.stringify(json)); + } + ); await server.listen(); }); @@ -44,7 +53,7 @@ describe("stream-query-client", () => { handleUpdate(update); }; - await streamQuery(configurationOptions, onStreamUpdate); + await streamQueryV1(configurationOptions, onStreamUpdate); expect(handleUpdate).toHaveBeenNthCalledWith(1, { references: [ diff --git a/src/client.ts b/src/apiV1/client.ts similarity index 62% rename from src/client.ts rename to src/apiV1/client.ts index 863d24d..5cd6946 100644 --- a/src/client.ts +++ b/src/apiV1/client.ts @@ -6,11 +6,12 @@ import { StreamUpdateHandler, } from "./types"; import { deserializeSearchResponse } from "./deserializeSearchResponse"; +import { processStreamChunk } from "./processStreamChunk"; import { SNIPPET_START_TAG, SNIPPET_END_TAG } from "./constants"; +import { 
DEFAULT_DOMAIN } from "../common/constants"; +import { generateStream } from "../common/generateStream"; -const DEFAULT_ENDPOINT = "api.vectara.io"; - -export const streamQuery = async ( +export const streamQueryV1 = async ( config: StreamQueryConfig, onStreamUpdate: StreamUpdateHandler ) => { @@ -90,53 +91,47 @@ export const streamQuery = async ( ], }); - const stream = await generateStream( - requestHeaders, - requestBody, - config.endpoint ?? DEFAULT_ENDPOINT - ); + const url = config.endpoint ?? `${DEFAULT_DOMAIN}/v1/stream-query`; + + const { stream } = await generateStream(requestHeaders, requestBody, url); let previousAnswerText = ""; for await (const chunk of stream) { try { - const parts = chunk.split("\n"); + processStreamChunk(chunk, (part) => { + const dataObj = JSON.parse(part); - parts - .filter((part) => part !== "") - .forEach((part) => { - const dataObj = JSON.parse(part); + if (!dataObj.result) return; - if (!dataObj.result) return; + const details: StreamUpdate["details"] = {}; - const details: StreamUpdate["details"] = {}; + const summaryDetail = getSummaryDetail(config, dataObj.result); + if (summaryDetail) { + details.summary = summaryDetail; + } - const summaryDetail = getSummaryDetail(config, dataObj.result); - if (summaryDetail) { - details.summary = summaryDetail; - } + const chatDetail = getChatDetail(config, dataObj.result); + if (chatDetail) { + details.chat = chatDetail; + } - const chatDetail = getChatDetail(config, dataObj.result); - if (chatDetail) { - details.chat = chatDetail; - } + const fcsDetail = getFactualConsistencyDetail(dataObj.result); + if (fcsDetail) { + details.factualConsistency = fcsDetail; + } - const fcsDetail = getFactualConsistencyDetail(dataObj.result); - if (fcsDetail) { - details.factualConsistency = fcsDetail; - } + const streamUpdate: StreamUpdate = { + references: deserializeSearchResponse(dataObj.result.responseSet), + details, + updatedText: getUpdatedText(dataObj.result, previousAnswerText), + isDone: 
dataObj.result.summary?.done ?? false, + }; - const streamUpdate: StreamUpdate = { - references: deserializeSearchResponse(dataObj.result.responseSet), - details, - updatedText: getUpdatedText(dataObj.result, previousAnswerText), - isDone: dataObj.result.summary?.done ?? false, - }; + previousAnswerText = streamUpdate.updatedText ?? ""; - previousAnswerText = streamUpdate.updatedText ?? ""; - - onStreamUpdate(streamUpdate); - }); + onStreamUpdate(streamUpdate); + }); } catch (error) {} } }; @@ -185,34 +180,3 @@ const getUpdatedText = (parsedResult: ParsedResult, previousText: string) => { return `${previousText}${parsedResult.summary.text}`; }; - -const generateStream = async ( - requestHeaders: Record, - requestBody: string, - endpoint: string -): Promise> => { - const response = await fetch(`https://${endpoint}/v1/stream-query`, { - method: "POST", - headers: requestHeaders, - body: requestBody, - }); - if (response.status !== 200) throw new Error(response.status.toString()); - if (!response.body) throw new Error("Response body does not exist"); - return getIterableStream(response.body); -}; - -async function* getIterableStream( - body: ReadableStream -): AsyncIterable { - const reader = body.getReader(); - const decoder = new TextDecoder(); - - while (true) { - const { value, done } = await reader.read(); - if (done) { - break; - } - const decodedChunk = decoder.decode(value, { stream: true }); - yield decodedChunk; - } -} diff --git a/src/constants.ts b/src/apiV1/constants.ts similarity index 100% rename from src/constants.ts rename to src/apiV1/constants.ts diff --git a/src/deserializeSearchResponse.ts b/src/apiV1/deserializeSearchResponse.ts similarity index 99% rename from src/deserializeSearchResponse.ts rename to src/apiV1/deserializeSearchResponse.ts index 8c58d16..97c79ca 100644 --- a/src/deserializeSearchResponse.ts +++ b/src/apiV1/deserializeSearchResponse.ts @@ -1,5 +1,4 @@ import { SNIPPET_START_TAG, SNIPPET_END_TAG } from "./constants"; - import { 
DeserializedSearchResult, DocMetadata, SearchResponse } from "./types"; /** diff --git a/src/apiV1/index.ts b/src/apiV1/index.ts new file mode 100644 index 0000000..3ccfb2a --- /dev/null +++ b/src/apiV1/index.ts @@ -0,0 +1 @@ +export { streamQueryV1 } from "./client"; diff --git a/src/apiV1/processStreamChunk.ts b/src/apiV1/processStreamChunk.ts new file mode 100644 index 0000000..c1b241f --- /dev/null +++ b/src/apiV1/processStreamChunk.ts @@ -0,0 +1,8 @@ +export const processStreamChunk = ( + chunk: string, + callback: (part: string) => void +) => { + const parts = chunk.split("\n"); + + parts.filter((part) => part !== "").forEach(callback); +}; diff --git a/src/types.ts b/src/apiV1/types.ts similarity index 88% rename from src/types.ts rename to src/apiV1/types.ts index e5d9501..e2f5b61 100644 --- a/src/types.ts +++ b/src/apiV1/types.ts @@ -1,57 +1,9 @@ -export type Summary = { - prompt?: string; -}; - -export type Chat = { - conversationId: string; - turnId: string; - // Debug-only - rephrasedQuery?: string; -}; - -export type FactualConsistency = { - score: number; -}; - -export type DeserializedSearchResult = { - id: string; - snippet: { - pre: string; - text: string; - post: string; - }; - source: string; - url?: string; - title?: string; - metadata: Record; -}; - -// A subset of the Vectara query response, in parsed form. -// This types only data relevant to stream processing -export type ParsedResult = { - responseSet: { - document: Array<{ - id: string; - metadata: Array; - }>; - response: Array<{ - corpusKey: { corpusId: number }; - documentIndex: number; - score: number; - text: string; - }>; - }; - summary?: { - chat?: Chat; - factualConsistency?: FactualConsistency; - done: boolean; - text: string; - // Debug-only - prompt?: string; - }; -}; +import { SummaryLanguage } from "../common/types"; export type StreamQueryConfig = { + // IDs of Vectara corpora to include in the query + corpusIds: Array; + filter?: string; // The query to send to the API. 
@@ -96,9 +48,6 @@ export type StreamQueryConfig = { // The customer ID of the Vectara corpora owner customerId: string; - // IDs of Vectara corpora to include in the query - corpusIds: Array; - // The Vectara query API key for provided corpus IDs apiKey: string; @@ -122,6 +71,59 @@ type ChatConfig = { conversationId?: string; }; +export type Summary = { + prompt?: string; +}; + +export type Chat = { + conversationId: string; + turnId: string; + // Debug-only + rephrasedQuery?: string; +}; + +export type FactualConsistency = { + score: number; +}; + +export type DeserializedSearchResult = { + id: string; + snippet: { + pre: string; + text: string; + post: string; + }; + source: string; + url?: string; + title?: string; + metadata: Record; +}; + +// A subset of the Vectara query response, in parsed form. +// This types only data relevant to stream processing +export type ParsedResult = { + responseSet: { + document: Array<{ + id: string; + metadata: Array; + }>; + response: Array<{ + corpusKey: { corpusId: number }; + documentIndex: number; + score: number; + text: string; + }>; + }; + summary?: { + chat?: Chat; + factualConsistency?: FactualConsistency; + done: boolean; + text: string; + // Provided only when debug: true + prompt?: string; + }; +}; + export type StreamUpdate = { // A list of references that apply to the query response. 
references?: Array; @@ -176,37 +178,3 @@ export type DocMetadata = { name: string; value: string; }; - -export const SUMMARY_LANGUAGES = [ - "auto", - "eng", "en", - "deu", "de", - "fra", "fr", - "zho", "zh", - "kor", "ko", - "ara", "ar", - "rus", "ru", - "tha", "th", - "nld", "nl", - "ita", "it", - "por", "pt", - "spa", "es", - "jpn", "ja", - "pol", "pl", - "tur", "tr", - "heb", "he", - "vie", "vi", - "ind", "id", - "ces", "cs", - "ukr", "uk", - "ell", "el", - "fas", "fa", - "hin", "hi", - "urd", "ur", - "swe", "sv", - "ben", "bn", - "msa", "ms", - "ron", "ro" -] as const; - -export type SummaryLanguage = (typeof SUMMARY_LANGUAGES)[number]; diff --git a/src/apiV2/EventBuffer.test.ts b/src/apiV2/EventBuffer.test.ts new file mode 100644 index 0000000..dd1e98c --- /dev/null +++ b/src/apiV2/EventBuffer.test.ts @@ -0,0 +1,72 @@ +import { EventBuffer } from "./EventBuffer"; + +describe.skip("EventBuffer", () => { + test("handles multiple events within a single chunk", () => { + const onStreamEvent = jest.fn(); + const buffer = new EventBuffer(onStreamEvent); + + buffer.consumeChunk(` +event:error +data:{"type":"error","messages":["INVALID_ARGUMENT: The filter expression contains an error. Syntax error at 1:0 nc79bc8s must be referenced as doc.nc79bc8s or part.nc79bc8s"]} + +event:end +data:{"type":"end"} + `); + + expect(onStreamEvent).toHaveBeenNthCalledWith(1, { + type: "error", + messages: [ + "INVALID_ARGUMENT: The filter expression contains an error. 
Syntax error at 1:0 nc79bc8s must be referenced as doc.nc79bc8s or part.nc79bc8s", + ], + }); + + expect(onStreamEvent).toHaveBeenNthCalledWith(2, { type: "end" }); + }); + + test("handles multiple chunks composing a single event", () => { + const onStreamEvent = jest.fn(); + const buffer = new EventBuffer(onStreamEvent); + + buffer.consumeChunk(` +event:search_results +data:{"type":"search_results", + `); + + buffer.consumeChunk(` +"search_results":[ + `); + + buffer.consumeChunk(` +{"id":"doc1"}]} + `); + + expect(onStreamEvent).toHaveBeenCalledWith({ + type: "searchResults", + searchResults: [{ id: "doc1" }], + }); + }); + + test("handles multiple events, each within its own chunk", () => { + const onStreamEvent = jest.fn(); + const buffer = new EventBuffer(onStreamEvent); + + buffer.consumeChunk(` +event:error +data:{"type":"error","messages":["INVALID_ARGUMENT: The filter expression contains an error. Syntax error at 1:0 nc79bc8s must be referenced as doc.nc79bc8s or part.nc79bc8s"]} + `); + + buffer.consumeChunk(` +event:end +data:{"type":"end"} + `); + + expect(onStreamEvent).toHaveBeenNthCalledWith(1, { + type: "error", + messages: [ + "INVALID_ARGUMENT: The filter expression contains an error. 
Syntax error at 1:0 nc79bc8s must be referenced as doc.nc79bc8s or part.nc79bc8s", + ], + }); + + expect(onStreamEvent).toHaveBeenNthCalledWith(2, { type: "end" }); + }); +}); diff --git a/src/apiV2/EventBuffer.ts b/src/apiV2/EventBuffer.ts new file mode 100644 index 0000000..510573d --- /dev/null +++ b/src/apiV2/EventBuffer.ts @@ -0,0 +1,113 @@ +import { StreamEvent } from "./types"; + +export class EventBuffer { + private events: StreamEvent[]; + private onStreamEvent: (event: StreamEvent) => void; + private eventInProgress: string = ""; + private updatedText: string = ""; + + constructor(onStreamEvent: (event: any) => void) { + this.events = []; + this.onStreamEvent = onStreamEvent; + } + + consumeChunk(chunk: string) { + // A chunk might consist of multiple updates, or part of a single update. + const parts = chunk.split("\n"); + + parts.forEach((part: string) => { + // Skip empty lines. + if (part.trim().length === 0) return; + + // Skip "header" lines. + if (part.indexOf("event:") === 0) return; + + // Beginning of an event. + if (part.indexOf("data:") === 0) { + // Trim the "data:" prefix to get the JSON data itself. + this.eventInProgress = part.slice(5, part.length); + } else { + // Partial event. + this.eventInProgress += part; + } + + try { + // If we can parse the JSON, it's complete. 
+        const rawEvent = JSON.parse(this.eventInProgress);
+        this.enqueueEvent(rawEvent);
+        this.eventInProgress = "";
+      } catch {}
+    });
+
+    this.drainEvents();
+  }
+
+  private enqueueEvent(rawEvent: any) {
+    const {
+      type,
+      messages,
+      search_results,
+      chat_id,
+      turn_id,
+      factual_consistency_score,
+      generation_chunk,
+    } = rawEvent;
+
+    switch (type) {
+      case "error":
+        this.events.push({
+          type: "error",
+          messages,
+        });
+        break;
+
+      case "search_results":
+        this.events.push({
+          type: "searchResults",
+          searchResults: search_results,
+        });
+        break;
+
+      case "chat_info":
+        this.events.push({
+          type: "chatInfo",
+          chatId: chat_id,
+          turnId: turn_id,
+        });
+        break;
+
+      case "generation_chunk":
+        this.updatedText += generation_chunk;
+        this.events.push({
+          type: "generationChunk",
+          updatedText: this.updatedText,
+          generationChunk: generation_chunk,
+        });
+        break;
+
+      case "factual_consistency_score":
+        this.events.push({
+          type: "factualConsistencyScore",
+          factualConsistencyScore: factual_consistency_score,
+        });
+        break;
+
+      case "end":
+        this.events.push({
+          type: "end",
+        });
+        break;
+
+      default:
+        console.log(`Unhandled event: ${type}`, rawEvent);
+    }
+  }
+
+  private drainEvents() {
+    // Emit all events that are complete and reset the queue.
+ this.events.forEach((event) => { + this.onStreamEvent(event); + }); + this.events = []; + } +} diff --git a/src/apiV2/apiTypes.ts b/src/apiV2/apiTypes.ts new file mode 100644 index 0000000..00dc248 --- /dev/null +++ b/src/apiV2/apiTypes.ts @@ -0,0 +1,98 @@ +import { SummaryLanguage } from "../common/types"; + +export type CustomerSpecificReranker = { + type: "customer_reranker"; + reranker_id: string; +}; + +export type MmrReranker = { + type: "mmr"; + diversity_bias: number; +}; + +export type SearchConfiguration = { + corpora: { + corpus_key: string; + metadata_filter?: string; + lexical_interpolation?: number; + custom_dimensions?: Record; + semantics?: "default" | "query" | "response"; + }[]; + offset: number; + limit?: number; + context_configuration?: { + characters_before?: number; + characters_after?: number; + sentences_before?: number; + sentences_after?: number; + start_tag?: string; + end_tag?: string; + }; + reranker?: CustomerSpecificReranker | MmrReranker; +}; + +export type NoneCitations = { + style: "none"; +}; + +export type NumericCitations = { + style: "numeric"; +}; + +export type HtmlCitations = { + style: "html"; + url_pattern: string; + text_pattern: string; +}; + +export type MarkdownCitations = { + style: "markdown"; + url_pattern: string; + text_pattern: string; +}; + +export type GenerationConfiguration = { + prompt_name?: string; + max_used_search_results?: number; + prompt_text?: string; + max_response_characters?: number; + response_language?: SummaryLanguage; + model_parameters?: { + max_tokens: number; + temperature: number; + frequency_penalty: number; + presence_penalty: number; + }; + citations?: + | NoneCitations + | NumericCitations + | HtmlCitations + | MarkdownCitations; + enable_factual_consistency_score?: boolean; +}; + +export type ChatConfiguration = { + store?: boolean; + conversation_id?: string; +}; + +export type QueryBody = { + query: string; + search: SearchConfiguration; + stream_response?: boolean; + 
generation?: GenerationConfiguration; + chat?: ChatConfiguration; +}; + +export type SearchResult = { + document_id: string; + text: string; + score: number; + part_metadata: { + lang: string; + section: number; + offset: number; + len: number; + }; + document_metadata: Record; +}; diff --git a/src/apiV2/client.mocks.ts b/src/apiV2/client.mocks.ts new file mode 100644 index 0000000..b5f6d9e --- /dev/null +++ b/src/apiV2/client.mocks.ts @@ -0,0 +1,21 @@ +// Search results. +const chunk1 = `event:search_results +data:{"type":"search_results","search_results":[{"text":"(If you're not a Markdown Here user, check out the Markdown Cheatsheet that is not specific to MDH. But, really, you should also use Markdown Here, because it's awesome. http://markdown-here.com)","score":0.7467775344848633,"part_metadata":{"lang":"eng","offset":648,"len":25},"document_metadata":{"date":"2021-02-25T10:03:47Z","Total-Time":6,"extended-properties:AppVersion":12.0,"meta:paragraph-count":8,"meta:word-count":83,"Word-Count":83,"dcterms:created":"2021-02-25T10:03:47Z","meta:line-count":12,"dcterms:modified":"2021-02-25T10:03:47Z","Last-Modified":"2021-02-25T10:03:47Z","Last-Save-Date":"2021-02-25T10:03:47Z","meta:character-count":475,"Template":"Normal.dotm","Line-Count":12,"Paragraph-Count":8,"meta:save-date":"2021-02-25T10:03:47Z","meta:character-count-with-spaces":583,"Application-Name":"Microsoft Word 12.0.0","extended-properties:TotalTime":6,"modified":"2021-02-25T10:03:47Z","Content-Type":"application/vnd.openxmlformats-officedocument.wordprocessingml.document","X-Parsed-By":"org.apache.tika.parser.microsoft.ooxml.OOXMLParser","meta:creation-date":"2021-02-25T10:03:47Z","extended-properties:Application":"Microsoft Word 12.0.0","Creation-Date":"2021-02-25T10:03:47Z","xmpTPg:NPages":1,"Character-Count-With-Spaces":583,"Character 
Count":475,"Page-Count":1,"Application-Version":12.0,"extended-properties:Template":"Normal.dotm","extended-properties:DocSecurityString":"None","meta:page-count":1},"document_id":"914e8885-1a65-4b56-a279-95661b264f3b"}]}`; + +// Chat info. +const chunk2 = `event:chat_info +data:{"type":"chat_info","chat_id":"cht_74b5a5f3-1f51-4427-a317-f62efb493928","turn_id":"trn_74b5a5f3-1f51-4427-a317-f62efb493928"}`; + +// Generation. +const chunk3 = `event:generation_chunk +data:{"type":"generation_chunk","generation_chunk":"Markdown is "}`; + +// FCS. +const chunk4 = `event:factual_consistency_score +data:{"type":"factual_consistency_score","factual_consistency_score":0.41796625}`; + +// // End. +const chunk5 = `event:end +data:{"type":"end"}`; + +export const chunks = [chunk1, chunk2, chunk3, chunk4, chunk5]; diff --git a/src/apiV2/client.test.ts b/src/apiV2/client.test.ts new file mode 100644 index 0000000..a100a6a --- /dev/null +++ b/src/apiV2/client.test.ts @@ -0,0 +1,131 @@ +import { SetupServerApi } from "msw/node"; +import { streamQueryV2 } from "./client"; +import { StreamQueryConfig, StreamEvent } from "./types"; +import { createTestStreamingServer } from "../common/createTestStreamingServer"; +import { chunks } from "./client.mocks"; + +const encoder = new TextEncoder(); + +describe("stream-query-client API v2", () => { + let server: SetupServerApi; + + beforeAll(async () => { + server = createTestStreamingServer("/v2/chats", chunks, (value: any) => { + return encoder.encode(value); + }); + await server.listen(); + }); + + afterEach(() => { + server.resetHandlers(); + }); + + afterAll(() => { + server.close(); + }); + + it("streamQuery converts streamed chunks into usable data", async () => { + const configurationOptions: StreamQueryConfig = { + customerId: "1366999410", + apiKey: "zqt_UXrBcnI2UXINZkrv4g1tQPhzj02vfdtqYJIDiA", + corpusKey: "1", + query: "test query", + search: { + offset: 0, + limit: 5, + metadataFilter: "", + }, + generation: { + 
maxUsedSearchResults: 5, + responseLanguage: "eng", + enableFactualConsistencyScore: true, + promptName: "vectara-experimental-summary-ext-2023-12-11-large", + }, + chat: { + store: true, + }, + }; + + const handleEvent = jest.fn(); + + const onStreamEvent = (event: StreamEvent) => { + handleEvent(event); + + if (event.type === "end") { + expect(handleEvent).toHaveBeenNthCalledWith(1, { + type: "searchResults", + searchResults: [ + { + text: "(If you're not a Markdown Here user, check out the Markdown Cheatsheet that is not specific to MDH. But, really, you should also use Markdown Here, because it's awesome. http://markdown-here.com)", + score: 0.7467775344848633, + document_metadata: { + "Application-Name": "Microsoft Word 12.0.0", + "Application-Version": 12, + "Character Count": 475, + "Character-Count-With-Spaces": 583, + "Content-Type": + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "Creation-Date": "2021-02-25T10:03:47Z", + "Last-Modified": "2021-02-25T10:03:47Z", + "Last-Save-Date": "2021-02-25T10:03:47Z", + "Line-Count": 12, + "Page-Count": 1, + "Paragraph-Count": 8, + Template: "Normal.dotm", + "Total-Time": 6, + "Word-Count": 83, + "X-Parsed-By": + "org.apache.tika.parser.microsoft.ooxml.OOXMLParser", + date: "2021-02-25T10:03:47Z", + "dcterms:created": "2021-02-25T10:03:47Z", + "dcterms:modified": "2021-02-25T10:03:47Z", + "extended-properties:AppVersion": 12, + "extended-properties:Application": "Microsoft Word 12.0.0", + "extended-properties:DocSecurityString": "None", + "extended-properties:Template": "Normal.dotm", + "extended-properties:TotalTime": 6, + "meta:character-count": 475, + "meta:character-count-with-spaces": 583, + "meta:creation-date": "2021-02-25T10:03:47Z", + "meta:line-count": 12, + "meta:page-count": 1, + "meta:paragraph-count": 8, + "meta:save-date": "2021-02-25T10:03:47Z", + "meta:word-count": 83, + modified: "2021-02-25T10:03:47Z", + "xmpTPg:NPages": 1, + }, + part_metadata: { + lang: "eng", + len: 
25, + offset: 648, + }, + document_id: "914e8885-1a65-4b56-a279-95661b264f3b", + }, + ], + }); + + expect(handleEvent).toHaveBeenNthCalledWith(2, { + type: "chatInfo", + chatId: "cht_74b5a5f3-1f51-4427-a317-f62efb493928", + turnId: "trn_74b5a5f3-1f51-4427-a317-f62efb493928", + }); + + expect(handleEvent).toHaveBeenNthCalledWith(3, { + type: "generationChunk", + generationChunk: "Markdown is ", + updatedText: "Markdown is ", + }); + + expect(handleEvent).toHaveBeenNthCalledWith(4, { + type: "factualConsistencyScore", + factualConsistencyScore: 0.41796625, + }); + + expect(handleEvent).toHaveBeenNthCalledWith(5, { type: "end" }); + } + }; + + await streamQueryV2(configurationOptions, onStreamEvent); + }); +}); diff --git a/src/apiV2/client.ts b/src/apiV2/client.ts new file mode 100644 index 0000000..1fba3b4 --- /dev/null +++ b/src/apiV2/client.ts @@ -0,0 +1,174 @@ +import { + GenerationConfig, + StreamQueryConfig, + StreamEventHandler, +} from "./types"; +import { QueryBody } from "./apiTypes"; +import { DEFAULT_DOMAIN } from "../common/constants"; +import { generateStream } from "../common/generateStream"; +import { EventBuffer } from "./EventBuffer"; + +const convertReranker = ( + reranker?: StreamQueryConfig["search"]["reranker"] +) => { + if (!reranker) return; + + if (reranker.type === "customer_reranker") { + return { + type: reranker.type, + reranker_id: reranker.rerankerId, + }; + } + + if (reranker.type === "mmr") { + return { + type: reranker.type, + diversity_bias: reranker.diversityBias, + }; + } +}; + +const convertCitations = (citations?: GenerationConfig["citations"]) => { + if (!citations) return; + + if (citations.style === "none" || citations.style === "numeric") { + return { + style: citations.style, + }; + } + + if (citations.style === "html" || citations.style === "markdown") { + return { + style: citations.style, + url_pattern: citations.urlPattern, + text_pattern: citations.textPattern, + }; + } +}; + +export const streamQueryV2 = async ( + 
config: StreamQueryConfig, + onStreamEvent: StreamEventHandler +) => { + const { + customerId, + apiKey, + endpoint, + corpusKey, + query, + search: { + metadataFilter, + lexicalInterpolation, + customDimensions, + semantics, + offset, + limit, + contextConfiguration, + reranker, + }, + generation: { + promptName, + maxUsedSearchResults, + promptText, + maxResponseCharacters, + responseLanguage, + modelParameters, + citations, + } = {}, + chat, + } = config; + + let body: QueryBody = { + query, + search: { + corpora: [ + { + corpus_key: corpusKey, + metadata_filter: metadataFilter, + lexical_interpolation: lexicalInterpolation, + custom_dimensions: customDimensions, + semantics, + }, + ], + offset, + limit, + context_configuration: { + characters_before: contextConfiguration?.charactersBefore, + characters_after: contextConfiguration?.charactersAfter, + sentences_before: contextConfiguration?.sentencesBefore, + sentences_after: contextConfiguration?.sentencesAfter, + start_tag: contextConfiguration?.startTag, + end_tag: contextConfiguration?.endTag, + }, + reranker: convertReranker(reranker), + }, + generation: { + prompt_name: promptName, + max_used_search_results: maxUsedSearchResults, + prompt_text: promptText, + max_response_characters: maxResponseCharacters, + response_language: responseLanguage, + model_parameters: modelParameters && { + max_tokens: modelParameters.maxTokens, + temperature: modelParameters.temperature, + frequency_penalty: modelParameters.frequencyPenalty, + presence_penalty: modelParameters.presencePenalty, + }, + citations: convertCitations(citations), + }, + chat: chat && { + store: chat.store, + }, + stream_response: true, + }; + + let path; + + if (!chat) { + path = `/v2/query`; + } else { + if (chat.conversationId) { + path = `/v2/chats/${chat.conversationId}/turns`; + } else { + path = "/v2/chats"; + } + } + + const headers = { + "x-api-key": apiKey, + "customer-id": customerId, + "Content-Type": "application/json", + }; + + const url 
= `${endpoint ?? DEFAULT_DOMAIN}${path}`; + + try { + const { cancelStream, stream } = await generateStream( + headers, + JSON.stringify(body), + url + ); + + new Promise(async (resolve, reject) => { + try { + const buffer = new EventBuffer(onStreamEvent); + + for await (const chunk of stream) { + try { + buffer.consumeChunk(chunk); + } catch (error) { + console.log("error", error); + } + } + + resolve(undefined); + } catch (error) { + reject(error); + } + }); + + return { cancelStream }; + } catch (error) { + console.log("error", error); + } +}; diff --git a/src/apiV2/index.ts b/src/apiV2/index.ts new file mode 100644 index 0000000..17ce176 --- /dev/null +++ b/src/apiV2/index.ts @@ -0,0 +1 @@ +export { streamQueryV2 } from "./client"; diff --git a/src/apiV2/types.ts b/src/apiV2/types.ts new file mode 100644 index 0000000..d1fa0be --- /dev/null +++ b/src/apiV2/types.ts @@ -0,0 +1,151 @@ +import { SummaryLanguage } from "../common/types"; +import { SearchResult } from "./apiTypes"; + +export type GenerationConfig = { + // The preferred prompt to use, if applicable + promptName?: string; + // The number of search results to include in creating the summary + maxUsedSearchResults?: number; + // Custom prompt for summarization. + promptText?: string; + maxResponseCharacters?: number; + // The language the summary should be in. + responseLanguage?: SummaryLanguage; + modelParameters?: { + maxTokens: number; + temperature: number; + frequencyPenalty: number; + presencePenalty: number; + }; + citations?: + | { + style: "none" | "numeric"; + } + | { + style: "html" | "markdown"; + urlPattern: string; + textPattern: string; + }; + enableFactualConsistencyScore?: boolean; +}; + +export type StreamQueryConfig = { + // The customer ID of the Vectara corpora owner. + customerId: string; + + // The Vectara query API key that has access to the corpora you're querying. + apiKey: string; + + // An optional endpoint to send the query to. 
+ // Used if proxying the Vectara API URL behind a custom server. + endpoint?: string; + + // The query to send to the API. This is the user input. + query: string; + + corpusKey: string; + + search: { + metadataFilter: string; + // A number from 0.0 -> 1.0 that determines how much to leverage neural search and keyword search. + // A value of 0.0 is purely neural search, where a value of 1.0 is purely keyword search. + // Numbers in between are a combination of the two, leaning one way or another. + lexicalInterpolation?: number; + customDimensions?: Record<string, number>; + semantics?: "default" | "query" | "response"; + offset: number; + limit?: number; + contextConfiguration?: { + charactersBefore?: number; + charactersAfter?: number; + // For summary references, this is the number of sentences to include before/after + // relevant reference snippets. + sentencesBefore?: number; + // For summary references, this is the number of sentences to include before/after + // relevant reference snippets. + sentencesAfter?: number; + startTag?: string; + endTag?: string; + }; + reranker?: + | { + type: "customer_reranker"; + rerankerId: string; + } + | { + type: "mmr"; + // Diversity bias ranges from 0 to 1. + // 0 will optimize for results that are as closely related to the query as possible. + // 1 will optimize for results that are as diverse as possible.
+ diversityBias: number; + }; + }; + + generation?: GenerationConfig; + + chat?: { + store?: boolean; + conversationId?: string; + }; +}; + +export type Summary = { + prompt?: string; +}; + +export type Chat = { + conversationId: string; + turnId: string; + // Debug-only + rephrasedQuery?: string; +}; + +export type FactualConsistency = { + score: number; +}; + +export type StreamEvent = + | ErrorEvent + | SearchResultsEvent + | ChatInfoEvent + | GenerationChunkEvent + | FactualConsistencyScoreEvent + | EndEvent; + +export type ErrorEvent = { + type: "error"; + messages?: string[]; +}; + +export type SearchResultsEvent = { + type: "searchResults"; + searchResults: SearchResult[]; +}; + +export type ChatInfoEvent = { + type: "chatInfo"; + chatId: string; + turnId: string; +}; + +export type GenerationChunkEvent = { + type: "generationChunk"; + updatedText: string; + generationChunk: string; +}; + +export type FactualConsistencyScoreEvent = { + type: "factualConsistencyScore"; + factualConsistencyScore: number; +}; + +export type EndEvent = { + type: "end"; +}; + +export type StreamEventHandler = (event: StreamEvent) => void; + +export type DocMetadata = { + name: string; + value: string; +}; diff --git a/src/common/constants.ts b/src/common/constants.ts new file mode 100644 index 0000000..e65ecc4 --- /dev/null +++ b/src/common/constants.ts @@ -0,0 +1,2 @@ +// TODO: Switch back to prod values before merging +export const DEFAULT_DOMAIN = "https://api.vectara.dev"; diff --git a/src/createStreamingServer.ts b/src/common/createTestStreamingServer.ts similarity index 69% rename from src/createStreamingServer.ts rename to src/common/createTestStreamingServer.ts index 8295e4e..ecfc7eb 100644 --- a/src/createStreamingServer.ts +++ b/src/common/createTestStreamingServer.ts @@ -1,15 +1,14 @@ import { setupServer } from "msw/node"; import { http } from "msw"; +import { DEFAULT_DOMAIN } from "./constants"; -const encoder = new TextEncoder(); - -const createChunk = (json: any) 
=> { - return encoder.encode(JSON.stringify(json)); -}; - -export const createStreamingServer = (chunks: any[]) => { +export const createTestStreamingServer = ( + path: string, + chunks: any[], + createChunk: (value: any) => any +) => { return setupServer( - http.post("https://api.vectara.io/v1/stream-query", () => { + http.post(`${DEFAULT_DOMAIN}${path}`, () => { const stream = new ReadableStream({ start(controller) { chunks.forEach((chunk) => { diff --git a/src/common/generateStream.ts b/src/common/generateStream.ts new file mode 100644 index 0000000..fff04ec --- /dev/null +++ b/src/common/generateStream.ts @@ -0,0 +1,38 @@ +export const generateStream = async ( + headers: Record<string, string>, + body: string, + url: string +) => { + let controller = new AbortController(); + + const response = await fetch(url, { + method: "POST", + headers, + body, + signal: controller.signal, + }); + + if (response.status !== 200) throw new Error(response.status.toString()); + if (!response.body) throw new Error("Response body does not exist"); + + return { + stream: getIterableStream(response.body), + cancelStream: () => controller.abort(), + }; +}; + +async function* getIterableStream( + body: ReadableStream<Uint8Array> +): AsyncIterable<string> { + const reader = body.getReader(); + const decoder = new TextDecoder(); + + while (true) { + const { value, done } = await reader.read(); + if (done) { + break; + } + const decodedChunk = decoder.decode(value, { stream: true }); + yield decodedChunk; + } +} diff --git a/src/common/types.ts b/src/common/types.ts new file mode 100644 index 0000000..977c70a --- /dev/null +++ b/src/common/types.ts @@ -0,0 +1,61 @@ +export const SUMMARY_LANGUAGES = [ + "auto", + "eng", + "en", + "deu", + "de", + "fra", + "fr", + "zho", + "zh", + "kor", + "ko", + "ara", + "ar", + "rus", + "ru", + "tha", + "th", + "nld", + "nl", + "ita", + "it", + "por", + "pt", + "spa", + "es", + "jpn", + "ja", + "pol", + "pl", + "tur", + "tr", + "heb", + "he", + "vie", + "vi", + "ind", + "id", + "ces", +
"cs", + "ukr", + "uk", + "ell", + "el", + "fas", + "fa", + "hin", + "hi", + "urd", + "ur", + "swe", + "sv", + "ben", + "bn", + "msa", + "ms", + "ron", + "ro", +] as const; + +export type SummaryLanguage = (typeof SUMMARY_LANGUAGES)[number]; diff --git a/src/index.ts b/src/index.ts index 94c2f5e..5fd4e01 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,2 +1,11 @@ -export { streamQuery } from "./client"; -export * from "./types"; +// Creates namespace "ApiV1" +import * as ApiV1 from "./apiV1/types"; +export { ApiV1 }; + +export { streamQueryV1 } from "./apiV1/client"; + +// Creates namespace "ApiV2" +import * as ApiV2 from "./apiV2/types"; +export { ApiV2 }; + +export { streamQueryV2 } from "./apiV2/client";