From 43156f560bda2c994f351cac0bf5079a1a2e6aa1 Mon Sep 17 00:00:00 2001
From: Emma Hamilton
Date: Fri, 14 Jun 2024 14:30:25 +1000
Subject: [PATCH] Add `treeFetchStrategy` to `createGitHubReader`

---
 .changeset/brown-fans-push.md           |   5 +
 packages/keystatic/src/reader/github.ts | 198 ++++++++++++++++++++----
 2 files changed, 178 insertions(+), 25 deletions(-)
 create mode 100644 .changeset/brown-fans-push.md

diff --git a/.changeset/brown-fans-push.md b/.changeset/brown-fans-push.md
new file mode 100644
index 000000000..6f12af80c
--- /dev/null
+++ b/.changeset/brown-fans-push.md
@@ -0,0 +1,5 @@
+---
+'@keystatic/core': patch
+---
+
+Add `treeFetchStrategy` to `createGitHubReader`
diff --git a/packages/keystatic/src/reader/github.ts b/packages/keystatic/src/reader/github.ts
index 38ae187da..dcde54ea1 100644
--- a/packages/keystatic/src/reader/github.ts
+++ b/packages/keystatic/src/reader/github.ts
@@ -24,27 +24,16 @@ export type Reader<
   },
 > = BaseReader;
 
-export function createGitHubReader<
-  Collections extends {
-    [key: string]: Collection, string>;
-  },
-  Singletons extends {
-    [key: string]: Singleton>;
-  },
->(
-  config: Config,
-  opts: {
-    repo: `${string}/${string}`;
-    pathPrefix?: string;
-    ref?: string;
-    token?: string;
-  }
-): Reader {
-  const ref = opts.ref ?? 'HEAD';
-  const pathPrefix = opts.pathPrefix ? fixPath(opts.pathPrefix) + '/' : '';
+function createMinimalFsForGitHubWithRecursiveTree(opts: {
+  token: string | undefined;
+  ref: string;
+  pathPrefix: string;
+  fetch: typeof globalThis.fetch;
+  repo: string;
+}): MinimalFs {
   const getTree = cache(async function loadTree() {
-    const res = await fetch(
-      `https://api.github.com/repos/${opts.repo}/git/trees/${ref}?recursive=1`,
+    const res = await opts.fetch(
+      `https://api.github.com/repos/${opts.repo}/git/trees/${opts.ref}?recursive=1`,
       {
         headers: opts.token ? { Authorization: `Bearer ${opts.token}` } : {},
         cache: 'no-store',
@@ -58,15 +47,21 @@ export function createGitHubReader<
     const { tree, sha }: { tree: TreeEntry[]; sha: string } = await res.json();
     return { tree: treeEntriesToTreeNodes(tree), sha };
   });
-  const fs: MinimalFs = {
+  return {
     async fileExists(path) {
       const { tree } = await getTree();
-      const node = getTreeNodeAtPath(tree, fixPath(`${pathPrefix}${path}`));
+      const node = getTreeNodeAtPath(
+        tree,
+        fixPath(`${opts.pathPrefix}${path}`)
+      );
       return node?.entry.type === 'blob';
     },
     async readdir(path) {
       const { tree } = await getTree();
-      const node = getTreeNodeAtPath(tree, fixPath(`${pathPrefix}${path}`));
+      const node = getTreeNodeAtPath(
+        tree,
+        fixPath(`${opts.pathPrefix}${path}`)
+      );
       if (!node?.children) return [];
       const filtered: { name: string; kind: 'file' | 'directory' }[] = [];
       for (const [name, val] of node.children) {
@@ -81,8 +76,8 @@ export function createGitHubReader<
     },
     async readFile(path) {
       const { sha } = await getTree();
-      const res = await fetch(
-        `https://raw.githubusercontent.com/${opts.repo}/${sha}/${pathPrefix}${path}`,
+      const res = await opts.fetch(
+        `https://raw.githubusercontent.com/${opts.repo}/${sha}/${opts.pathPrefix}${path}`,
         { headers: opts.token ? { Authorization: `Bearer ${opts.token}` } : {} }
       );
       if (res.status === 404) return null;
@@ -92,6 +87,159 @@ export function createGitHubReader<
       return new Uint8Array(await res.arrayBuffer());
     },
   };
+}
+
+// Matches the final `/`-separated segment of a path (the entry's own name).
+const lastPartOfPathRegex = /[^/]+$/;
+
+/**
+ * Indexes the entries of a single (non-recursive) tree by entry name.
+ * Entries whose path has no final segment (empty or ending in `/`) are skipped.
+ */
+function toTreeNodes(entries: TreeEntry[]) {
+  const nodes = new Map<string, TreeEntry>();
+  for (const entry of entries) {
+    // Use the whole match: the key must be the last path segment in full.
+    const lastPart = entry.path.match(lastPartOfPathRegex)?.[0];
+    if (!lastPart) continue;
+    nodes.set(lastPart, entry);
+  }
+  return nodes;
+}
+
+/**
+ * Builds a `MinimalFs` that walks the repository one tree level at a time:
+ * the root tree is fetched for `opts.ref`, and each child tree is fetched by
+ * its sha only when a path lookup descends into it.
+ */
+function createMinimalFsForGitHubWithShallowTree(opts: {
+  token: string | undefined;
+  ref: string;
+  pathPrefix: string;
+  fetch: typeof globalThis.fetch;
+  repo: string;
+}): MinimalFs {
+  const getRootTree = cache(async function loadTree() {
+    const res = await opts.fetch(
+      `https://api.github.com/repos/${opts.repo}/git/trees/${opts.ref}`,
+      {
+        headers: opts.token ? { Authorization: `Bearer ${opts.token}` } : {},
+        cache: 'no-store',
+      }
+    );
+    if (!res.ok) {
+      throw new Error(
+        `Failed to fetch tree: ${res.status} ${await res.text()}`
+      );
+    }
+    const { tree, sha }: { tree: TreeEntry[]; sha: string } = await res.json();
+
+    return { tree: toTreeNodes(tree), sha };
+  });
+  const getChildTree = cache(async function loadChildTree(treeSha: string) {
+    const res = await opts.fetch(
+      `https://api.github.com/repos/${opts.repo}/git/trees/${treeSha}`,
+      { headers: opts.token ? { Authorization: `Bearer ${opts.token}` } : {} }
+    );
+    if (!res.ok) {
+      throw new Error(
+        `Failed to fetch tree: ${res.status} ${await res.text()}`
+      );
+    }
+    const { tree }: { tree: TreeEntry[] } = await res.json();
+    return toTreeNodes(tree);
+  });
+
+  // Resolves the tree at `path` by descending from the root one segment at a
+  // time; returns undefined as soon as a segment is missing or is not a tree.
+  async function getTreeForPath(path: string[]) {
+    const { tree } = await getRootTree();
+    let currentTree = tree;
+    for (const part of path) {
+      const node = currentTree.get(part);
+      if (node?.type !== 'tree') return undefined;
+      currentTree = await getChildTree(node.sha);
+    }
+    return currentTree;
+  }
+  return {
+    async fileExists(path) {
+      const fullPath = fixPath(`${opts.pathPrefix}${path}`).split('/');
+      const tree = await getTreeForPath(fullPath.slice(0, -1));
+      return tree?.get(fullPath[fullPath.length - 1])?.type === 'blob';
+    },
+    async readdir(path) {
+      const fullPath = fixPath(`${opts.pathPrefix}${path}`).split('/');
+      const tree = await getTreeForPath(fullPath);
+      if (!tree) return [];
+      const filtered: { name: string; kind: 'file' | 'directory' }[] = [];
+      for (const [name, val] of tree) {
+        if (val.type === 'tree') {
+          filtered.push({ name, kind: 'directory' });
+        }
+        if (val.type === 'blob') {
+          filtered.push({ name, kind: 'file' });
+        }
+      }
+      return filtered;
+    },
+    async readFile(path) {
+      const { sha } = await getRootTree();
+      const res = await opts.fetch(
+        `https://raw.githubusercontent.com/${opts.repo}/${sha}/${opts.pathPrefix}${path}`,
+        { headers: opts.token ? { Authorization: `Bearer ${opts.token}` } : {} }
+      );
+      if (res.status === 404) return null;
+      if (!res.ok) {
+        throw new Error(`Failed to fetch ${path}: ${await res.text()}`);
+      }
+      return new Uint8Array(await res.arrayBuffer());
+    },
+  };
+}
+
+export function createGitHubReader<
+  Collections extends {
+    [key: string]: Collection, string>;
+  },
+  Singletons extends {
+    [key: string]: Singleton>;
+  },
+>(
+  config: Config,
+  opts: {
+    repo: `${string}/${string}`;
+    pathPrefix?: string;
+    ref?: string;
+    token?: string;
+    /**
+     * - `recursive` fetches the entire git tree at once, which is faster
+     * latency-wise but downloads more data and each tree can't be cached
+     * - `shallow` fetches each level of the tree as needed
+     * This will be worse latency-wise because there will be more
+     * round-trips to GitHub but less data will be downloaded
+     * and each tree can be cached separately
+     *
+     * @default 'recursive'
+     */
+    treeFetchStrategy?: 'recursive' | 'shallow';
+    fetch?: typeof globalThis.fetch;
+  }
+): Reader {
+  const fetch = opts.fetch ?? globalThis.fetch;
+  const ref = opts.ref ?? 'HEAD';
+  const pathPrefix = opts.pathPrefix ? fixPath(opts.pathPrefix) + '/' : '';
+  const fs = (
+    opts.treeFetchStrategy === 'shallow'
+      ? createMinimalFsForGitHubWithShallowTree
+      : createMinimalFsForGitHubWithRecursiveTree
+  )({
+    pathPrefix,
+    ref,
+    token: opts.token,
+    fetch,
+    repo: opts.repo,
+  });
   return {
     collections: Object.fromEntries(
       Object.keys(config.collections || {}).map(key => [