Skip to content

Commit

Permalink
Implements support for external package loading in validator.
Browse files Browse the repository at this point in the history
  • Loading branch information
dom96 committed Nov 25, 2024
1 parent 7efa08a commit 3a04883
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 35 deletions.
10 changes: 10 additions & 0 deletions src/pyodide/internal/python.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { enterJaegerSpan } from 'pyodide-internal:jaeger';
import {
SITE_PACKAGES,
TRANSITIVE_REQUIREMENTS,
adjustSysPath,
mountSitePackages,
mountWorkerFiles,
Expand Down Expand Up @@ -50,6 +51,7 @@ import {
setUnsafeEval,
setGetRandomValues,
} from 'pyodide-internal:generated/emscriptenSetup';
import { loadPackages } from 'pyodide-internal:loadPackage';

/**
* After running `instantiateEmscriptenModule` but before calling into any C
Expand All @@ -62,6 +64,11 @@ async function prepareWasmLinearMemory(Module: Module): Promise<void> {
mountSitePackages(Module, SITE_PACKAGES.rootInfo);
entropyMountFiles(Module);
Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT;

// NB. loadPackages adds the packages to the `SITE_PACKAGES` global which then gets used in
// preloadDynamicLibs.
await loadPackages(Module, TRANSITIVE_REQUIREMENTS);

enterJaegerSpan('preload_dynamic_libs', () => preloadDynamicLibs(Module));
enterJaegerSpan('remove_run_dependency', () =>
Module.removeRunDependency('dynlibs')
Expand Down Expand Up @@ -100,9 +107,11 @@ export async function loadPyodide(
}
setUnsafeEval(UnsafeEval);
setGetRandomValues(getRandomValues);

await enterJaegerSpan('prepare_wasm_linear_memory', () =>
prepareWasmLinearMemory(Module)
);

maybeSetupSnapshotUpload(Module);
// Mount worker files after doing snapshot upload so we ensure that data from the files is never
// present in snapshot memory.
Expand All @@ -111,6 +120,7 @@ export async function loadPyodide(
// Finish setting up Pyodide's ffi so we can use the nice Python interface
await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap);
const pyodide = Module.API.public_api;

finishSnapshotSetup(pyodide);
return pyodide;
}
7 changes: 6 additions & 1 deletion src/pyodide/internal/setupPackages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
LOAD_WHEELS_FROM_ARTIFACT_BUNDLER,
} from 'pyodide-internal:metadata';
import { simpleRunPython } from 'pyodide-internal:util';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';

const canonicalizeNameRegex = /[-_.]+/g;

Expand Down Expand Up @@ -44,6 +45,7 @@ class SitePackagesDir {
path: '',
name: '',
parts: [],
reader: null,
};
this.soFiles = [];
this.loadedRequirements = new Set();
Expand Down Expand Up @@ -125,9 +127,11 @@ class SitePackagesDir {
*
* This also returns the list of soFiles in the resulting site-packages
* directory so we can preload them.
*
* TODO(later): This needs to be removed when external package loading is enabled.
*/
export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
const [bigTarInfo, bigTarSoFiles] = parseTarInfo();
const [bigTarInfo, bigTarSoFiles] = parseTarInfo(EmbeddedPackagesTarReader);

let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]);

Expand Down Expand Up @@ -171,6 +175,7 @@ function disabledLoadPackage(): never {
/**
 * Computes the complete set of package names needed by the worker.
 *
 * The declared REQUIREMENTS are canonicalized, expanded through
 * `recursiveDependencies` against LOCKFILE, and the name of every resulting
 * package entry is canonicalized again before being collected.
 *
 * @returns The canonicalized names of all packages reported by
 *   `recursiveDependencies` for the declared requirements.
 */
function getTransitiveRequirements(): Set<string> {
  const directRequirements = REQUIREMENTS.map(canonicalizePackageName);
  // resolve transitive dependencies of requirements and if IN_WORKERD install them from the cdn.
  // TODO(later): use current package's LOCKFILE instead of the global.
  const resolvedPackages = recursiveDependencies(LOCKFILE, directRequirements);

  // Collect names into a Set so that duplicates collapse to a single entry.
  const transitiveNames = new Set<string>();
  for (const { name } of resolvedPackages) {
    transitiveNames.add(canonicalizePackageName(name));
  }
  return transitiveNames;
}
Expand Down
7 changes: 5 additions & 2 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
SITE_PACKAGES,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';
import {
SHOULD_SNAPSHOT_TO_DISK,
IS_CREATING_BASELINE_SNAPSHOT,
Expand Down Expand Up @@ -136,7 +136,10 @@ export function preloadDynamicLibs(Module: Module): void {
throw Error('contentsOffset not defined for ' + soFile);
}
const wasmModuleData = new Uint8Array(size);
TarReader.read(contentsOffset, wasmModuleData);
(node.reader ?? EmbeddedPackagesTarReader).read(
contentsOffset,
wasmModuleData
);
const path = sitePackages + '/' + soFile.join('/');
PRELOADED_SO_FILES.push(path);
loadDynlib(Module, path, wasmModuleData);
Expand Down
4 changes: 1 addition & 3 deletions src/pyodide/internal/tar.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';

// This is based on the info about the tar file format on wikipedia
// And some trial and error with real tar files.
// https://en.wikipedia.org/wiki/Tar_(computing)#File_format
Expand Down Expand Up @@ -44,7 +42,7 @@ function decodeHeader(buf: Uint8Array, reader: Reader): TarFSInfo {
};
}

export function parseTarInfo(reader = TarReader): [TarFSInfo, string[]] {
export function parseTarInfo(reader: Reader): [TarFSInfo, string[]] {
const directories: TarFSInfo[] = [];
const soFiles = [];
const root: TarFSInfo = {
Expand Down
32 changes: 4 additions & 28 deletions src/pyodide/python-entrypoint-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import {
import { reportError } from 'pyodide-internal:util';
import { default as Limiter } from 'pyodide-internal:limiter';
import { entropyBeforeRequest } from 'pyodide-internal:topLevelEntropy/lib';
import { loadPackages } from 'pyodide-internal:loadPackage';

function pyimportMainModule(pyodide: Pyodide): PyModule {
if (!MAIN_MODULE_NAME.endsWith('.py')) {
Expand Down Expand Up @@ -74,21 +73,8 @@ async function applyPatch(pyodide: Pyodide, patchName: string): Promise<void> {
pyodide.pyimport(patchName + '_patch');
}

/**
* Set up Python packages:
* - patch loadPackage to ignore integrity
* - get requirements
* - Use tar file + requirements to mount site packages directory
* - if in workerd use loadPackage to load packages
* - install patches to make various requests packages work
*
* TODO: move this into setupPackages.js. Can't now because the patch imports
* fail from there for some reason.
*/
export async function setupPackages(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_packages', async () => {
patchLoadPackage(pyodide);
await loadPackages(pyodide._module, TRANSITIVE_REQUIREMENTS);
async function setupPatches(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_patches', async () => {
// install any extra packages into the site-packages directory, so calculate where that is.
const pymajor = pyodide._module._py_version_major();
const pyminor = pyodide._module._py_version_minor();
Expand Down Expand Up @@ -119,7 +105,8 @@ function getMainModule(): Promise<PyModule> {
}
mainModulePromise = (async function () {
const pyodide = await getPyodide();
await setupPackages(pyodide);
patchLoadPackage(pyodide);
await setupPatches(pyodide);
Limiter.beginStartup();
try {
return enterJaegerSpan('pyimport_main_module', () =>
Expand Down Expand Up @@ -173,17 +160,6 @@ const handlers: {
try {
// Do not set up anything to do with Python in the global scope when tracing. The Jaeger tracing
// calls need to be made inside an IO context.
if (!IS_TRACING) {
if (IS_WORKERD) {
// If we're in workerd, we have to do setupPackages in the IoContext, so don't start it yet.
// TODO: fix this.
await getPyodide();
} else {
// If we're not in workerd, setupPackages doesn't require IO so we can do it all here.
await getMainModule();
}
}

if (IS_WORKERD || IS_TRACING) {
handlers.fetch = makeHandler('on_fetch');
handlers.test = makeHandler('test');
Expand Down
2 changes: 1 addition & 1 deletion src/pyodide/types/FS.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ interface TarFSInfo {
name: string;
parts: string[];
contentsOffset?: number;
reader?: Reader;
reader: Reader | null;
}

declare type MetadataDirInfo = Map<string, MetadataDirInfo>;
Expand Down
1 change: 1 addition & 0 deletions src/workerd/server/workerd-api.c++
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,7 @@ void WorkerdApi::compileModules(jsg::Lock& lockParam,
makePyodideMetadataReader(conf, impl->pythonConfig), jsg::ModuleRegistry::Type::INTERNAL);

// Inject packages tar file
// TODO(later): This shouldn't exist once featureFlags.getPythonExternalPackages() is true.
modules->addBuiltinModule("pyodide-internal:packages_tar_reader",
jsg::alloc<ReadOnlyBuffer>(PYODIDE_PACKAGES_TAR.get()),
workerd::jsg::ModuleRegistry::Type::INTERNAL);
Expand Down

0 comments on commit 3a04883

Please sign in to comment.