Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements support for external package loading in validator. #3140

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ common --enable_platform_specific_config
build --verbose_failures
build --build_tag_filters=-off-by-default
test --test_tag_filters=-off-by-default
test:asan --test_tag_filters=-off-by-default,-no-asan
# exclude enormous tests by default
build --test_size_filters=-enormous

Expand Down
4 changes: 4 additions & 0 deletions src/cloudflare/internal/test/ai/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,8 @@ py_wd_test(
"*.js",
"*.py",
]),
tags = [
# TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318
"no-asan",
],
)
4 changes: 4 additions & 0 deletions src/cloudflare/internal/test/d1/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ py_wd_test(
"*.py",
"*.js",
]),
tags = [
# TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318
"no-asan",
],
)
4 changes: 4 additions & 0 deletions src/cloudflare/internal/test/vectorize/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,8 @@ py_wd_test(
"*.py",
"*.js",
]),
tags = [
# TODO(someday): Fix asan failure for this, see https://github.com/cloudflare/workerd/pull/3140#discussion_r1858273318
"no-asan",
],
)
16 changes: 14 additions & 2 deletions src/pyodide/internal/python.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { enterJaegerSpan } from 'pyodide-internal:jaeger';
import {
SITE_PACKAGES,
TRANSITIVE_REQUIREMENTS,
adjustSysPath,
mountSitePackages,
mountWorkerFiles,
Expand Down Expand Up @@ -50,6 +51,7 @@ import {
setUnsafeEval,
setGetRandomValues,
} from 'pyodide-internal:generated/emscriptenSetup';
import { loadPackages } from 'pyodide-internal:loadPackage';

/**
* After running `instantiateEmscriptenModule` but before calling into any C
Expand All @@ -59,9 +61,8 @@ import {
*/
async function prepareWasmLinearMemory(Module: Module): Promise<void> {
// Note: if we are restoring from a snapshot, runtime is not initialized yet.
mountSitePackages(Module, SITE_PACKAGES.rootInfo);
entropyMountFiles(Module);
Module.noInitialRun = !SHOULD_RESTORE_SNAPSHOT;

enterJaegerSpan('preload_dynamic_libs', () => preloadDynamicLibs(Module));
enterJaegerSpan('remove_run_dependency', () =>
Module.removeRunDependency('dynlibs')
Expand Down Expand Up @@ -100,9 +101,19 @@ export async function loadPyodide(
}
setUnsafeEval(UnsafeEval);
setGetRandomValues(getRandomValues);

mountSitePackages(Module, SITE_PACKAGES.rootInfo);
entropyMountFiles(Module);
await enterJaegerSpan('load_packages', () =>
// NB. loadPackages adds the packages to the `SITE_PACKAGES` global which then gets used in
// preloadDynamicLibs.
loadPackages(Module, TRANSITIVE_REQUIREMENTS)
);

await enterJaegerSpan('prepare_wasm_linear_memory', () =>
prepareWasmLinearMemory(Module)
);

maybeSetupSnapshotUpload(Module);
// Mount worker files after doing snapshot upload so we ensure that data from the files is never
// present in snapshot memory.
Expand All @@ -111,6 +122,7 @@ export async function loadPyodide(
// Finish setting up Pyodide's ffi so we can use the nice Python interface
await enterJaegerSpan('finalize_bootstrap', Module.API.finalizeBootstrap);
const pyodide = Module.API.public_api;

finishSnapshotSetup(pyodide);
return pyodide;
}
7 changes: 6 additions & 1 deletion src/pyodide/internal/setupPackages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
LOAD_WHEELS_FROM_ARTIFACT_BUNDLER,
} from 'pyodide-internal:metadata';
import { simpleRunPython } from 'pyodide-internal:util';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';

const canonicalizeNameRegex = /[-_.]+/g;

Expand Down Expand Up @@ -44,6 +45,7 @@ class SitePackagesDir {
path: '',
name: '',
parts: [],
reader: null,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about setting reader: EmbeddedPackagesTarReader and removing the node.reader ?? EmbeddedPackagesTarReader logic in snapshot.ts? Would that work?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, but doesn't matter much. I plan to get rid of EmbeddedPackagesTarReader in a follow up PR.

};
this.soFiles = [];
this.loadedRequirements = new Set();
Expand Down Expand Up @@ -125,9 +127,11 @@ class SitePackagesDir {
*
* This also returns the list of soFiles in the resulting site-packages
* directory so we can preload them.
*
* TODO(later): This needs to be removed when external package loading is enabled.
*/
export function buildSitePackages(requirements: Set<string>): SitePackagesDir {
const [bigTarInfo, bigTarSoFiles] = parseTarInfo();
const [bigTarInfo, bigTarSoFiles] = parseTarInfo(EmbeddedPackagesTarReader);

let requirementsInBigBundle = new Set([...STDLIB_PACKAGES]);

Expand Down Expand Up @@ -171,6 +175,7 @@ function disabledLoadPackage(): never {
function getTransitiveRequirements(): Set<string> {
const requirements = REQUIREMENTS.map(canonicalizePackageName);
// resolve transitive dependencies of requirements and if IN_WORKERD install them from the cdn.
// TODO(later): use current package's LOCKFILE instead of the global.
const packageDatas = recursiveDependencies(LOCKFILE, requirements);
return new Set(packageDatas.map(({ name }) => canonicalizePackageName(name)));
}
Expand Down
7 changes: 5 additions & 2 deletions src/pyodide/internal/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
SITE_PACKAGES,
getSitePackagesPath,
} from 'pyodide-internal:setupPackages';
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';
import { default as EmbeddedPackagesTarReader } from 'pyodide-internal:packages_tar_reader';
import {
SHOULD_SNAPSHOT_TO_DISK,
IS_CREATING_BASELINE_SNAPSHOT,
Expand Down Expand Up @@ -136,7 +136,10 @@ export function preloadDynamicLibs(Module: Module): void {
throw Error('contentsOffset not defined for ' + soFile);
}
const wasmModuleData = new Uint8Array(size);
TarReader.read(contentsOffset, wasmModuleData);
(node.reader ?? EmbeddedPackagesTarReader).read(
contentsOffset,
wasmModuleData
);
const path = sitePackages + '/' + soFile.join('/');
PRELOADED_SO_FILES.push(path);
loadDynlib(Module, path, wasmModuleData);
Expand Down
4 changes: 1 addition & 3 deletions src/pyodide/internal/tar.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import { default as TarReader } from 'pyodide-internal:packages_tar_reader';

// This is based on the info about the tar file format on wikipedia
// And some trial and error with real tar files.
// https://en.wikipedia.org/wiki/Tar_(computing)#File_format
Expand Down Expand Up @@ -44,7 +42,7 @@ function decodeHeader(buf: Uint8Array, reader: Reader): TarFSInfo {
};
}

export function parseTarInfo(reader = TarReader): [TarFSInfo, string[]] {
export function parseTarInfo(reader: Reader): [TarFSInfo, string[]] {
const directories: TarFSInfo[] = [];
const soFiles = [];
const root: TarFSInfo = {
Expand Down
31 changes: 4 additions & 27 deletions src/pyodide/python-entrypoint-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import {
import { reportError } from 'pyodide-internal:util';
import { default as Limiter } from 'pyodide-internal:limiter';
import { entropyBeforeRequest } from 'pyodide-internal:topLevelEntropy/lib';
import { loadPackages } from 'pyodide-internal:loadPackage';

function pyimportMainModule(pyodide: Pyodide): PyModule {
if (!MAIN_MODULE_NAME.endsWith('.py')) {
Expand Down Expand Up @@ -74,21 +73,10 @@ async function applyPatch(pyodide: Pyodide, patchName: string): Promise<void> {
pyodide.pyimport(patchName + '_patch');
}

/**
* Set up Python packages:
* - patch loadPackage to ignore integrity
* - get requirements
* - Use tar file + requirements to mount site packages directory
* - if in workerd use loadPackage to load packages
* - install patches to make various requests packages work
*
* TODO: move this into setupPackages.js. Can't now because the patch imports
* fail from there for some reason.
*/
export async function setupPackages(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_packages', async () => {
async function setupPatches(pyodide: Pyodide): Promise<void> {
return await enterJaegerSpan('setup_patches', async () => {
patchLoadPackage(pyodide);
await loadPackages(pyodide._module, TRANSITIVE_REQUIREMENTS);

// install any extra packages into the site-packages directory, so calculate where that is.
const pymajor = pyodide._module._py_version_major();
const pyminor = pyodide._module._py_version_minor();
Expand Down Expand Up @@ -119,7 +107,7 @@ function getMainModule(): Promise<PyModule> {
}
mainModulePromise = (async function () {
const pyodide = await getPyodide();
await setupPackages(pyodide);
await setupPatches(pyodide);
Limiter.beginStartup();
try {
return enterJaegerSpan('pyimport_main_module', () =>
Expand Down Expand Up @@ -173,17 +161,6 @@ const handlers: {
try {
// Do not setup anything to do with Python in the global scope when tracing. The Jaeger tracing
// needs to be called inside an IO context.
if (!IS_TRACING) {
if (IS_WORKERD) {
// If we're in workerd, we have to do setupPackages in the IoContext, so don't start it yet.
// TODO: fix this.
await getPyodide();
} else {
// If we're not in workerd, setupPackages doesn't require IO so we can do it all here.
await getMainModule();
}
}

if (IS_WORKERD || IS_TRACING) {
handlers.fetch = makeHandler('on_fetch');
handlers.test = makeHandler('test');
Expand Down
2 changes: 1 addition & 1 deletion src/pyodide/types/FS.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ interface TarFSInfo {
name: string;
parts: string[];
contentsOffset?: number;
reader?: Reader;
reader: Reader | null;
}

declare type MetadataDirInfo = Map<string, MetadataDirInfo>;
Expand Down
5 changes: 2 additions & 3 deletions src/workerd/api/pyodide/pyodide.c++
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,9 @@ void PyodideBundleManager::setPyodideBundleData(
kj::mv(version), {.messageReader = kj::mv(messageReader), .bundle = bundle});
}

const kj::Maybe<kj::ArrayPtr<const unsigned char>> PyodidePackageManager::getPyodidePackage(
const kj::Maybe<const kj::Array<unsigned char>&> PyodidePackageManager::getPyodidePackage(
dom96 marked this conversation as resolved.
Show resolved Hide resolved
kj::StringPtr id) const {
return packages.lockShared()->find(id).map(
[](const kj::Array<unsigned char>& t) { return t.asPtr(); });
return packages.lockShared()->find(id);
}

void PyodidePackageManager::setPyodidePackageData(
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/api/pyodide/pyodide.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class PyodideBundleManager {
class PyodidePackageManager {
public:
void setPyodidePackageData(kj::String id, kj::Array<unsigned char> data) const;
const kj::Maybe<kj::ArrayPtr<const unsigned char>> getPyodidePackage(kj::StringPtr id) const;
const kj::Maybe<const kj::Array<unsigned char>&> getPyodidePackage(kj::StringPtr id) const;

private:
const kj::MutexGuarded<kj::HashMap<kj::String, kj::Array<unsigned char>>> packages;
Expand Down
1 change: 1 addition & 0 deletions src/workerd/server/workerd-api.c++
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,7 @@ void WorkerdApi::compileModules(jsg::Lock& lockParam,
makePyodideMetadataReader(conf, impl->pythonConfig), jsg::ModuleRegistry::Type::INTERNAL);

// Inject packages tar file
// TODO(later): This shouldn't exist once featureFlags.getPythonExternalPackages() is true.
modules->addBuiltinModule("pyodide-internal:packages_tar_reader",
jsg::alloc<ReadOnlyBuffer>(PYODIDE_PACKAGES_TAR.get()),
workerd::jsg::ModuleRegistry::Type::INTERNAL);
Expand Down