Large Model Unit Tests on Marqo tag a5e265f6457a78fafc83c98d3390f561eb333d25 and Marqo-base tag 1435198e41c67c620848f0f40fc6b794bd88526a #938
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs the unit tests, including the large-model tests, on a CUDA machine.
name: largemodel_unit_test_CI
# Folded scalar: the two lines below are joined with a single space into one
# title line; the refs are empty for runs that were not started with inputs.
run-name: >-
  Large Model Unit Tests on Marqo tag ${{ inputs.marqo_ref }}
  and Marqo-base tag ${{ inputs.marqo_base_ref }}
# Triggers: reusable-workflow call, manual dispatch with explicit refs, and
# pushes / pull requests that target mainline or a releases/* branch.
on:
  # No inputs are declared for the reusable-workflow entry point.
  workflow_call:
  workflow_dispatch:
    inputs:
      marqo_ref:
        type: string
        required: true
        description: >
          Marqo branch, tag, or commit SHA to checkout
      marqo_base_ref:
        type: string
        required: true
        description: >
          Base (Marqo-base) branch, tag, or commit SHA to checkout
  push:
    branches:
      - mainline
      - 'releases/*'
    # Changes that only touch Markdown files do not start a run.
    paths-ignore:
      - '**.md'
  # Same branch and path filters as `push`, applied to the PR's base branch.
  pull_request_target:
    branches:
      - mainline
      - 'releases/*'
    paths-ignore:
      - '**.md'
# One active run per PR head ref (or per pushed ref when there is no head
# ref); a newer run in the same group cancels the one still in progress.
concurrency:
  group: 'large-model-unit-tests-${{ github.head_ref || github.ref }}'
  cancel-in-progress: true

# The workflow token only gets read access to repository contents.
permissions:
  contents: read
# Three jobs run in sequence:
#   Start-Runner -> launches an EC2 instance registered as a self-hosted runner
#   Test-Marqo   -> builds/starts Vespa on that runner and runs the tests
#   Stop-Runner  -> always runs afterwards and shuts the instance down
jobs:
  Start-Runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    # Exposed so the later jobs can target and then terminate the instance.
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ vars.MARQO_CUDA_TESTS_INSTANCE_AMI }}
          ec2-instance-type: g4dn.2xlarge
          subnet-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SUBNET_ID }}
          security-group-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SECURITY_GROUP_ID }}
          # optional, requires additional permissions
          # NOTE(review): the tag key "WorlflowURL" looks like a typo for
          # "WorkflowURL"; kept as-is because tooling may already query it.
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "marqo-github-runner-${{ github.run_id }}"},
              {"Key": "GitHubRepo", "Value": "${{ github.repository }}"},
              {"Key": "WorkflowName", "Value": "${{ github.workflow }}"},
              {"Key": "WorkflowRunId", "Value": "${{ github.run_id }}"},
              {"Key": "WorlflowURL", "Value": "${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}"},
              {"Key": "PoloRole", "Value": "testing"}
            ]

  Test-Marqo:
    name: Run Large Model Unit Tests
    # required to start the main job when the runner is ready
    needs: Start-Runner
    # run the job on the newly created runner; the job id is spelled exactly
    # as declared above
    runs-on: ${{ needs.Start-Runner.outputs.label }}
    environment: marqo-test-suite
    steps:
      - name: Checkout marqo repo
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          path: marqo
          # Empty unless the triggering event carries inputs; actions/checkout
          # then falls back to its default ref for the event.
          ref: ${{ github.event.inputs.marqo_ref }}
      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
          python-version: "3.9"
          cache: "pip"
      - name: Checkout marqo-base for requirements
        uses: actions/checkout@v3
        with:
          repository: marqo-ai/marqo-base
          path: marqo-base
          ref: ${{ github.event.inputs.marqo_base_ref }}
      - name: Install dependencies
        run: |
          pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
          # override base requirements with marqo requirements, if needed:
          pip install -r marqo/requirements.dev.txt
          pip install pytest==7.4.0
      - name: Build Vespa
        run: |
          # Presumably removed so it cannot hold the apt/dpkg lock during the
          # installs below -- TODO confirm.
          systemctl stop unattended-upgrades
          apt-get remove -y unattended-upgrades
          echo "Updating package list"
          apt-get update -y
          # Build Vespa components
          echo "Installing jdk 17"
          sudo apt-get install openjdk-17-jdk -y
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
          export PATH=$JAVA_HOME/bin:$PATH
          echo "Installing maven"
          sudo apt-get install maven -y
          echo "Building Vespa components"
          cd marqo/vespa
          mvn clean package
      - name: Start Vespa
        run: |
          # Define these for checking if Vespa is ready
          export VESPA_CONFIG_URL=http://localhost:19071
          export VESPA_DOCUMENT_URL=http://localhost:8080
          export VESPA_QUERY_URL=http://localhost:8080
          cd marqo/scripts/vespa_local
          set -x
          python vespa_local.py start
          set +x
          # Fixed 20 second pause before the application package is deployed.
          echo "Waiting for Vespa to start"
          for i in {1..20}; do
            echo -ne "Waiting... $i seconds\r"
            sleep 1
          done
          echo -e "\nDone waiting."
          # Zip up schemas and services
          sudo apt-get install zip -y
          zip -r vespa_tester_app.zip services.xml schemas
          # Deploy application with test schema
          curl --header "Content-Type:application/zip" --data-binary @vespa_tester_app.zip http://localhost:19071/application/v2/tenant/default/prepareandactivate
          # wait for vespa to start (document url):
          timeout 10m bash -c 'until curl -f -X GET $VESPA_DOCUMENT_URL >/dev/null 2>&1; do echo "  Waiting for Vespa document API to be available..."; sleep 10; done;' || \
            (echo "Vespa (Document URL) did not start in time" && exit 1)
          echo "Vespa document API is available. Local Vespa setup complete."
          # Delete the zip file
          rm vespa_tester_app.zip
          echo "Deleted vespa_tester_app.zip"
      - name: Run Large Model Unit Tests
        # Everything the test process needs is handed over through the step
        # environment. In particular the secrets are no longer pasted into the
        # script text, so the shell never word-splits, globs or interprets
        # their values.
        env:
          # Define these for use by marqo
          VESPA_CONFIG_URL: "http://localhost:19071"
          VESPA_DOCUMENT_URL: "http://localhost:8080"
          VESPA_QUERY_URL: "http://localhost:8080"
          MARQO_MAX_CPU_MODEL_MEMORY: "15"
          MARQO_MAX_CUDA_MODEL_MEMORY: "15"
          PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }}
          PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }}
          PRIVATE_MODEL_TESTS_HF_TOKEN: ${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }}
          PYTHONPATH: "./marqo/tests:./marqo/src:./marqo"
        run: |
          pytest marqo/tests/s2_inference/test_large_model_encoding.py::TestLargeClipModels::test_vectorize --largemodel --ignore=marqo/tests/test_documentation.py

  Stop-Runner:
    name: Stop self-hosted EC2 runner
    needs:
      # required to get output from the start-runner job
      - Start-Runner
      # required to wait when the main job is done
      - Test-Marqo
    runs-on: ubuntu-latest
    # required to stop the runner even if the error happened in the previous jobs
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.Start-Runner.outputs.label }}
          ec2-instance-id: ${{ needs.Start-Runner.outputs.ec2-instance-id }}