Skip to content

Commit

Permalink
Fix 2 file staging unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mbthornton-lbl committed Dec 5, 2024
1 parent 4484ed0 commit b8e58ad
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 43 deletions.
10 changes: 5 additions & 5 deletions nmdc_automation/jgi_file_staging/models.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
from datetime import datetime
from typing import Optional
from typing import Optional, List

from pydantic import BaseModel
from pydantic import Field


class Sample(BaseModel):
    """Pydantic model for one file record in the JGI file staging package.

    All identifier fields are declared as strings and ``file_size`` as an
    integer. ``md5sum``, ``update_date`` and ``request_id`` are optional and
    default to ``None``, so a record can be built before those values exist.
    """

    apGoldId: str
    studyId: str
    itsApId: str
    projects: str
    biosample_id: str
    seq_id: str
    file_name: str
    file_status: str
    file_size: int
    jdp_file_id: str
    md5sum: Optional[str] = None
    analysis_project_id: str
    # Use a factory so the timestamp is taken when each instance is created.
    # A plain ``datetime.now()`` default is evaluated once, at class
    # definition, and would then be shared by every instance.
    create_date: datetime = Field(default_factory=datetime.now)
    update_date: Optional[datetime] = None
    request_id: Optional[str] = None
3 changes: 2 additions & 1 deletion tests/test_jgi_file_staging/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def grow_analysis_df():
"apGoldId",
"studyId",
"itsApId",
"projects",
"biosample_id",
"seq_id",
"file_name",
Expand All @@ -45,5 +46,5 @@ def grow_analysis_df():
"analysis_project_id",
]
]
grow_analysis_df["project"] = "test_project"
# grow_analysis_df["project"] = "test_project"
return grow_analysis_df
20 changes: 10 additions & 10 deletions tests/test_jgi_file_staging/fixtures/grow_analysis_projects.csv
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
apGoldId,studyId,itsApId,projects,biosample_id,seq_id,file_name,file_status,file_size,jdp_file_id,md5sum,analysis_project_id
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,52614.1.394702.GCACTAAC-CCAAGACT.filtered-report.txt,RESTORED,3645,6190d7d30de2fc3298da6f7a,fcd87248b5922a8bd0d530bcb23bffae,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,52614.1.394702.GCACTAAC-CCAAGACT.filter_cmd-METAGENOME.sh,RESTORED,6151,6190d7d30de2fc3298da6f7c,892bfb0ad0f44ce133530e07d24ab37f,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,Ga0499978_imgap.info,RESTORED,411,61a9d6ee8277d7ede604d0f6,852f507c44a0743e08cc3cc0de9575d2,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,Ga0499978_proteins.supfam.domtblout,RESTORED,7424934505,61a9d6ef8277d7ede604d105,c09cc12998669c5d4ec3973ff4d27580,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,Ga0499978_ko.tsv,RESTORED,44433089,61a9d6ef8277d7ede604d0f8,0ad4c6ca9deab065699e6f431d939cdf,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,Ga0499978_proteins.faa,RESTORED,417512094,61a9d6ef8277d7ede604d101,bd36edd00d12188b1d45a9b1c942bbb4,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,pairedMapped_sorted.bam.cov,RESTORED,80122313,61a9d6f18277d7ede604d116,30c9b60aab947deba0ffaa6e21755964,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,Table_8_-_3300049478.taxonomic_composition.txt,RESTORED,16996,61a9d6ed8277d7ede604d0e4,ee3efca7c81b6dbf837be5e91c5bbc78,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,Ga0499978_annotation_config.yaml,RESTORED,3015,61a9d6ee8277d7ede604d0e5,a7950724aa003bea5f23f2edbd24c596,1323348
Ga0499978,Gs0149396,1323348,['Gp0587070'],Gb0305643,1323445,rqc-stats.pdf,RESTORED,290034,619d6f9850d56abc0a99a4f4,100954063b2bd8a0bafb2488f6f07bdd,1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,52614.1.394702.GCACTAAC-CCAAGACT.filtered-report.txt,RESTORED,3645,6190d7d30de2fc3298da6f7a,fcd87248b5922a8bd0d530bcb23bffae,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,52614.1.394702.GCACTAAC-CCAAGACT.filter_cmd-METAGENOME.sh,RESTORED,6151,6190d7d30de2fc3298da6f7c,892bfb0ad0f44ce133530e07d24ab37f,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,Ga0499978_imgap.info,RESTORED,411,61a9d6ee8277d7ede604d0f6,852f507c44a0743e08cc3cc0de9575d2,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,Ga0499978_proteins.supfam.domtblout,RESTORED,7424934505,61a9d6ef8277d7ede604d105,c09cc12998669c5d4ec3973ff4d27580,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,Ga0499978_ko.tsv,RESTORED,44433089,61a9d6ef8277d7ede604d0f8,0ad4c6ca9deab065699e6f431d939cdf,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,Ga0499978_proteins.faa,RESTORED,417512094,61a9d6ef8277d7ede604d101,bd36edd00d12188b1d45a9b1c942bbb4,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,pairedMapped_sorted.bam.cov,RESTORED,80122313,61a9d6f18277d7ede604d116,30c9b60aab947deba0ffaa6e21755964,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,Table_8_-_3300049478.taxonomic_composition.txt,RESTORED,16996,61a9d6ed8277d7ede604d0e4,ee3efca7c81b6dbf837be5e91c5bbc78,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,Ga0499978_annotation_config.yaml,RESTORED,3015,61a9d6ee8277d7ede604d0e5,a7950724aa003bea5f23f2edbd24c596,p1323348
Ga0499978,Gs0149396,a1323348,['Gp0587070'],Gb0305643,s1323445,rqc-stats.pdf,RESTORED,290034,619d6f9850d56abc0a99a4f4,100954063b2bd8a0bafb2488f6f07bdd,p1323348
59 changes: 32 additions & 27 deletions tests/test_jgi_file_staging/test_file_metadata.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import mongomock
import pandas as pd
from pathlib import Path
import pytest


from nmdc_automation.jgi_file_staging.jgi_file_metadata import (
get_access_token,
get_mongo_db,
check_access_token,
get_sequence_id,
get_analysis_projects_from_proposal_id,
insert_samples_into_mongodb,
)
from nmdc_automation.jgi_file_staging.models import Sample


FIXTURE_DIR = Path(__file__).parent / "fixtures"
Expand Down Expand Up @@ -84,34 +89,34 @@ def test_get_analysis_projects_from_proposal_id(mock_get):
"projects": "['Gp0503551']",
}

# TODO: fix this test. Data fixtures raise ValidationError from Sample model
# def test_sample_model_instance_creation(monkeypatch, grow_analysis_df):
# sample_dict = grow_analysis_df.to_dict("records")[0]
# sample_model = Sample(**sample_dict)
# assert sample_model.apGoldId == "Ga0499978"
# assert sample_model.studyId == "Gs0149396"
# assert sample_model.itsApId == 1323348
# assert sample_model.projects == "['Gp0587070']"
# assert sample_model.biosample_id == "Ga0499978"
# assert sample_model.seq_id == "Ga0499978"
# assert sample_model.file_name == "Ga0499978.fna.gz"
# assert sample_model.file_status == "uploaded"
# assert sample_model.file_size == 1000
# assert sample_model.jdp_file_id == 123456
# assert sample_model.md5sum == "1234567890abcdef"
# assert sample_model.analysis_project_id == 123456
# assert sample_model.project == "test_project"

def test_sample_model_instance_creation(monkeypatch, grow_analysis_df):
    """Build a Sample from the first fixture record and check its fields."""
    first_record = grow_analysis_df.to_dict("records")[0]
    sample_model = Sample(**first_record)

    # Expected values, listed in the order they are checked.
    expected_fields = {
        "apGoldId": "Ga0499978",
        "studyId": "Gs0149396",
        "itsApId": "a1323348",
        "projects": "['Gp0587070']",
        "biosample_id": "Gb0305643",
        "seq_id": "s1323445",
        "file_name": "52614.1.394702.GCACTAAC-CCAAGACT.filtered-report.txt",
        "file_status": "RESTORED",
        "file_size": 3645,
        "jdp_file_id": "6190d7d30de2fc3298da6f7a",
        "md5sum": "fcd87248b5922a8bd0d530bcb23bffae",
        "analysis_project_id": "p1323348",
    }
    for attribute, expected in expected_fields.items():
        assert getattr(sample_model, attribute) == expected



# TODO: fix this test. Data fixtures are raising ValidationError from
# the pydantic Sample model
# @mongomock.patch(servers=(("localhost", 27017),), on_new="create")
# def test_insert_samples_into_mongodb(monkeypatch, grow_analysis_df):
# monkeypatch.setenv("MONGO_DBNAME", "test_db")
# client = get_mongo_db()
# mdb = client["test_db"]
#
# insert_samples_into_mongodb(grow_analysis_df.to_dict("records"), mdb)
# mdb = get_mongo_db()
# sample = mdb.samples.find_one({"apGoldId": "Ga0499978"})
# assert sample["studyId"] == "Gs0149396"
@mongomock.patch(servers=(("localhost", 27017),), on_new="create")
def test_insert_samples_into_mongodb(monkeypatch, grow_analysis_df):
    """Insert the fixture records, then read one back by its apGoldId."""
    monkeypatch.setenv("MONGO_DBNAME", "test_db")

    # NOTE(review): this handle and its "test_db" lookup are obtained before
    # the insert but never used afterwards; kept in case the calls have
    # side effects. Confirm whether they can be removed.
    connection = get_mongo_db()
    _unused = connection["test_db"]

    records = grow_analysis_df.to_dict("records")
    insert_samples_into_mongodb(records)

    stored = get_mongo_db().samples.find_one({"apGoldId": "Ga0499978"})
    assert stored["studyId"] == "Gs0149396"

0 comments on commit b8e58ad

Please sign in to comment.