diff --git a/configs/import.yaml b/configs/import.yaml index a418ca73..108aac88 100644 --- a/configs/import.yaml +++ b/configs/import.yaml @@ -85,6 +85,7 @@ Workflows: - Assembly Coverage Stats - Assembly AGP - Assembly Coverage BAM + - Error Corrected Reads - Name: Metagenome Annotation Import: false @@ -479,6 +480,15 @@ Data Objects: output_of: nmdc:MetagenomeAssembly mulitple: false action: rename + - data_object_type: Error Corrected Reads + description: Error corrected reads for {id} + name: bbcms error corrected reads + import_suffix: input.corr.fastq.gz + nmdc_suffix: _input.corr.fastq.gz + input_to: [] + output_of: nmdc:MetagenomeAssembly + mulitple: false + action: rename - data_object_type: GOTTCHA2 Report Full description: GOTTCHA2 Full Report for {id} name: GOTTCHA2 report file diff --git a/nmdc_automation/config/workflows/workflows.yaml b/nmdc_automation/config/workflows/workflows.yaml index 065abf63..23b1350e 100644 --- a/nmdc_automation/config/workflows/workflows.yaml +++ b/nmdc_automation/config/workflows/workflows.yaml @@ -103,9 +103,9 @@ Workflows: - Reads QC Interleave Input_prefix: jgi_metaASM Inputs: - input_file: do:Filtered Sequencing Reads - rename_contig_prefix: "{workflow_execution_id}" + input_files: do:Filtered Sequencing Reads proj: "{workflow_execution_id}" + shortRead: false Workflow Execution: name: "Metagenome Assembly for {id}" type: nmdc:MetagenomeAssembly @@ -135,30 +135,34 @@ Workflows: scaf_powsum: "{outputs.stats.scaf_powsum}" scaffolds: "{outputs.stats.scaffolds}" Outputs: - - output: contig + - output: sr_contig name: Final assembly contigs fasta data_object_type: Assembly Contigs description: "Assembly contigs for {id}" - - output: scaffold + - output: sr_scaffold name: Final assembly scaffolds fasta data_object_type: Assembly Scaffolds description: "Assembly scaffolds for {id}" - - output: covstats + - output: sr_covstats name: Assembled contigs coverage information data_object_type: Assembly Coverage Stats description: 
"Coverage Stats for {id}" - - output: agp + - output: sr_agp name: An AGP format file that describes the assembly data_object_type: Assembly AGP description: "AGP for {id}" - - output: bam + - output: sr_bam name: Sorted bam file of reads mapping back to the final assembly data_object_type: Assembly Coverage BAM description: "Sorted Bam for {id}" - - output: asminfo + - output: sr_asminfo name: File containing assembly info data_object_type: Assembly Info File description: "Assembly info for {id}" + - output: sr_bbcms_fq + name: bbcms error corrected reads + data_object_type: Error Corrected Reads + description: "Error corrected reads for {id}" - Name: Metagenome Annotation Type: nmdc:MetagenomeAnnotation diff --git a/nmdc_automation/models/workflow.py b/nmdc_automation/models/workflow.py index ed0b29f3..a225d72d 100644 --- a/nmdc_automation/models/workflow.py +++ b/nmdc_automation/models/workflow.py @@ -101,13 +101,16 @@ class WorkflowConfig: # populated after initialization children: Set["WorkflowConfig"] = field(default_factory=set) parents: Set["WorkflowConfig"] = field(default_factory=set) - data_object_types: List[str] = field(default_factory=list) + input_data_object_types: List[str] = field(default_factory=list) def __post_init__(self): - """ Initialize the object """ + """ Parse input data object types from the inputs """ for _, inp_param in self.inputs.items(): + # Some input params are boolean values, skip these + if isinstance(inp_param, bool): + continue if inp_param.startswith("do:"): - self.data_object_types.append(inp_param[3:]) + self.input_data_object_types.append(inp_param[3:]) if not self.type: # Infer the type from the name if self.collection == 'data_generation_set' and 'Sequencing' in self.name: diff --git a/nmdc_automation/workflow_automation/sched.py b/nmdc_automation/workflow_automation/sched.py index 8ddf92d5..1cbea8ec 100644 --- a/nmdc_automation/workflow_automation/sched.py +++ b/nmdc_automation/workflow_automation/sched.py @@ -129,7 
+129,11 @@ def create_job_rec(self, job: SchedulerJob): inp = dict() optional_inputs = wf.optional_inputs for k, v in job.workflow.inputs.items(): - if v.startswith("do:"): + # some inputs are booleans and should not be modified + if isinstance(v, bool): + inp[k] = v + continue + elif v.startswith("do:"): do_type = v[3:] dobj = do_by_type.get(do_type) if not dobj: diff --git a/nmdc_automation/workflow_automation/workflow_process.py b/nmdc_automation/workflow_automation/workflow_process.py index f716526f..3a934f71 100644 --- a/nmdc_automation/workflow_automation/workflow_process.py +++ b/nmdc_automation/workflow_automation/workflow_process.py @@ -23,7 +23,7 @@ def get_required_data_objects_map(db, workflows: List[WorkflowConfig]) -> Dict[s # Build up a filter of what types are used required_types = set() for wf in workflows: - required_types.update(set(wf.data_object_types)) + required_types.update(set(wf.input_data_object_types)) required_data_objs_by_id = dict() for rec in db.data_object_set.find({"data_object_type": {"$ne": None}}):