microbiomedata · aclum · Nov 22, 2024 · Nov 22, 2024 · Dec 4, 2024 · aclum
diff --git a/configs/import.yaml b/configs/import.yaml
@@ -85,6 +85,7 @@ Workflows:
       - Assembly Coverage Stats
       - Assembly AGP
       - Assembly Coverage BAM
+      - Error Corrected Reads
 
   - Name: Metagenome Annotation
     Import: false
@@ -479,6 +480,15 @@ Data Objects:
       output_of: nmdc:MetagenomeAssembly
       mulitple: false
       action: rename
+    - data_object_type: Error Corrected Reads
+      description: Error corrected reads for {id}
+      name: bbcms error corrected reads
+      import_suffix: input.corr.fastq.gz
+      nmdc_suffix: _input.corr.fastq.gz
+      input_to: []
+      output_of: nmdc:MetagenomeAssembly
+      mulitple: false
+      action: rename
     - data_object_type: GOTTCHA2 Report Full
       description: GOTTCHA2 Full Report for {id}
       name: GOTTCHA2 report file

diff --git a/nmdc_automation/config/workflows/workflows.yaml b/nmdc_automation/config/workflows/workflows.yaml
@@ -103,9 +103,9 @@ Workflows:
     - Reads QC Interleave
     Input_prefix: jgi_metaASM
     Inputs:
-      input_file: do:Filtered Sequencing Reads
-      rename_contig_prefix: "{workflow_execution_id}"
+      input_files: do:Filtered Sequencing Reads
       proj: "{workflow_execution_id}"
+      shortRead: false
     Workflow Execution:
       name: "Metagenome Assembly for {id}"
       type: nmdc:MetagenomeAssembly
@@ -135,30 +135,34 @@ Workflows:
       scaf_powsum: "{outputs.stats.scaf_powsum}"
       scaffolds: "{outputs.stats.scaffolds}"
     Outputs:
-      - output: contig
+      - output: sr_contig
         name: Final assembly contigs fasta
         data_object_type: Assembly Contigs
         description: "Assembly contigs for {id}"
-      - output: scaffold
+      - output: sr_scaffold
         name: Final assembly scaffolds fasta
         data_object_type: Assembly Scaffolds
         description: "Assembly scaffolds for {id}"
-      - output: covstats
+      - output: sr_covstats
         name: Assembled contigs coverage information
         data_object_type: Assembly Coverage Stats
         description: "Coverage Stats for {id}"
-      - output: agp
+      - output: sr_agp
         name: An AGP format file that describes the assembly
         data_object_type: Assembly AGP
         description: "AGP for {id}"
-      - output: bam
+      - output: sr_bam
         name: Sorted bam file of reads mapping back to the final assembly
         data_object_type: Assembly Coverage BAM
         description: "Sorted Bam for {id}"
-      - output: asminfo
+      - output: sr_asminfo
         name: File containing assembly info
         data_object_type: Assembly Info File
         description: "Assembly info for {id}"
+      - output: sr_bbcms_fq
+        name: bbcms error corrected reads
+        data_object_type: Error Corrected Reads
+        description: "Error corrected reads for {id}"
 
   - Name: Metagenome Annotation
     Type: nmdc:MetagenomeAnnotation

diff --git a/nmdc_automation/models/workflow.py b/nmdc_automation/models/workflow.py
@@ -101,13 +101,16 @@ class WorkflowConfig:
     # populated after initialization
     children: Set["WorkflowConfig"] = field(default_factory=set)
     parents: Set["WorkflowConfig"] = field(default_factory=set)
-    data_object_types: List[str] = field(default_factory=list)
+    input_data_object_types: List[str] = field(default_factory=list)
 
     def __post_init__(self):
-        """ Initialize the object """
+        """ Parse input data object types from the inputs """
         for _, inp_param in self.inputs.items():
+            # Some input params are booleans, which have no startswith(); skip them
+            if isinstance(inp_param, bool):
+                continue
             if inp_param.startswith("do:"):
-                self.data_object_types.append(inp_param[3:])
+                self.input_data_object_types.append(inp_param[3:])
         if not self.type:
             # Infer the type from the name
             if self.collection == 'data_generation_set' and 'Sequencing' in self.name:

diff --git a/nmdc_automation/workflow_automation/sched.py b/nmdc_automation/workflow_automation/sched.py
@@ -129,7 +129,11 @@ def create_job_rec(self, job: SchedulerJob):
         inp = dict()
         optional_inputs = wf.optional_inputs
         for k, v in job.workflow.inputs.items():
-            if v.startswith("do:"):
+            # some inputs are booleans and should not be modified
+            if isinstance(v, bool):
+                inp[k] = v
+                continue
+            elif v.startswith("do:"):
                 do_type = v[3:]
                 dobj = do_by_type.get(do_type)
                 if not dobj:

diff --git a/nmdc_automation/workflow_automation/workflow_process.py b/nmdc_automation/workflow_automation/workflow_process.py
@@ -23,7 +23,7 @@ def get_required_data_objects_map(db, workflows: List[WorkflowConfig]) -> Dict[s
     # Build up a filter of what types are used
     required_types = set()
     for wf in workflows:
-        required_types.update(set(wf.data_object_types))
+        required_types.update(set(wf.input_data_object_types))
 
     required_data_objs_by_id = dict()
     for rec in db.data_object_set.find({"data_object_type": {"$ne": None}}):