Skip to content

Commit

Permalink
Merge pull request #23 from microbiomedata/22-review-mag-output-files
Browse files (browse the repository at this point in the history)
22 review mag output files
  • Loading branch information
Michal Babinski authored Feb 23, 2024
2 parents 13637c8 + ef19330 commit ad3adc9
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 216 deletions.
4 changes: 2 additions & 2 deletions Docker/create_tarfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,12 @@ def create_new_zip(bin_dirs):
else:
input_files.append(file)
for bin_data in data['mags_list']:
if bin_data['bin_quality'] in ['MQ', 'HQ']:
if bin_data['bin_quality'] in ['MQ', 'HQ', 'LQ']:
print(f"Processing {bin_data['bin_name']}")
bin_id = bin_data['bin_name']
contig_ids = bin_data['members_id']
bin_file = bin_files_dict[bin_id]
output_dir = f"{prefix}_{bin_id}"
output_dir = f"{prefix}_{bin_id}_{bin_data['bin_quality']}"
bin_dirs.append(output_dir)
get_bin_annotations(prefix, bin_id, bin_file, input_files, contig_ids, output_dir)

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ The final [MiMAG](https://www.nature.com/articles/nbt.3893#Tab1) output includes
```
|-- project_name_mags_stats.json
|-- project_name_hqmq_bin.zip
|-- project_name_lq_bin.zip
|-- project_name_bin.info
|-- project_name_bins.lowDepth.fa
|-- project_name_bins.tooShort.fa
Expand Down
40 changes: 19 additions & 21 deletions automation_inputs.json
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
{
"nmdc_mags.contig_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/assembly/nmdc_mga0h53q43_contigs.fna",
"nmdc_mags.gff_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_functional_annotation.gff",
"nmdc_mags.cath_funfam_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_cath_funfam.gff",
"nmdc_mags.supfam_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_supfam.gff",
"nmdc_mags.cog_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_cog.gff",
"nmdc_mags.proj_name": "nmdc:mga0h53q43",
"nmdc_mags.pfam_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_pfam.gff",
"nmdc_mags.product_names_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_product_names.tsv",
"nmdc_mags.tigrfam_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_tigrfam.gff",
"nmdc_mags.ec_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_ec.tsv",
"nmdc_mags.ko_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_ko.tsv",
"nmdc_mags.sam_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/assembly/nmdc_mga0h53q43_pairedMapped_sorted.bam",
"nmdc_mags.smart_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_smart.gff",
"nmdc_mags.proteins_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_proteins.faa",
"nmdc_mags.gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:mga0h53q43/annotation/nmdc_mga0h53q43_gene_phylogeny.tsv",
"nmdc_mags.resource": "NERSC",
"nmdc_mags.informed_by": "test",
"nmdc_mags.git_url": "https://github.com/microbiomedata/metaMAGs",
"nmdc_mags.url_root": "https://data.microbiomedata.org/data/"
}

{
"nmdc_mags.contig_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgas-11-rcs4bt79.1/nmdc_wfmgas-11-rcs4bt79.1_contigs.fna",
"nmdc_mags.gff_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_functional_annotation.gff",
"nmdc_mags.cath_funfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_cath_funfam.gff",
"nmdc_mags.supfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_supfam.gff",
"nmdc_mags.cog_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_cog.gff",
"nmdc_mags.pfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_pfam.gff",
"nmdc_mags.product_names_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_product_names.tsv",
"nmdc_mags.tigrfam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_tigrfam.gff",
"nmdc_mags.ec_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_ec.tsv",
"nmdc_mags.ko_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_ko.tsv",
"nmdc_mags.lineage_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_scaffold_lineage.tsv",
"nmdc_mags.sam_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgas-11-rcs4bt79.1/nmdc_wfmgas-11-rcs4bt79.1_pairedMapped_sorted.bam",
"nmdc_mags.smart_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_smart.gff",
"nmdc_mags.proteins_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_proteins.faa",
"nmdc_mags.gene_phylogeny_file": "https://data.microbiomedata.org/data/nmdc:omprc-11-9mvz7z22/nmdc:wfmgan-11-hdaenp36.1/nmdc_wfmgan-11-hdaenp36.1_gene_phylogeny.tsv",
"nmdc_mags.proj": "nmdc:wfmag-11-9dgz7m72.1"
}
Loading

0 comments on commit ad3adc9

Please sign in to comment.