Skip to content

Commit

Permalink
update app
Browse files Browse the repository at this point in the history
  • Loading branch information
YojanaGadiya committed Oct 1, 2024
1 parent 37bfc19 commit 1274e93
Show file tree
Hide file tree
Showing 6 changed files with 74,343 additions and 74,237 deletions.
1 change: 0 additions & 1 deletion .streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
[theme]
base="light"
primaryColor="#7db5d0"
secondaryBackgroundColor="#1e78bc"
textColor="#080000"
97 changes: 75 additions & 22 deletions AMR-KG_Database.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import pandas as pd
from collections import defaultdict
import streamlit as st
Expand Down Expand Up @@ -30,7 +31,7 @@
st.sidebar.image("data/COMBINE_logo.jpg")

st.markdown(
"<h1 style='text-align: center; color: #78bc1e;'>AMR-KG Database</h1>",
"<h1 style='text-align: center; color: #006c8b;'>AMR-KG Database</h1>",
unsafe_allow_html=True,
)
st.markdown(
Expand Down Expand Up @@ -68,10 +69,10 @@
help="Information on the data in AMR-KG.",
)

st.write(
st.markdown(
"Antimicrobial Resistant Knowledge Graph (AMR-KG) is an exhaustive data warehouse of experimentally validated antibacterial chemicals \
covering Gram-positive, Gram-negative, acid-fast bacteria and fungi. The construction of the AMR-KG involved collecting \
minimum inhibitory concentration (MIC) data from three different public data resources:"
[minimum inhibitory concentration (MIC)](http://purl.obolibrary.org/obo/ARO_3004370) data from three different public data resources:"
)


Expand All @@ -82,7 +83,7 @@
container.write("### [ChEMBL](https://www.ebi.ac.uk/chembl/)")

container.write(
"ChEMBL is a manually curated database of bioactive molecules with drug-like properties. \
"ChEMBL (v.34) is a manually curated database of bioactive molecules with drug-like properties. \
It brings together chemical and bioactivity to aid the translation of experimental information into effective new drugs.",
)

Expand All @@ -109,7 +110,7 @@
divider="orange",
help="Stats on the underlying data.",
)
df = pd.read_csv("data/processed/combined_bioassay_data.tsv", sep="\t")
df = pd.read_csv("data/combined_bioassay_data.tsv", sep="\t")


def get_base_stats():
Expand Down Expand Up @@ -138,19 +139,20 @@ def get_base_stats():
_,
_,
_,
gp_class,
gn_class,
fungi_class,
af_class,
best_class,
) = row

if best_class == "gram-positive":
if pd.notna(gp_class):
pchem_dist_dict["gram-positive"].append(gram_pos)

elif best_class == "gram-negative":
if pd.notna(gn_class):
pchem_dist_dict["gram-negative"].append(gram_neg)

elif best_class == "fungi":
if pd.notna(fungi_class):
pchem_dist_dict["fungi"].append(fungi)

elif best_class == "acid-fast":
if pd.notna(af_class):
pchem_dist_dict["acid-fast"].append(acid_fast)

return (chembl_cmpds, coadd_cmpds, spark_cmpds), pchem_dist_dict
Expand All @@ -162,12 +164,22 @@ def get_base_stats():

plt.subplot(2, 2, 1)
plt.bar(
["ChEMBL", "CO-ADD", "SPARK"],
[len(chembl_cmpds), len(coadd_cmpds), len(spark_cmpds)],
[
"ChEMBL",
"SPARK",
"CO-ADD",
],
[len(chembl_cmpds), len(spark_cmpds), len(coadd_cmpds)],
)
plt.title("Compound distribution across resources")
plt.title("Compound distribution across resources", fontsize=15, fontweight="bold")
plt.yscale("log")
plt.ylabel("Number of compounds")
# display the number of compounds on the bars
for i, v in enumerate([len(chembl_cmpds), len(spark_cmpds), len(coadd_cmpds)]):
plt.text(i, v + 10, str(v), ha="center", va="bottom")
plt.ylabel("Number of deduplicated compounds", fontsize=12)
plt.xlabel("Data source", fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.subplot(2, 2, 2)
plt.hist(
Expand All @@ -178,18 +190,21 @@ def get_base_stats():
)
plt.hist(pchem_dist_dict["fungi"], alpha=0.5, color="green", label="fungi")
plt.hist(pchem_dist_dict["acid-fast"], alpha=0.5, color="orange", label="acid-fast")
plt.legend()
plt.title("Distribution of pChEMBL values")
plt.ylabel("Number of compounds")
plt.xlabel("pChEMBL value")
plt.axvline(5, color="red", linestyle="--", label="pChEMBL threshold")
plt.legend(title="Pathogen class", fontsize=12)
plt.title("Distribution of pChEMBL values", fontsize=15, fontweight="bold")
plt.ylabel("Number of deduplicated compounds", fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xlabel("pChEMBL value", fontsize=12)

st.pyplot(fig)


st.header(
":arrow_down: Data Download",
divider="orange",
help="Downloading all the data in the AMR-KG.",
help="Downloading the compound-pathogen data in AMR-KG.",
)


Expand All @@ -200,11 +215,49 @@ def convert_df(df):

csv = convert_df(df)
st.write("The data files contains the following columns in the tab-seperated manner:")
st.markdown(
"""
* `compound_inchikey` - InChI key of the compound \n
* `compound_smiles` - SMILES representation of the compound \n
* `compound_source` - Source database of the compound \n
* `gram_positive` - pMIC value for compound activity against Gram-positive pathogen \n
* `gram_negative` - pMIC value for compound activity against Gram-negative pathogen \n
* `fungi` - pMIC value for compound activity against fungi \n
* `acid_fast` - pMIC value for compound activity against acid-fast pathogen \n
* `chemical_class` - Chemical class disitribution of the compounds based on NPClassifier \n
* `compound_superclass` - Superclass of the compound based on NPClassifier \n
* `compound_pathway` - Pathway of the compound based on NPClassifier \n
* `gram_positive_label` - Indicating whether a compound is active or inactive for a Gram-positive pathogen based on pChEMBL threshold (i.e. 5) \n
* `gram_negative_label` - Indicating whether a compound is active or inactive for a Gram-negative pathogen based on pChEMBL threshold (i.e. 5) \n
* `fungi_label` - Indicating whether a compound is active or inactive for a Fungi pathogen based on pChEMBL threshold (i.e. 5) \n
* `acid_fast_label` - Indicating whether a compound is active or inactive for a Acid-fast pathogen based on pChEMBL threshold (i.e. 5) \n
* `best_class` - Pathogen class of the compound based on the highest pChEMBL values \n
"""
)
st.dataframe(df.head(3))
st.download_button(
"Press to Download", csv, "amrkg_data_dump.tsv", "text/tsv", key="download-tsv"
)

# Publucation note
with st.expander("Check out our publication for more deatils "):
with st.expander(
"If you have found our resource or model useful in your work, please consider citing us: "
):
st.write("""Manuscript in preparation. Please check back soon for more details.""")


# last updated
date = datetime.datetime.now().strftime("%d-%m-%Y %H:%M:%S")
st.markdown(
f"<p style='text-align: center;'>Last updated: {date}</p>",
unsafe_allow_html=True,
)

# footer with text and green background
st.markdown(
"<footer style='background-color: #006c8b; padding: 10px; border-radius: 10px;'>"
"<p style='color: white; text-align: center;'>Fraunhofer ITMP © 2021</p>"
"<p style='color: white;'>This project has received funding from the Innovative Medicines Initiative 2 Joint Undertaking under Grant Agreement No 853967. This Joint Undertaking receives support from the European Union’s Horizon 2020 research and innovation programme and EFPIA companies’ in kind contribution.</p>"
"</footer>",
unsafe_allow_html=True,
)
30 changes: 25 additions & 5 deletions amrkg_chemspace.html

Large diffs are not rendered by default.

Loading

0 comments on commit 1274e93

Please sign in to comment.