update db master table spec and corresponding helper functions (facebookresearch#465)

Summary: Update the db master table spec so that experiment ID and participant ID are no longer unique. Functions that used experiment_id as a key have been swapped to use the master table's master key (unique_id). Some functions appeared to expect that multiple entries could be returned for a single experiment ID, which did not make sense while it was unique; this means nothing really needs to be changed in them. Replay functions used to leverage the experiment_id, which was assumed to be unique (and it was). Now replay functions use the master table's unique_id to pick which experiment to replay, which conveniently also makes it a lot easier to just try integers starting from 0 (instead of finding a uuid). Metadata reading has been changed to correctly get all the information from the config to match the master table spec. These are technically breaking changes that may affect old scripts, but it's not clear which db utility functions may be used in scripts that rely on experiment_ids to identify experiments. Old dbs should still all work and be compatible. Reviewed By: crasanders Differential Revision: D66526187
JasonKChow · Dec 16, 2024 · 07e0daf · 07e0daf
1 parent 669738f
commit 07e0daf
Show file tree

Hide file tree

Showing 9 changed files with 203 additions and 177 deletions.
diff --git a/aepsych/config.py b/aepsych/config.py
@@ -150,14 +150,33 @@ def to_dict(self, deduplicate: bool = True) -> Dict[str, Any]:
         return _dict
 
     # Turn the metadata section into JSON.
-    def jsonifyMetadata(self) -> str:
-        """Turn the metadata section into JSON.
+    def jsonifyMetadata(self, only_extra: bool = False) -> str:
+        """Return a json string of the metadata section.
+
+        Args:
+            only_extra (bool): Only jsonify the extra meta data.
 
         Returns:
-            str: JSON representation of the metadata section.
+            str: A json string representing the metadata dictionary or an empty string
+                if there is no metadata to return.
         """
         configdict = self.to_dict()
-        return json.dumps(configdict["metadata"])
+        metadata = configdict["metadata"].copy()
+
+        if only_extra:
+            default_metadata = [
+                "experiment_name",
+                "experiment_description",
+                "experiment_id",
+                "participant_id",
+            ]
+            for name in default_metadata:
+                metadata.pop(name, None)
+
+        if len(metadata.keys()) == 0:
+            return ""
+        else:
+            return json.dumps(metadata)
 
     # Turn the entire config into JSON format.
     def jsonifyAll(self) -> str:

diff --git a/aepsych/database/db.py b/aepsych/database/db.py
@@ -9,6 +9,7 @@
 import logging
 import os
 import uuid
+import warnings
 from contextlib import contextmanager
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -138,18 +139,18 @@ def get_master_records(self) -> List[tables.DBMasterTable]:
         records = self._session.query(tables.DBMasterTable).all()
         return records
 
-    def get_master_record(self, experiment_id: int) -> Optional[tables.DBMasterTable]:
-        """Grab the list of master record for a specific experiment (master) id.
+    def get_master_record(self, master_id: int) -> Optional[tables.DBMasterTable]:
+        """Grab the list of master record for a specific master id (unique_id of master table).
 
         Args:
-            experiment_id (int): The experiment id.
+            master_id (int): The master_id, which is the master key of the master table.
 
         Returns:
             tables.DBMasterTable or None: The master record or None if it doesn't exist.
         """
         records = (
             self._session.query(tables.DBMasterTable)
-            .filter(tables.DBMasterTable.experiment_id == experiment_id)
+            .filter(tables.DBMasterTable.unique_id == master_id)
             .all()
         )
 
@@ -162,7 +163,7 @@ def get_replay_for(self, master_id: int) -> Optional[List[tables.DbReplayTable]]
         """Get the replay records for a specific master row.
 
         Args:
-            master_id (int): The master id.
+            master_id (int): The unique id for the master row (it's the master key).
 
         Returns:
             List[tables.DbReplayTable] or None: The replay records or None if they don't exist.
@@ -178,7 +179,7 @@ def get_strats_for(self, master_id: int = 0) -> Optional[List[Any]]:
         """Get the strat records for a specific master row.
 
         Args:
-            master_id (int): The master id. Defaults to 0.
+            master_id (int): The master table unique ID. Defaults to 0.
 
         Returns:
             List[Any] or None: The strat records or None if they don't exist.
@@ -247,6 +248,13 @@ def get_all_params_for(self, master_id: int) -> Optional[List[tables.DbRawTable]
         Returns:
             List[tables.DbRawTable] or None: The parameters or None if they don't exist.
         """
+        warnings.warn(
+            "get_all_params_for is the same as get_param_for since there can only be one instance of any master_id",
+            DeprecationWarning,
+        )
+        return self.get_param_for(master_id=master_id)
+
+        # TODO: This function should change to being able to get params for all experiments given specific metadata
         raw_record = self.get_raw_for(master_id)
         params = []
 
@@ -258,14 +266,11 @@ def get_all_params_for(self, master_id: int) -> Optional[List[tables.DbRawTable]
 
         return None
 
-    def get_param_for(
-        self, master_id: int, iteration_id: int
-    ) -> Optional[List[tables.DbRawTable]]:
+    def get_param_for(self, master_id: int) -> Optional[List[tables.DbRawTable]]:
         """Get the parameters for a specific iteration of a specific experiment.
 
         Args:
             master_id (int): The master id.
-            iteration_id (int): The iteration id.
 
         Returns:
             List[tables.DbRawTable] or None: The parameters or None if they don't exist.
@@ -274,7 +279,7 @@ def get_param_for(
 
         if raw_record is not None:
             for raw in raw_record:
-                if raw.unique_id == iteration_id:
+                if raw.unique_id == master_id:
                     return raw.children_param
 
         return None
@@ -288,6 +293,13 @@ def get_all_outcomes_for(self, master_id: int) -> Optional[List[tables.DbRawTabl
         Returns:
             List[tables.DbRawTable] or None: The outcomes or None if they don't exist.
         """
+        warnings.warn(
+            "get_all_outcomes_for is the same as get_outcome_for since there can only be one instance of any master_id",
+            DeprecationWarning,
+        )
+        return self.get_outcome_for(master_id=master_id)
+
+        # TODO: This function should change to being able to get outcomes for all experiments given specific metadata
         raw_record = self.get_raw_for(master_id)
         outcomes = []
 
@@ -299,14 +311,11 @@ def get_all_outcomes_for(self, master_id: int) -> Optional[List[tables.DbRawTabl
 
         return None
 
-    def get_outcome_for(
-        self, master_id: int, iteration_id: int
-    ) -> Optional[List[tables.DbRawTable]]:
+    def get_outcome_for(self, master_id: int) -> Optional[List[tables.DbRawTable]]:
         """Get the outcomes for a specific iteration of a specific experiment.
 
         Args:
             master_id (int): The master id.
-            iteration_id (int): The iteration id.
 
         Returns:
             List[tables.DbRawTable] or None: The outcomes or None if they don't exist.
@@ -315,56 +324,46 @@ def get_outcome_for(
 
         if raw_record is not None:
             for raw in raw_record:
-                if raw.unique_id == iteration_id:
+                if raw.unique_id == master_id:
                     return raw.children_outcome
 
         return None
 
     def record_setup(
         self,
-        description: str,
-        name: str,
+        description: str = None,
+        name: str = None,
         extra_metadata: Optional[str] = None,
-        id: Optional[str] = None,
+        exp_id: Optional[str] = None,
         request: Dict[str, Any] = None,
-        participant_id: Optional[int] = None,
+        par_id: Optional[int] = None,
     ) -> str:
         """Record the setup of an experiment.
 
         Args:
-            description (str): The description of the experiment.
-            name (str): The name of the experiment.
+            description (str, optional): The description of the experiment, defaults to None.
+            name (str, optional): The name of the experiment, defaults to None.
             extra_metadata (str, optional): Extra metadata. Defaults to None.
-            id (str, optional): The id of the experiment. Defaults to None.
-            request (Dict[str, Any]): The request. Defaults to None.
-            participant_id (int, optional): The participant id. Defaults to None.
+            exp_id (str, optional): The id of the experiment. Defaults to a generated uuid.
+            request (Dict[str, Any], optional): The request. Defaults to None.
+            par_id (int, optional): The participant id. Defaults to generated uuid.
 
         Returns:
             str: The experiment id.
         """
         self.get_engine()
 
-        if id is None:
-            master_table = tables.DBMasterTable()
-            master_table.experiment_description = description
-            master_table.experiment_name = name
-            master_table.experiment_id = str(uuid.uuid4())
-            if participant_id is not None:
-                master_table.participant_id = participant_id
-            else:
-                master_table.participant_id = str(
-                    uuid.uuid4()
-                )  # no p_id specified will result in a generated UUID
-
-            master_table.extra_metadata = extra_metadata
-
-            self._session.add(master_table)
+        master_table = tables.DBMasterTable()
+        master_table.experiment_description = description
+        master_table.experiment_name = name
+        master_table.experiment_id = exp_id if exp_id is not None else str(uuid.uuid4())
+        master_table.participant_id = (
+            par_id if par_id is not None else str(uuid.uuid4())
+        )
+        master_table.extra_metadata = extra_metadata
+        self._session.add(master_table)
 
-            logger.debug(f"record_setup = [{master_table}]")
-        else:
-            master_table = self.get_master_record(id)
-            if master_table is None:
-                raise RuntimeError(f"experiment id {id} doesn't exist in the db.")
+        logger.debug(f"record_setup = [{master_table}]")
 
         record = tables.DbReplayTable()
         record.message_type = "setup"

diff --git a/aepsych/database/tables.py b/aepsych/database/tables.py
@@ -32,27 +32,6 @@
 
 Base = declarative_base()
 
-"""
-Original Schema
-CREATE TABLE master (
-unique_id INTEGER NOT NULL,
-experiment_name VARCHAR(256),
-experiment_description VARCHAR(2048),
-experiment_id VARCHAR(10),
-PRIMARY KEY (unique_id),
-UNIQUE (experiment_id)
-);
-CREATE TABLE replay_data (
-unique_id INTEGER NOT NULL,
-timestamp DATETIME,
-message_type VARCHAR(64),
-message_contents BLOB,
-master_table_id INTEGER,
-PRIMARY KEY (unique_id),
-FOREIGN KEY(master_table_id) REFERENCES master (unique_id)
-);
-"""
-
 
 class DBMasterTable(Base):
     """
@@ -62,10 +41,10 @@ class DBMasterTable(Base):
     __tablename__ = "master"
 
     unique_id = Column(Integer, primary_key=True, autoincrement=True)
-    experiment_name = Column(String(256))
-    experiment_description = Column(String(2048))
-    experiment_id = Column(String(10), unique=True)
-    participant_id = Column(String(50), unique=True)
+    experiment_name = Column(String(256), nullable=True)
+    experiment_description = Column(String(2048), nullable=True)
+    experiment_id = Column(String(10))
+    participant_id = Column(String(50))
 
     extra_metadata = Column(String(4096))  # JSON-formatted metadata
 
@@ -176,6 +155,19 @@ def _add_column(engine: Engine, column: str) -> None:
         except Exception as e:
             logger.debug(f"Column already exists, no need to alter. [{e}]")
 
+    @staticmethod
+    def _update_column(engine: Engine, column: str, spec: str) -> None:
+        """Update column with a new spec.
+
+        Args:
+            engine (Engine): The sqlalchemy engine.
+            column (str): The column name.
+            spec (str): The new column spec.
+        """
+        logger.debug(f"Altering the master table column: {column} to this spec {spec}")
+        engine.execute(f"ALTER TABLE master MODIFY {column} {spec}")
+        engine.commit()
+
 
 class DbReplayTable(Base):
     __tablename__ = "replay_data"

diff --git a/aepsych/server/message_handlers/handle_setup.py b/aepsych/server/message_handlers/handle_setup.py
@@ -13,8 +13,6 @@
 from aepsych.version import __version__
 
 logger = utils_logging.getLogger(logging.INFO)
-DEFAULT_DESC = "default description"
-DEFAULT_NAME = "default name"
 
 
 def _configure(server, config):
@@ -81,39 +79,39 @@ def handle_setup(server, request):
     ):
         tempconfig = Config(**request["message"])
         if not server.is_performing_replay:
-            experiment_id = None
-            if server._db_master_record is not None:
-                experiment_id = server._db_master_record.experiment_id
             if "metadata" in tempconfig.keys():
-                cdesc = (
-                    tempconfig["metadata"]["experiment_description"]
-                    if ("experiment_description" in tempconfig["metadata"].keys())
-                    else DEFAULT_DESC
-                )
-                cname = (
-                    tempconfig["metadata"]["experiment_name"]
-                    if ("experiment_name" in tempconfig["metadata"].keys())
-                    else DEFAULT_NAME
-                )
-                cid = (
-                    tempconfig["metadata"]["experiment_id"]
-                    if ("experiment_id" in tempconfig["metadata"].keys())
-                    else None
+                # Get metadata
+                exp_name = tempconfig["metadata"].get("experiment_name", fallback=None)
+                exp_desc = tempconfig["metadata"].get(
+                    "experiment_description", fallback=None
                 )
+                par_id = tempconfig["metadata"].get("participant_id", fallback=None)
+
+                # This may be populated when replaying
+                if server._db_master_record is not None:
+                    exp_id = server._db_master_record.experiment_id
+                else:
+                    exp_id = tempconfig["metadata"].get("experiment_id", fallback=None)
+
+                extra_metadata = tempconfig.jsonifyMetadata(only_extra=True)
+
                 server._db_master_record = server.db.record_setup(
-                    description=cdesc,
-                    name=cname,
+                    description=exp_desc,
+                    name=exp_name,
                     request=request,
-                    id=cid,
-                    extra_metadata=tempconfig.jsonifyMetadata(),
+                    exp_id=exp_id,
+                    par_id=par_id,
+                    extra_metadata=extra_metadata if extra_metadata != "" else None,
+                )
+            else:  # No metadata set, still record the master
+                exp_id = (
+                    server._db_master_record.experiment_id
+                    if server._db_master_record is not None
+                    else None
                 )
-            ### if the metadata does not exist, we are going to log nothing
-            else:
+
                 server._db_master_record = server.db.record_setup(
-                    description=DEFAULT_DESC,
-                    name=DEFAULT_NAME,
-                    request=request,
-                    id=experiment_id,
+                    request=request, exp_id=exp_id
                 )
 
         strat_id = configure(server, config=tempconfig)