Skip to content

Commit

Permalink
update db master table spec and corresponding helper functions (faceb…
Browse files Browse the repository at this point in the history
…ookresearch#465)

Summary:

Update the db master table spec so that the experiment ID and participant ID are no longer required to be unique.

Functions that used experiment_id as a key have been swapped to use the master table's primary key (unique_id). Some functions appeared to expect that a single experiment ID could return multiple entries, which did not make sense while experiment_id was unique; this means nothing really needs to be changed in them.

Replay functions used to rely on the experiment_id, which was assumed to be unique (and it was). Now replay functions use the master table's unique_id to pick which experiment to replay, which conveniently also makes it much easier to just try integers starting from 0 (instead of looking up a UUID).

Metadata reading has been changed to correctly get all the information from the config to match the master table spec.

These are technically breaking changes that may affect old scripts, but it's not clear which db utility functions may be used in unusual scripts that rely on experiment_ids to identify experiments. Still, old dbs should all work and remain compatible.

Differential Revision: D66526187
  • Loading branch information
JasonKChow authored and facebook-github-bot committed Nov 27, 2024
1 parent 47f62eb commit 15f2ea6
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 173 deletions.
28 changes: 26 additions & 2 deletions aepsych/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,33 @@ def to_dict(self, deduplicate: bool = True) -> Dict[str, Any]:
return _dict

# Turn the metadata section into JSON.
def jsonifyMetadata(self) -> str:
def jsonifyMetadata(self, only_extra: bool = False) -> str:
"""Return a json string of the metadata section.
Args:
only_extra (bool): Only jsonify the extra meta data.
Returns:
str: A json string representing the metadata dictionary or an empty string
if there is no metadata to return.
"""
configdict = self.to_dict()
return json.dumps(configdict["metadata"])
metadata = configdict["metadata"].copy()

if only_extra:
default_metadata = [
"experiment_name",
"experiment_description",
"experiment_id",
"participant_id",
]
for name in default_metadata:
metadata.pop(name, None)

if len(metadata.keys()) == 0:
return ""
else:
return json.dumps(metadata)

# Turn the entire config into JSON format.
def jsonifyAll(self) -> str:
Expand Down
89 changes: 44 additions & 45 deletions aepsych/database/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import os
import uuid
import warnings
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Dict, List, Optional
Expand Down Expand Up @@ -138,18 +139,18 @@ def get_master_records(self) -> List[tables.DBMasterTable]:
records = self._session.query(tables.DBMasterTable).all()
return records

def get_master_record(self, experiment_id: int) -> Optional[tables.DBMasterTable]:
"""Grab the list of master record for a specific experiment (master) id.
def get_master_record(self, master_id: int) -> Optional[tables.DBMasterTable]:
"""Grab the list of master record for a specific master id (uniquie_id of master table).
Args:
experiment_id (int): The experiment id.
master_id (int): The master_id, which is the master key of the master table.
Returns:
tables.DBMasterTable or None: The master record or None if it doesn't exist.
"""
records = (
self._session.query(tables.DBMasterTable)
.filter(tables.DBMasterTable.experiment_id == experiment_id)
.filter(tables.DBMasterTable.unique_id == master_id)
.all()
)

Expand All @@ -162,7 +163,7 @@ def get_replay_for(self, master_id: int) -> Optional[List[tables.DbReplayTable]]
"""Get the replay records for a specific master row.
Args:
master_id (int): The master id.
master_id (int): The unique id for the master row (it's the master key).
Returns:
List[tables.DbReplayTable] or None: The replay records or None if they don't exist.
Expand All @@ -178,7 +179,7 @@ def get_strats_for(self, master_id: int = 0) -> Optional[List[Any]]:
"""Get the strat records for a specific master row.
Args:
master_id (int): The master id. Defaults to 0.
master_id (int): The master table unique ID. Defaults to 0.
Returns:
List[Any] or None: The strat records or None if they don't exist.
Expand Down Expand Up @@ -247,6 +248,13 @@ def get_all_params_for(self, master_id: int) -> Optional[List[tables.DbRawTable]
Returns:
List[tables.DbRawTable] or None: The parameters or None if they don't exist.
"""
warnings.warn(
"get_all_params_for is the same as get_param_for since there can only be one instance of any master_id",
DeprecationWarning,
)
return self.get_param_for(master_id=master_id)

# TODO: This function should change to being able to get params for all experiments given specific metadata
raw_record = self.get_raw_for(master_id)
params = []

Expand All @@ -258,14 +266,11 @@ def get_all_params_for(self, master_id: int) -> Optional[List[tables.DbRawTable]

return None

def get_param_for(
self, master_id: int, iteration_id: int
) -> Optional[List[tables.DbRawTable]]:
def get_param_for(self, master_id: int) -> Optional[List[tables.DbRawTable]]:
"""Get the parameters for a specific iteration of a specific experiment.
Args:
master_id (int): The master id.
iteration_id (int): The iteration id.
Returns:
List[tables.DbRawTable] or None: The parameters or None if they don't exist.
Expand All @@ -274,7 +279,7 @@ def get_param_for(

if raw_record is not None:
for raw in raw_record:
if raw.unique_id == iteration_id:
if raw.unique_id == master_id:
return raw.children_param

return None
Expand All @@ -288,6 +293,13 @@ def get_all_outcomes_for(self, master_id: int) -> Optional[List[tables.DbRawTabl
Returns:
List[tables.DbRawTable] or None: The outcomes or None if they don't exist.
"""
warnings.warn(
"get_all_outcomes_for is the same as get_outcome_for since there can only be one instance of any master_id",
DeprecationWarning,
)
return self.get_outcome_for(master_id=master_id)

# TODO: This function should change to being able to get outcomes for all experiments given specific metadata
raw_record = self.get_raw_for(master_id)
outcomes = []

Expand All @@ -299,14 +311,11 @@ def get_all_outcomes_for(self, master_id: int) -> Optional[List[tables.DbRawTabl

return None

def get_outcome_for(
self, master_id: int, iteration_id: int
) -> Optional[List[tables.DbRawTable]]:
def get_outcome_for(self, master_id: int) -> Optional[List[tables.DbRawTable]]:
"""Get the outcomes for a specific iteration of a specific experiment.
Args:
master_id (int): The master id.
iteration_id (int): The iteration id.
Returns:
List[tables.DbRawTable] or None: The outcomes or None if they don't exist.
Expand All @@ -315,56 +324,46 @@ def get_outcome_for(

if raw_record is not None:
for raw in raw_record:
if raw.unique_id == iteration_id:
if raw.unique_id == master_id:
return raw.children_outcome

return None

def record_setup(
self,
description: str,
name: str,
description: str = None,
name: str = None,
extra_metadata: Optional[str] = None,
id: Optional[str] = None,
exp_id: Optional[str] = None,
request: Dict[str, Any] = None,
participant_id: Optional[int] = None,
par_id: Optional[int] = None,
) -> str:
"""Record the setup of an experiment.
Args:
description (str): The description of the experiment.
name (str): The name of the experiment.
description (str, optional): The description of the experiment, defaults to None.
name (str, optional): The name of the experiment, defaults to None.
extra_metadata (str, optional): Extra metadata. Defaults to None.
id (str, optional): The id of the experiment. Defaults to None.
request (Dict[str, Any]): The request. Defaults to None.
participant_id (int, optional): The participant id. Defaults to None.
exp_id (str, optional): The id of the experiment. Defaults to a generated uuid.
request (Dict[str, Any], optional): The request. Defaults to None.
par_id (int, optional): The participant id. Defaults to generated uuid.
Returns:
str: The experiment id.
"""
self.get_engine()

if id is None:
master_table = tables.DBMasterTable()
master_table.experiment_description = description
master_table.experiment_name = name
master_table.experiment_id = str(uuid.uuid4())
if participant_id is not None:
master_table.participant_id = participant_id
else:
master_table.participant_id = str(
uuid.uuid4()
) # no p_id specified will result in a generated UUID

master_table.extra_metadata = extra_metadata

self._session.add(master_table)
master_table = tables.DBMasterTable()
master_table.experiment_description = description
master_table.experiment_name = name
master_table.experiment_id = exp_id if exp_id is not None else str(uuid.uuid4())
master_table.participant_id = (
par_id if par_id is not None else str(uuid.uuid4())
)
master_table.extra_metadata = extra_metadata
self._session.add(master_table)

logger.debug(f"record_setup = [{master_table}]")
else:
master_table = self.get_master_record(id)
if master_table is None:
raise RuntimeError(f"experiment id {id} doesn't exist in the db.")
logger.debug(f"record_setup = [{master_table}]")

record = tables.DbReplayTable()
record.message_type = "setup"
Expand Down
42 changes: 17 additions & 25 deletions aepsych/database/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,6 @@

Base = declarative_base()

"""
Original Schema
CREATE TABLE master (
unique_id INTEGER NOT NULL,
experiment_name VARCHAR(256),
experiment_description VARCHAR(2048),
experiment_id VARCHAR(10),
PRIMARY KEY (unique_id),
UNIQUE (experiment_id)
);
CREATE TABLE replay_data (
unique_id INTEGER NOT NULL,
timestamp DATETIME,
message_type VARCHAR(64),
message_contents BLOB,
master_table_id INTEGER,
PRIMARY KEY (unique_id),
FOREIGN KEY(master_table_id) REFERENCES master (unique_id)
);
"""


class DBMasterTable(Base):
"""
Expand All @@ -62,10 +41,10 @@ class DBMasterTable(Base):
__tablename__ = "master"

unique_id = Column(Integer, primary_key=True, autoincrement=True)
experiment_name = Column(String(256))
experiment_description = Column(String(2048))
experiment_id = Column(String(10), unique=True)
participant_id = Column(String(50), unique=True)
experiment_name = Column(String(256), nullable=True)
experiment_description = Column(String(2048), nullable=True)
experiment_id = Column(String(10))
participant_id = Column(String(50))

extra_metadata = Column(String(4096)) # JSON-formatted metadata

Expand Down Expand Up @@ -176,6 +155,19 @@ def _add_column(engine: Engine, column: str) -> None:
except Exception as e:
logger.debug(f"Column already exists, no need to alter. [{e}]")

@staticmethod
def _update_column(engine: Engine, column: str, spec: str) -> None:
"""Alter an existing column of the master table to a new spec.
Issues an ``ALTER TABLE master MODIFY <column> <spec>`` statement through
the given engine and then commits. Errors are not caught here, so any
failure propagates to the caller.
Args:
engine (Engine): The sqlalchemy engine.
column (str): The column name.
spec (str): The new column spec.
"""
# Log the intended change before touching the schema.
logger.debug(f"Altering the master table column: {column} to this spec {spec}")
# `column` and `spec` are interpolated directly into the SQL text, so they
# must come from trusted, internal callers only.
# NOTE(review): `MODIFY` is MySQL-style syntax; SQLite's ALTER TABLE does not
# appear to support it -- confirm which backend this is expected to run on.
engine.execute(f"ALTER TABLE master MODIFY {column} {spec}")
# NOTE(review): presumably `engine` exposes commit(); SQLAlchemy's Engine is
# not documented to have one -- confirm what object callers actually pass.
engine.commit()


class DbReplayTable(Base):
__tablename__ = "replay_data"
Expand Down
56 changes: 27 additions & 29 deletions aepsych/server/message_handlers/handle_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
from aepsych.version import __version__

logger = utils_logging.getLogger(logging.INFO)
DEFAULT_DESC = "default description"
DEFAULT_NAME = "default name"


def _configure(server, config):
Expand Down Expand Up @@ -81,39 +79,39 @@ def handle_setup(server, request):
):
tempconfig = Config(**request["message"])
if not server.is_performing_replay:
experiment_id = None
if server._db_master_record is not None:
experiment_id = server._db_master_record.experiment_id
if "metadata" in tempconfig.keys():
cdesc = (
tempconfig["metadata"]["experiment_description"]
if ("experiment_description" in tempconfig["metadata"].keys())
else DEFAULT_DESC
)
cname = (
tempconfig["metadata"]["experiment_name"]
if ("experiment_name" in tempconfig["metadata"].keys())
else DEFAULT_NAME
)
cid = (
tempconfig["metadata"]["experiment_id"]
if ("experiment_id" in tempconfig["metadata"].keys())
else None
# Get metadata
exp_name = tempconfig["metadata"].get("experiment_name", fallback=None)
exp_desc = tempconfig["metadata"].get(
"experiment_description", fallback=None
)
par_id = tempconfig["metadata"].get("participant_id", fallback=None)

# This may be populated when replaying
if server._db_master_record is not None:
exp_id = server._db_master_record.experiment_id
else:
exp_id = tempconfig["metadata"].get("experiment_id", fallback=None)

extra_metadata = tempconfig.jsonifyMetadata(only_extra=True)

server._db_master_record = server.db.record_setup(
description=cdesc,
name=cname,
description=exp_desc,
name=exp_name,
request=request,
id=cid,
extra_metadata=tempconfig.jsonifyMetadata(),
exp_id=exp_id,
par_id=par_id,
extra_metadata=extra_metadata if extra_metadata != "" else None,
)
else: # No metadata set, still record the master
exp_id = (
server._db_master_record.experiment_id
if server._db_master_record is not None
else None
)
### if the metadata does not exist, we are going to log nothing
else:

server._db_master_record = server.db.record_setup(
description=DEFAULT_DESC,
name=DEFAULT_NAME,
request=request,
id=experiment_id,
request=request, exp_id=exp_id
)

strat_id = configure(server, config=tempconfig)
Expand Down
Loading

0 comments on commit 15f2ea6

Please sign in to comment.