"""Module containing the DatabaseCreator class."""
import logging
import re
from typing import Dict, List, Optional, Tuple
from bout_runners.database.database_connector import DatabaseConnector
from bout_runners.database.database_utils import get_system_info_as_sql_type
[docs]class DatabaseCreator:
r"""
Class for creating the schema of the database.
Attributes
----------
db_connector : DatabaseConnector
The database object to write to
Methods
-------
create_all_schema_tables(parameters_as_sql_types)
Create the all the tables for a schema
get_create_table_statement(table_name, columns=None,
primary_key='id', foreign_keys=None)
Return a SQL string which can be used to create the table
_create_single_table(table_str)
Create a table in the database
_create_system_info_table()
Create a table for the system info
_create_split_table()
Create a table which stores the grid split
_create_file_modification_table()
Create a table for file modifications
_create_parameter_tables(parameters_as_sql_types)
Create a table for each BOUT.settings section and a join table
_create_run_table()
Create a table for the metadata_recorder of a run
Examples
--------
Import dependencies
>>> from pathlib import Path
>>> from bout_runners.executor.bout_paths import BoutPaths
>>> from bout_runners.parameters.default_parameters import DefaultParameters
>>> from bout_runners.parameters.final_parameters import FinalParameters
>>> from bout_runners.database.database_connector import DatabaseConnector
Create the `bout_paths` object
>>> project_path = Path().joinpath('path', 'to', 'project')
>>> bout_inp_src_dir = Path().joinpath('path', 'to', 'source', 'BOUT.inp')
>>> bout_inp_dst_dir = Path().joinpath('path', 'to', 'destination', 'BOUT.inp')
>>> bout_paths = BoutPaths(project_path=project_path,
... bout_inp_src_dir=bout_inp_src_dir,
... bout_inp_dst_dir=bout_inp_dst_dir)
Obtain the parameters
>>> default_parameters = DefaultParameters(bout_paths)
>>> final_parameters = FinalParameters(default_parameters)
>>> final_parameters_dict = final_parameters.get_final_parameters()
>>> final_parameters_as_sql_types = \
... final_parameters.cast_to_sql_type(final_parameters_dict)
Create the database
>>> db_connector = DatabaseConnector('name', project_path)
>>> db_creator = DatabaseCreator(db_connector)
>>> db_creator.create_all_schema_tables(final_parameters_as_sql_types)
"""
def __init__(self, db_connector: DatabaseConnector) -> None:
"""
Set the database to use.
Parameters
----------
db_connector : DatabaseConnector
The database object to write to
"""
self.db_connector = db_connector
[docs] def create_all_schema_tables(
self, parameters_as_sql_types: Dict[str, Dict[str, str]]
) -> None:
"""
Create the all the tables for a schema.
The database schema will be on normalized form, see [1]_ for a quick
overview, and [2]_ for a slightly deeper explanation
Parameters
----------
parameters_as_sql_types : dict
The dictionary on the form
>>> {'section': {'parameter': 'value_type'}}
References
----------
.. [1] https://www.databasestar.com/database-normalization/
.. [2] http://www.bkent.net/Doc/simple5.htm
"""
# Check if tables are created
tables = list()
tables.append(self._create_system_info_table())
tables.append(self._create_split_table())
tables.append(self._create_file_modification_table())
tables.extend(self._create_parameter_tables(parameters_as_sql_types))
tables.append(self._create_run_table())
logging.info(
"Created the following tables in %s: %s", self.db_connector.db_path, tables
)
[docs] @staticmethod
def get_create_table_statement(
table_name: str,
columns: Optional[Dict[str, str]] = None,
primary_key: str = "id",
foreign_keys: Optional[Dict[str, Tuple[str, str]]] = None,
) -> str:
"""
Return a SQL string which can be used to create the table.
Parameters
----------
table_name : str
Name of the table
columns : dict or None
Dictionary where the key is the column name and the value is
the type
primary_key : str
Name of the primary key (the type is set to INTEGER)
foreign_keys : dict or None
Dictionary where the key is the column in this table to be used as a
foreign key and the value is the tuple consisting of (name_of_the_table,
key_in_table) to refer to
Returns
-------
create_statement : str
The SQL statement which creates table
"""
create_statement = f"CREATE TABLE {table_name} \n("
create_statement += f" {primary_key} INTEGER PRIMARY KEY,\n"
# These are not known during submission time
nullable_fields = ("start_time", "stop_time")
if columns is not None:
for name, sql_type in columns.items():
create_statement += f" {name} {sql_type}"
if name in nullable_fields:
nullable_str = ",\n"
else:
nullable_str = " NOT NULL,\n"
create_statement += nullable_str
if foreign_keys is not None:
# Create the key as column
# NOTE: All keys are integers
for name in foreign_keys.keys():
create_statement += f" {name} INTEGER NOT NULL,\n"
# Add constraint
for name, (f_table_name, key_in_table) in foreign_keys.items():
# If the parent is updated or deleted, we would like the
# same effect to apply to its child (thereby the CASCADE
# parameter)
create_statement += (
f" FOREIGN KEY({name}) \n"
f" REFERENCES {f_table_name}"
f"({key_in_table})\n"
f" ON UPDATE CASCADE\n"
f" ON DELETE CASCADE,"
f"\n"
)
# Replace last comma with )
create_statement = f"{create_statement[:-2]})"
return create_statement
def _create_single_table(self, table_str: str) -> str:
"""
Create a table in the database.
Parameters
----------
table_str : str
The query to execute
Returns
-------
table_name : str
Name of the table
Raises
------
ValueError
If the table_str is not understood
"""
# Obtain the table name
pattern = r"CREATE TABLE (\w*)"
match = re.match(pattern, table_str)
if match is None:
msg = f'table_str "{table_str}" not understood'
logging.critical(msg)
raise ValueError(msg)
table_name = match.group(1)
self.db_connector.execute_statement(table_str)
return table_name
def _create_system_info_table(self) -> str:
"""
Create a table for the system info.
Returns
-------
str
Name of the table
"""
sys_info_dict = get_system_info_as_sql_type()
sys_info_statement = self.get_create_table_statement(
table_name="system_info", columns=sys_info_dict
)
return self._create_single_table(sys_info_statement)
def _create_split_table(self) -> str:
"""
Create a table which stores the grid split.
Returns
-------
str
Name of the table
"""
split_statement = self.get_create_table_statement(
table_name="split",
columns={
"number_of_processors": "INTEGER",
"number_of_nodes": "INTEGER",
"processors_per_node": "INTEGER",
},
)
return self._create_single_table(split_statement)
def _create_file_modification_table(self) -> str:
"""
Create a table for file modifications.
Returns
-------
str
Name of the table
"""
file_modification_statement = self.get_create_table_statement(
table_name="file_modification",
columns={
"project_makefile_modified": "TIMESTAMP",
"project_executable_modified": "TIMESTAMP",
"project_git_sha": "TEXT",
"bout_lib_modified": "TIMESTAMP",
"bout_git_sha": "TEXT",
},
)
return self._create_single_table(file_modification_statement)
def _create_parameter_tables(
self, parameters_as_sql_types: Dict[str, Dict[str, str]]
) -> List[str]:
"""
Create a table for each BOUT.settings section and a join table.
Parameters
----------
parameters_as_sql_types : dict
The dictionary on the form
>>> {'section': {'parameter': 'value_type'}}
Returns
-------
tables : list of str
Tuple of table names
Notes
-----
All `:` will be replaced by `_` in the section names
"""
parameters_foreign_keys = dict()
tables = list()
for section in parameters_as_sql_types.keys():
# Replace bad characters for SQL
section_name = section.replace(":", "_")
# Generate foreign keys for the parameters table
parameters_foreign_keys[f"{section_name}_id"] = (section_name, "id")
columns = dict()
for parameter, value_type in parameters_as_sql_types[section].items():
# Generate the columns
columns[parameter] = value_type
# Creat the section table
section_statement = self.get_create_table_statement(
table_name=section_name, columns=columns
)
tables.append(self._create_single_table(section_statement))
# Create the join table
parameters_statement = self.get_create_table_statement(
table_name="parameters", foreign_keys=parameters_foreign_keys
)
tables.append(self._create_single_table(parameters_statement))
return tables
def _create_run_table(self) -> str:
"""
Create a table for the metadata of a run.
Returns
-------
str
Name of the table
"""
run_statement = self.get_create_table_statement(
table_name="run",
columns={
"name": "TEXT",
"submitted_time": "TIMESTAMP",
"start_time": "TIMESTAMP",
"stop_time": "TIMESTAMP",
"latest_status": "TEXT",
},
foreign_keys={
"file_modification_id": ("file_modification", "id"),
"split_id": ("split", "id"),
"parameters_id": ("parameters", "id"),
"system_info_id": ("system_info", "id"),
},
)
return self._create_single_table(run_statement)