Source code for bout_runners.log.log_reader

"""Module containing the LogReader class."""


import logging
import re
from datetime import datetime
from io import StringIO
from pathlib import Path
from typing import Optional

import pandas as pd


[docs]class LogReader: """ Class for reading BOUT++ log files. Attributes ---------- __log_path : Path Path to the log file file_str : str The log file as a string start_time : None or str The time of the execution start (given that it has started) end_time : None or str The time of the execution end (given that it has started) pid : None or int The processor id of the part of the program writing to the log file Methods ------- started() Whether or not the execution has started ended() Whether or not the execution has ended pid_exist() Whether or not the pid can be found get_simulation_steps() Return the simulation steps as a dataframe __is_str_in_file(pattern) Check whether regex-pattern exists in file __find_local_time(pattern) Return the local time of a regex capture Examples -------- >>> from pathlib import Path >>> path = Path().joinpath('path', 'to', 'BOUT.log.0') >>> log_reader = LogReader(path) >>> log_reader.start_time '2020-05-01 17:07:10' >>> log_reader.end_time '2020-05-01 17:07:14' >>> log_reader.pid 1190 """ def __init__(self, log_path: Path) -> None: """ Open and read a log file. Parameters ---------- log_path : str or Path Absolute path to log file """ self.__log_path = log_path with Path(log_path).open("r") as log_file: self.file_str = log_file.read() logging.debug("Opened log_file %s", log_path)
[docs] def started(self) -> bool: """ Check whether the run has a start time. Returns ------- bool True if the start signature is found in the file """ return self.__is_str_in_file(r"^Run started at")
[docs] def ended(self) -> bool: """ Check whether the run has an end time. Returns ------- bool True if the end signature is found in the file """ return self.__is_str_in_file(r"^Run finished at")
[docs] def pid_exist(self) -> bool: """ Check whether a process id exist. Returns ------- bool True if the pid is found """ return self.__is_str_in_file(r"^pid\s*:\s*")
[docs] def get_simulation_steps(self) -> pd.DataFrame: """ Return the simulation steps as a dataframe. Returns ------- simulation_steps : DataFrame Data frame containing details of the simulation steps """ file_str_list = self.file_str.split("\n") found_line_nr = -1 for line_nr, line in enumerate(file_str_list): if line.startswith("Sim Time |"): found_line_nr = line_nr break if found_line_nr != -1: file_str_list = file_str_list[found_line_nr:] file_str_list[0] = file_str_list[0].replace("|", "") file_str_list[0] = file_str_list[0].replace("Sim Time", "Sim_time") file_str_list[0] = file_str_list[0].replace("RHS evals", "RHS_evals") file_str_list[0] = file_str_list[0].replace("Wall Time", "Wall_time") # Remove blank line file_str_list.pop(1) # Get all the lines which starts with a number pattern = r"^\d\.\d{3}e[+-]\d{2}" # Two header lines found_line_nr = 0 for line_nr, line in enumerate(file_str_list[2:]): if re.search(pattern, line) is None: found_line_nr = line_nr break # Add two as we enumerate from [2:] file_str_list = file_str_list[: found_line_nr + 2] else: logging.warning("Could not find steps in %s", self.__log_path) return pd.DataFrame() file_str = re.sub(" +", ",", "\n".join(file_str_list)) simulation_steps = pd.read_csv(StringIO(file_str)) return simulation_steps
@property def start_time(self) -> Optional[datetime]: """ Return the start time of the process. Returns ------- datetime or None The start time on date time format """ if self.started(): return self.__find_local_time(r"^Run started at : (.*)") return None @property def end_time(self) -> Optional[datetime]: """ Return the end time of the process. Returns ------- datetime or None The end time on date time format """ if self.ended(): return self.__find_local_time(r"^Run finished at : (.*)") return None @property def pid(self) -> Optional[int]: """ Return the pid of the process. Returns ------- int or None The pid of the process """ if self.pid_exist(): pattern = r"^pid:\s*(\d*)\s*$" # Using search as match will only search the beginning of # the string # https://stackoverflow.com/a/32134461/2786884 match = re.search(pattern, self.file_str, flags=re.MULTILINE) if match is None: return None return int(match.group(1)) return None def __is_str_in_file(self, pattern: str) -> bool: """ Check whether regex-pattern exists in file. Parameters ---------- pattern : str Regex pattern to search for Returns ------- bool True if pattern exist """ # Using search as match will only search the beginning of the # string # https://stackoverflow.com/a/32134461/2786884 match = re.search(pattern, self.file_str, flags=re.MULTILINE) if match is None: return False return True def __find_local_time(self, pattern: str) -> datetime: """ Return the local time of a regex capture. Parameters ---------- pattern : str String to search for Returns ------- time : datetime The local datetime Raises ------ ValueError If no matches for pattern is found in self.file_str """ # Using search as match will only search the beginning of the # string # https://stackoverflow.com/a/32134461/2786884 match = re.search(pattern, self.file_str, flags=re.MULTILINE) if match is None: msg = f"No matches in {self.file_str} with pattern {pattern}" logging.critical(msg) raise ValueError(msg) time_str = match.group(1) try: time = datetime.strptime(time_str, "%c") except ValueError: # Observed on CentOS time = datetime.strptime(time_str, "%a %d %b %Y %H:%M:%S %p %Z") return time