Source code for pyfracval.schemas

"""
Pydantic models for simulation configuration and result data structures.
"""

import logging
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Self

import numpy as np
import yaml
from pydantic import BaseModel, ConfigDict, Field

logger = logging.getLogger(__name__)


class SimulationParameters(BaseModel):
    """Input parameters for a FracVAL simulation run.

    Used for validation and type hinting of the simulation configuration.

    Attributes
    ----------
    N : int
        Target number of primary particles.
    Df : float
        Target fractal dimension.
    kf : float
        Target fractal prefactor.
    rp_g : float
        Geometric mean radius of primary particles.
    rp_gstd : float
        Geometric standard deviation of radii (must be >= 1.0).
    tol_ov : float
        Overlap tolerance (must be > 0.0).
    n_subcl_percentage : float
        Target fraction for PCA subcluster size (0.0 < perc <= 0.5).
    ext_case : int
        CCA sticking ext_case (0 or 1).
    seed : int | None
        Random seed used for generation (optional).
    """

    N: int = Field(..., description="Target number of primary particles.")
    Df: float = Field(..., description="Target fractal dimension.")
    kf: float = Field(..., description="Target fractal prefactor.")
    rp_g: float = Field(..., description="Geometric mean radius of primary particles.")
    rp_gstd: float = Field(
        ..., ge=1.0, description="Geometric standard deviation of radii (>= 1.0)."
    )
    tol_ov: float = Field(..., gt=0.0, description="Overlap tolerance.")
    n_subcl_percentage: float = Field(
        ..., gt=0.0, le=0.5, description="Target fraction for PCA subcluster size."
    )
    ext_case: int = Field(
        ..., ge=0, le=1, description="CCA sticking ext_case (0 or 1)."
    )
    seed: int | None = Field(None, description="Random seed used for generation.")

    # Add other tunable parameters from config if needed
    model_config = ConfigDict(extra="allow")
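

# Example construction (illustrative only; the values below are arbitrary
# placeholders, not recommended defaults):
#
#   params = SimulationParameters(
#       N=1024, Df=1.8, kf=1.3, rp_g=10.0, rp_gstd=1.2,
#       tol_ov=1e-6, n_subcl_percentage=0.1, ext_case=0, seed=42,
#   )
#
# Out-of-range values raise pydantic.ValidationError, e.g. rp_gstd=0.9
# violates the ge=1.0 constraint.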
class AggregateProperties(BaseModel):
    """Calculated properties of the final generated aggregate."""

    N_particles_actual: int = Field(
        ..., description="Actual number of particles in the final aggregate."
    )
    radius_of_gyration: float | None = Field(
        None, description="Calculated radius of gyration (mass weighted)."
    )
    center_of_mass: list[float] | None = Field(
        None, description="Calculated center of mass [X, Y, Z]."
    )
    # Add r_max etc. if calculated and needed
class GenerationInfo(BaseModel):
    """Information about the generation process."""

    script_name: str = "PyFracVAL"
    timestamp: datetime = Field(
        default_factory=datetime.now,
        description="Timestamp of generation completion.",
    )
    iteration: int = Field(..., description="Aggregate iteration number.")
    # Add git commit hash, hostname, execution time?
class Metadata(BaseModel):
    """Complete output model including parameters, properties, and generation info.

    Designed for easy serialization (e.g., to YAML in header).

    Attributes
    ----------
    generation_info : GenerationInfo
        Information about the run environment and time.
    simulation_parameters : SimulationParameters
        The input parameters used for this simulation run.
    aggregate_properties : AggregateProperties | None
        Calculated properties of the final aggregate (None if calculation failed).
    """

    generation_info: GenerationInfo
    simulation_parameters: SimulationParameters
    aggregate_properties: AggregateProperties | None = None  # Calculated after generation

    model_config = ConfigDict(
        json_encoders={
            datetime: lambda v: v.isoformat(),
            # Add other encoders if needed (e.g., for NumPy types if stored directly)
        }
        # Consider adding validate_assignment=True if you want validation on
        # attribute changes:
        # validate_assignment = True
    )
    def to_dict(self) -> dict[str, Any]:
        """Convert the metadata model to a dictionary.

        Suitable for YAML/JSON serialization. Uses Pydantic's `model_dump`.

        Returns
        -------
        dict[str, Any]
            A dictionary representation of the metadata.
        """
        # mode='json' uses encoders like datetime -> isoformat str
        return self.model_dump(mode="json", exclude_none=True)
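    # Example output shape (field values illustrative):
    #
    #   {"generation_info": {"script_name": "PyFracVAL",
    #                        "timestamp": "2024-01-01T12:00:00",
    #                        "iteration": 1},
    #    "simulation_parameters": {"N": 1024, "Df": 1.8, ...}}
    #
    # Note that exclude_none=True drops unset optional fields such as
    # aggregate_properties.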
    def to_yaml_header(self) -> str:
        """Generate a commented YAML header string for file output.

        Serializes the metadata to a multi-line YAML string where each line
        is prefixed with '# '.

        Returns
        -------
        str
            The formatted YAML header string.
        """
        metadata_dict = self.to_dict()
        # Add comments dynamically if needed for clarity within YAML
        # metadata_dict['simulation_parameters']['N'] = f"{metadata_dict['simulation_parameters']['N']} # Target N"  # Example
        yaml_string = yaml.dump(
            metadata_dict,
            sort_keys=False,
            default_flow_style=False,
            indent=2,
            # width=80,
            allow_unicode=True,
        )
        # Prepend comment marker to each line
        header_lines = [f"# {line}\n" for line in yaml_string.splitlines()]
        header_string = "".join(header_lines)
        return header_string
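    # The resulting header looks like this (field values illustrative;
    # sort_keys=False preserves the model's field order):
    #
    #   # generation_info:
    #   #   script_name: PyFracVAL
    #   #   timestamp: '2024-01-01T12:00:00'
    #   #   iteration: 1
    #   # simulation_parameters:
    #   #   N: 1024
    #   #   Df: 1.8
    #   #   ...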
    def save_to_file(
        self, folderpath: str | Path, coords: np.ndarray, radii: np.ndarray
    ):
        """Save metadata (as YAML header) and numerical data to a file.

        Constructs a filename based on simulation parameters and timestamp.
        Writes the YAML header followed by the coordinate and radius data
        formatted as space-delimited columns.

        Parameters
        ----------
        folderpath : str | Path
            The directory where the output file will be saved.
        coords : np.ndarray
            Nx3 NumPy array of final particle coordinates.
        radii : np.ndarray
            N NumPy array of final particle radii.

        Raises
        ------
        IOError
            If writing to the file fails.
        """
        n_str = f"{self.simulation_parameters.N}"
        df_str = f"{self.simulation_parameters.Df:.2f}".replace(".", "p")
        kf_str = f"{self.simulation_parameters.kf:.2f}".replace(".", "p")
        rpg_str = f"{self.simulation_parameters.rp_g:.1f}".replace(".", "p")
        rpgstd_str = f"{self.simulation_parameters.rp_gstd:.2f}".replace(".", "p")
        seed_str = f"{self.simulation_parameters.seed}"  # "None" if no seed was set
        agg_str = f"{self.generation_info.iteration}"
        timestamp = time.strftime("%Y%m%d-%H%M%S")

        filepath = Path(folderpath)
        filepath.mkdir(parents=True, exist_ok=True)
        filepath /= (
            "fracval_"
            + "_".join(
                [
                    f"N{n_str}",
                    f"Df{df_str}",
                    f"kf{kf_str}",
                    f"rpg{rpg_str}",
                    f"rpgstd{rpgstd_str}",
                    # f"seed{seed_str}",  # seed currently excluded from the filename
                    f"agg{agg_str}",
                    f"{timestamp}",
                ]
            )
            + ".dat"
        )

        header_string = self.to_yaml_header()
        data_to_save = np.hstack((coords, radii.reshape(-1, 1)))
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(header_string)
            np.savetxt(f, data_to_save, fmt="%18.10e", delimiter=" ")

        logger.info("Successfully saved aggregate data and metadata to")
        logger.info(f"  Folder:   {filepath.parent}")
        logger.info(f"  Filename: {filepath.name}")
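    # Example usage (illustrative sketch; `params` is a SimulationParameters
    # instance as constructed above, and coords/radii are placeholder arrays):
    #
    #   meta = Metadata(
    #       generation_info=GenerationInfo(iteration=1),
    #       simulation_parameters=params,
    #   )
    #   coords = np.zeros((params.N, 3))        # Nx3 coordinates
    #   radii = np.full(params.N, params.rp_g)  # N radii
    #   meta.save_to_file("results", coords, radii)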
    @classmethod
    def from_file(cls, filepath: str | Path) -> tuple[Self, np.ndarray]:
        """Load metadata and data from a FracVAL output file.

        Parses the commented YAML header to reconstruct the Metadata object
        and loads the subsequent numerical data into a NumPy array.

        Parameters
        ----------
        filepath : str | Path
            Path to the FracVAL `.dat` file.

        Returns
        -------
        tuple[Metadata, np.ndarray]
            A tuple containing:

            - The loaded and validated Metadata object.
            - The loaded Nx4 NumPy data array [X, Y, Z, R].

        Raises
        ------
        FileNotFoundError
            If the specified `filepath` does not exist.
        ValueError
            If the commented header is missing, the parsed YAML is not a
            dictionary, or the numerical data is scalar.
        """
        filepath = Path(filepath)
        if not filepath.is_file():
            raise FileNotFoundError(
                f"Metadata load failed: File not found - {filepath}"
            )

        yaml_lines = []
        with open(filepath, "r", encoding="utf-8") as f:
            # Read header lines starting with '#'
            for line in f:
                if line.startswith("#"):
                    # Remove the leading '# ' comment marker
                    yaml_lines.append(line[2:])
                else:
                    # First non-comment line starts the data block; np.loadtxt
                    # below re-reads the file and skips '#' lines itself.
                    break

        # Parse the extracted YAML header
        if not yaml_lines:
            raise ValueError(
                f"No commented header lines found in {filepath.name}"
            )
        yaml_string = "".join(yaml_lines)
        metadata_dict = yaml.safe_load(yaml_string)
        if not isinstance(metadata_dict, dict):
            raise ValueError(
                f"Parsed YAML header in {filepath.name} is not a dictionary."
            )

        # Parse the numerical data
        data_array = np.loadtxt(filepath)
        if data_array.ndim == 0:
            raise ValueError(
                f"Loaded numerical data is scalar in {filepath.name}."
            )
        elif data_array.ndim == 1 and data_array.shape[0] == 4:
            data_array = data_array.reshape(1, 4)
        elif data_array.ndim != 2 or data_array.shape[1] != 4:
            logger.warning(
                f"Loaded data array has unexpected shape {data_array.shape} "
                f"from {filepath.name}. Expected Nx4."
            )

        # Validate and create the Metadata model instance
        metadata_instance = cls(**metadata_dict)
        logger.debug(f"Successfully validated metadata from: {filepath.name}")
        return metadata_instance, data_array
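

# Round-trip example (illustrative; the path below is a hypothetical file
# produced by save_to_file above):
#
#   meta, data = Metadata.from_file("results/some_fracval_output.dat")
#   coords, radii = data[:, :3], data[:, 3]
#   print(meta.simulation_parameters.N, data.shape)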