diff --git a/pyproject.toml b/pyproject.toml index 4b58ce7..98c53a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ sources = ["src"] # testing [tool.pytest.ini_options] -minversion = "6.0" +minversion = "7.0" pythonpath = "src" addopts = [ "--import-mode=importlib", @@ -96,6 +96,34 @@ testpaths = [ [tool.coverage.run] source = ["src"] +omit = [ + "*/example_fixtures/*", + ] + +[tool.coverage.report] +# Regexes for lines to exclude from consideration +exclude_also = [ + # Don't complain about missing debug-only code: + "def __repr__", + "if self\\.debug", + + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + + # Don't complain if non-runnable code isn't run: + "if 0:", + "if __name__ == .__main__.:", + + # Don't complain about abstract methods, they aren't run: + "@(abc\\.)?abstractmethod", + ] + +ignore_errors = true + +[tool.coverage.html] +directory = "coverage_html_report" + [tool.bumpversion] current_version = "0.8.0" @@ -117,16 +145,6 @@ filename= "README.md" search = "The current version is v{current_version}" replace = "The current version is v{new_version}" -# [[bumpversion.files]] -# filename= "src/raman_fitting/__init__.py" -# search = "__version__ = '{current_version}'" -# replace = "__version__ = '{new_version}'" - -# [[bumpversion.files]] -# filename= "src/raman_fitting/_version.py" -# search = "version = '{current_version}'" -# replace = "version = '{new_version}'" - [[tool.bumpversion.files]] filename= "pyproject.toml" search = "current_version = '{current_version}'" diff --git a/src/raman_fitting/MANIFEST.in b/src/raman_fitting/MANIFEST.in deleted file mode 100644 index 4c11fe7..0000000 --- a/src/raman_fitting/MANIFEST.in +++ /dev/null @@ -1,15 +0,0 @@ -include *.txt -include *.md -include *.cfg -include *.pkl - -recursive-include ./raman_fitting/* - -include raman_fitting/datafiles/example_files/*.txt - - -include ./requirements.txt -exclude *.log - -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] diff --git a/src/raman_fitting/delegating/main_delegator.py b/src/raman_fitting/delegating/main_delegator.py index 8d5f7d6..55e3cc2 100644 --- a/src/raman_fitting/delegating/main_delegator.py +++ b/src/raman_fitting/delegating/main_delegator.py @@ -1,7 +1,6 @@ # pylint: disable=W0614,W0401,W0611,W0622,C0103,E0401,E0402 from dataclasses import dataclass, field from typing import Dict, List, Sequence, Any -from typing import TypeAlias from raman_fitting.config.path_settings import ( @@ -16,8 +15,6 @@ from raman_fitting.models.deconvolution.base_model import ( get_models_and_peaks_from_definitions, ) -from raman_fitting.models.spectrum import SpectrumData -from raman_fitting.models.fit_models import SpectrumFitModel from raman_fitting.models.splitter import RegionNames from raman_fitting.exports.exporter import ExportManager from raman_fitting.imports.files.file_indexer import ( @@ -34,12 +31,11 @@ from raman_fitting.delegating.pre_processing import ( prepare_aggregated_spectrum_from_files, ) - -from loguru import logger +from raman_fitting.types import LMFitModelCollection +from .run_fit_spectrum import run_fit_over_selected_models -LMFitModelCollection: TypeAlias = Dict[str, Dict[str, BaseLMFitModel]] -SpectrumFitModelCollection: TypeAlias = Dict[str, Dict[str, SpectrumFitModel]] +from loguru import logger @dataclass @@ -53,6 +49,7 @@ class MainDelegator: """ run_mode: RunModes + use_multiprocessing: bool = False lmfit_models: LMFitModelCollection = field( default_factory=get_models_and_peaks_from_definitions ) @@ -142,6 +139,7 @@ def main_run(self): logger.info("No fit models were selected.") results = {} + for group_name, grp in groupby_sample_group(selection): results[group_name] = {} for sample_id, sample_grp in groupby_sample_id(grp): @@ -162,13 +160,17 @@ def main_run(self): _error_msg = f"Handle multiple source files for a single position on a sample, {group_name} {sample_id}" results[group_name][sample_id]["errors"] = _error_msg logger.debug(_error_msg) - model_result = run_fit_over_selected_models(sgrp, self.selected_models) + model_result = run_fit_over_selected_models( + sgrp, + self.selected_models, + use_multiprocessing=self.use_multiprocessing, + ) results[group_name][sample_id]["fit_results"] = model_result self.results = results -def run_fit_over_selected_models( - raman_files: List[RamanFileInfo], models: LMFitModelCollection +def get_results_over_selected_models( + raman_files: List[RamanFileInfo], models: LMFitModelCollection, fit_model_results ) -> Dict[RegionNames, AggregatedSampleSpectrumFitResult]: results = {} for region_name, region_grp in models.items(): @@ -177,12 +179,6 @@ def run_fit_over_selected_models( ) if aggregated_spectrum is None: continue - fit_model_results = {} - for model_name, model in region_grp.items(): - spectrum_fit = run_sample_fit_with_model( - aggregated_spectrum.spectrum, model - ) - fit_model_results[model_name] = spectrum_fit fit_region_results = AggregatedSampleSpectrumFitResult( region_name=region_name, aggregated_spectrum=aggregated_spectrum, @@ -192,21 +188,6 @@ def run_fit_over_selected_models( return results -def run_sample_fit_with_model( - spectrum: SpectrumData, model: BaseLMFitModel -) -> SpectrumFitModel: - name = model.name - region = model.region_name.name - spec_fit = SpectrumFitModel(spectrum=spectrum, model=model, region=region) - # include optional https://lmfit.github.io/lmfit-py/model.html#saving-and-loading-modelresults - spec_fit.run_fit() - logger.debug( - f"Fit with model {name} on {region} success: {spec_fit.fit_result.success} in {spec_fit.elapsed_time:.2f}s." - ) - # spec_fit.fit_result.plot(show_init=True) - return spec_fit - - def make_examples(): # breakpoint() _main_run = MainDelegator( diff --git a/src/raman_fitting/imports/spectrum/datafile_parsers.py b/src/raman_fitting/imports/spectrum/datafile_parsers.py index ec8da5f..2218149 100644 --- a/src/raman_fitting/imports/spectrum/datafile_parsers.py +++ b/src/raman_fitting/imports/spectrum/datafile_parsers.py @@ -1,8 +1,7 @@ -from typing import List, Sequence +from typing import Sequence from pathlib import Path import numpy as np -import pandas as pd from tablib import Dataset from loguru import logger @@ -84,27 +83,3 @@ def use_np_loadtxt(filepath, usecols=(0, 1), **kwargs) -> np.array: logger.error(_msg) raise ValueError(_msg) from exc return array - - -def cast_array_into_spectrum_frame(array, keys: List[str] = None) -> pd.DataFrame: - """cast array into spectrum frame""" - if array.ndim != len(keys): - raise ValueError( - f"Array dimension {array.ndim} does not match the number of keys {len(keys)}" - ) - - try: - spectrum_data = pd.DataFrame(array, columns=keys) - return spectrum_data - except Exception as exc: - _msg = f"Can not create DataFrame from array object: {array}\n{exc}" - logger.error(_msg) - raise ValueError(_msg) from exc - - -def load_spectrum_from_txt(filepath, **kwargs) -> pd.DataFrame: - """load spectrum from txt file""" - keys = kwargs.pop("keys") - array = use_np_loadtxt(filepath, **kwargs) - spectrum_data = cast_array_into_spectrum_frame(array, keys=keys) - return spectrum_data diff --git a/src/raman_fitting/imports/spectrumdata_parser.py b/src/raman_fitting/imports/spectrumdata_parser.py index 527681e..6a89e6a 100644 --- a/src/raman_fitting/imports/spectrumdata_parser.py +++ b/src/raman_fitting/imports/spectrumdata_parser.py @@ -38,7 +38,7 @@ } -def get_file_parser(filepath: Path) -> Callable[Path, Dataset]: +def get_file_parser(filepath: Path) -> Callable[[Path], Dataset]: "Get callable file parser function." suffix = filepath.suffix parser = SPECTRUM_FILETYPE_PARSERS[suffix]["method"] @@ -49,9 +49,9 @@ def get_file_parser(filepath: Path) -> Callable[Path, Dataset]: @dataclass class SpectrumReader: """ - Reads a clean spectrum from a file Path or str + Reads a spectrum from a 'raw' data file Path or str - with columns "ramanshift" and "intensity". + with spectrum_data_keys "ramanshift" and "intensity". Double checks the values Sets a hash attribute afterwards """ @@ -65,14 +65,14 @@ class SpectrumReader: spectrum_hash: str = field(default=None, repr=False) spectrum_length: int = field(default=0, init=False) - def __post_init__(self, **kwargs): + def __post_init__(self): super().__init__() self.filepath = validate_filepath(self.filepath) self.spectrum_length = 0 if self.filepath is None: - return + raise ValueError(f"File is not valid. {self.filepath}") parser = get_file_parser(self.filepath) parsed_spectrum = parser(self.filepath, self.spectrum_data_keys) if parsed_spectrum is None: @@ -86,7 +86,11 @@ def __post_init__(self, **kwargs): logger.warning( f"The values of {spectrum_key} of this spectrum are invalid. {validator}" ) - spec_init = {"label": self.label, "region_name": self.region_name} + spec_init = { + "label": self.label, + "region_name": self.region_name, + "source": self.filepath, + } _parsed_spec_dict = { k: parsed_spectrum[k] for k in spectrum_keys_expected_values.keys() } diff --git a/src/raman_fitting/models/deconvolution/base_peak.py b/src/raman_fitting/models/deconvolution/base_peak.py index 76b3c4c..40d2384 100644 --- a/src/raman_fitting/models/deconvolution/base_peak.py +++ b/src/raman_fitting/models/deconvolution/base_peak.py @@ -18,6 +18,7 @@ parmeter_to_dict, ) from raman_fitting.config.default_models import load_config_from_toml_files +from raman_fitting.utils.string_operations import prepare_text_from_param ParamHintDict = Dict[str, Dict[str, Optional[float | bool | str]]] @@ -197,19 +198,8 @@ def __str__(self): def make_string_from_param_hints(param_hints: Parameters) -> str: - text = "" param_center = param_hints.get("center", {}) - if param_center: - center_txt = "" - center_val = param_center.value - center_min = param_center.min - if center_min != center_val: - center_txt += f"{center_min} < " - center_txt += f"{center_val}" - center_max = param_center.max - if center_max != center_val: - center_txt += f" > {center_max}" - text += f", center : {center_txt}" + text = prepare_text_from_param(param_center) return text diff --git a/src/raman_fitting/models/deconvolution/lmfit_parameter.py b/src/raman_fitting/models/deconvolution/lmfit_parameter.py index eab314c..a7636c0 100644 --- a/src/raman_fitting/models/deconvolution/lmfit_parameter.py +++ b/src/raman_fitting/models/deconvolution/lmfit_parameter.py @@ -1,7 +1,7 @@ -from logging import warn import math from enum import StrEnum from typing import List, Optional, Dict +from warnings import warn from lmfit import Parameter from lmfit.models import GaussianModel, LorentzianModel, Model, VoigtModel diff --git a/src/raman_fitting/models/spectrum.py b/src/raman_fitting/models/spectrum.py index 47d3980..0c2d804 100644 --- a/src/raman_fitting/models/spectrum.py +++ b/src/raman_fitting/models/spectrum.py @@ -16,7 +16,7 @@ class SpectrumData(BaseModel): intensity: pnd.Np1DArrayFp32 = Field(repr=False) label: str region_name: str | None = None - source: Sequence[str] | None = None + source: FilePath | Sequence[FilePath] | str | Sequence[str] | None = None @model_validator(mode="after") def validate_equal_length(self): diff --git a/src/raman_fitting/models/splitter.py b/src/raman_fitting/models/splitter.py index 863c7ed..a2a080e 100644 --- a/src/raman_fitting/models/splitter.py +++ b/src/raman_fitting/models/splitter.py @@ -29,6 +29,7 @@ def process_spectrum(self) -> "SplitSpectrum": self.spectrum.intensity, spec_region_limits=self.region_limits, label=self.spectrum.label, + source=self.spectrum.source, ) self.spec_regions = spec_regions return self @@ -56,7 +57,11 @@ def get_default_spectrum_region_limits( def split_spectrum_data_in_regions( - ramanshift: np.array, intensity: np.array, spec_region_limits=None, label=None + ramanshift: np.array, + intensity: np.array, + spec_region_limits=None, + label=None, + source=None, ) -> Dict[str, SpectrumData]: """ For splitting of spectra into the several SpectrumRegionLimits, @@ -78,6 +83,7 @@ def split_spectrum_data_in_regions( "intensity": intensity[ind], "label": region_lbl, "region_name": region_name, + "source": source, } spec_regions[region_lbl] = SpectrumData(**_data) return spec_regions diff --git a/src/raman_fitting/processing/baseline_subtraction.py b/src/raman_fitting/processing/baseline_subtraction.py index 241060b..258bf4f 100644 --- a/src/raman_fitting/processing/baseline_subtraction.py +++ b/src/raman_fitting/processing/baseline_subtraction.py @@ -40,7 +40,7 @@ def subtract_baseline_per_region(spec: SpectrumData, split_spectrum: SplitSpectr def subtract_baseline_from_split_spectrum( - split_spectrum: SplitSpectrum, label=None + split_spectrum: SplitSpectrum = None, label=None ) -> SplitSpectrum: _bl_spec_regions = {} _info = {} @@ -53,7 +53,8 @@ def subtract_baseline_from_split_spectrum( "ramanshift": spec.ramanshift, "intensity": blcorr_int, "label": new_label, - "regionn_name": region_name, + "region_name": region_name, + "source": spec.source, } ) _bl_spec_regions.update(**{region_name: spec}) @@ -62,15 +63,3 @@ def subtract_baseline_from_split_spectrum( update={"spec_regions": _bl_spec_regions, "info": _info} ) return bl_corrected_spectra - - -def subtract_baseline( - ramanshift: np.array, intensity: np.array, label: str = None -) -> SplitSpectrum: - "Subtract the a baseline of background intensity of a spectrum." - spectrum = SpectrumData(ramanshift=ramanshift, intensity=intensity, label=label) - split_spectrum = SplitSpectrum(spectrum=spectrum) - blcorrected_spectrum = subtract_baseline_from_split_spectrum( - split_spectrum, label=label - ) - return blcorrected_spectrum diff --git a/src/raman_fitting/processing/despike.py b/src/raman_fitting/processing/despike.py index 8c7c084..e563bf1 100644 --- a/src/raman_fitting/processing/despike.py +++ b/src/raman_fitting/processing/despike.py @@ -23,7 +23,7 @@ class SpectrumDespiker(BaseModel): moving_region_size: int = 1 ignore_lims: Tuple[int, int] = (20, 46) info: Dict = Field(default_factory=dict) - despiked_spectrum: SpectrumData = Field(None) + processed_spectrum: SpectrumData = Field(None) @model_validator(mode="after") def process_spectrum(self) -> "SpectrumDespiker": @@ -36,7 +36,7 @@ def process_spectrum(self) -> "SpectrumDespiker": update={"intensity": despiked_intensity}, deep=True ) SpectrumData.model_validate(despiked_spec, from_attributes=True) - self.despiked_spectrum = despiked_spec + self.processed_spectrum = despiked_spec self.info.update(**result_info) return self diff --git a/src/raman_fitting/processing/filter.py b/src/raman_fitting/processing/filter.py index 1a78d01..041f3b7 100644 --- a/src/raman_fitting/processing/filter.py +++ b/src/raman_fitting/processing/filter.py @@ -40,7 +40,7 @@ def process_intensity(self, intensity: np.ndarray) -> np.ndarray: def filter_spectrum( - spectrum: SpectrumData, filter_name="savgol_filter" + spectrum: SpectrumData = None, filter_name="savgol_filter" ) -> SpectrumData: if filter_name not in available_filters: raise ValueError(f"Chosen filter {filter_name} not available.") diff --git a/src/raman_fitting/processing/normalization.py b/src/raman_fitting/processing/normalization.py index 5b39ec0..9e2ab17 100644 --- a/src/raman_fitting/processing/normalization.py +++ b/src/raman_fitting/processing/normalization.py @@ -50,6 +50,7 @@ def normalize_regions_in_split_spectrum( "intensity": spec.intensity * norm_factor, "label": norm_label, "region_name": region_name, + "source": spec.source, } ) norm_spec_regions.update(**{region_name: _data}) @@ -61,7 +62,7 @@ def normalize_regions_in_split_spectrum( def normalize_split_spectrum( - split_spectrum: SplitSpectrum, + split_spectrum: SplitSpectrum = None, ) -> SplitSpectrum: "Normalize the spectrum intensity according to normalization method." normalization_factor = get_normalization_factor(split_spectrum) diff --git a/src/raman_fitting/processing/post_processing.py b/src/raman_fitting/processing/post_processing.py index bdd036d..c0570a5 100644 --- a/src/raman_fitting/processing/post_processing.py +++ b/src/raman_fitting/processing/post_processing.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -import logging from typing import Protocol from raman_fitting.models.spectrum import SpectrumData @@ -10,39 +9,44 @@ from ..models.splitter import SplitSpectrum from .normalization import normalize_split_spectrum -logger = logging.getLogger(__name__) - -POST_PROCESS_KWARGS = {"filter_name": "savgol_"} +class PreProcessor(Protocol): + def process_spectrum(self, spectrum: SpectrumData = None): ... class PostProcessor(Protocol): - def process_spectrum(self, spectrum: SpectrumData): ... + def process_spectrum(self, split_spectrum: SplitSpectrum = None): ... @dataclass class SpectrumProcessor: spectrum: SpectrumData processed: bool = False + clean_spectrum: SplitSpectrum | None = None def __post_init__(self): processed_spectrum = self.process_spectrum() - self.processed_spectrum = processed_spectrum self.clean_spectrum = processed_spectrum self.processed = True def process_spectrum(self) -> SplitSpectrum: - pre_processed_spectrum = self.pre_process_intensity() - post_processed_spectra = self.post_process_spectrum(pre_processed_spectrum) + pre_processed_spectrum = self.pre_process_intensity(spectrum=self.spectrum) + post_processed_spectra = self.post_process_spectrum( + spectrum=pre_processed_spectrum + ) return post_processed_spectra - def pre_process_intensity(self) -> SpectrumData: - filtered_spectrum = filter_spectrum(self.spectrum) - despiker = SpectrumDespiker(**{"spectrum": filtered_spectrum}) - return despiker.despiked_spectrum + def pre_process_intensity(self, spectrum: SpectrumData = None) -> SpectrumData: + filtered_spectrum = filter_spectrum(spectrum=spectrum) + despiker = SpectrumDespiker(spectrum=filtered_spectrum) + return despiker.processed_spectrum - def post_process_spectrum(self, spectrum: SpectrumData) -> SplitSpectrum: + def post_process_spectrum(self, spectrum: SpectrumData = None) -> SplitSpectrum: split_spectrum = SplitSpectrum(spectrum=spectrum) - baseline_subtracted = subtract_baseline_from_split_spectrum(split_spectrum) - normalized_spectra = normalize_split_spectrum(baseline_subtracted) + baseline_subtracted = subtract_baseline_from_split_spectrum( + split_spectrum=split_spectrum + ) + normalized_spectra = normalize_split_spectrum( + split_spectrum=baseline_subtracted + ) return normalized_spectra diff --git a/src/raman_fitting/types.py b/src/raman_fitting/types.py new file mode 100644 index 0000000..e077616 --- /dev/null +++ b/src/raman_fitting/types.py @@ -0,0 +1,7 @@ +from typing import TypeAlias, Dict + +from raman_fitting.models.deconvolution.base_model import BaseLMFitModel +from raman_fitting.models.fit_models import SpectrumFitModel + +LMFitModelCollection: TypeAlias = Dict[str, Dict[str, BaseLMFitModel]] +SpectrumFitModelCollection: TypeAlias = Dict[str, Dict[str, SpectrumFitModel]] diff --git a/src/raman_fitting/utils/string_operations.py b/src/raman_fitting/utils/string_operations.py index 98ce19f..754c06b 100644 --- a/src/raman_fitting/utils/string_operations.py +++ b/src/raman_fitting/utils/string_operations.py @@ -1,6 +1,26 @@ +from lmfit.parameter import Parameter + + def join_prefix_suffix(prefix: str, suffix: str) -> str: prefix_ = prefix.rstrip("_") suffix_ = suffix.lstrip("_") if prefix.endswith(suffix_): return prefix_ return f"{prefix_}_{suffix_}" + + +def prepare_text_from_param(param: Parameter) -> str: + text = "" + if not param: + return text + _ptext = "" + _val = param.value + _min = param.min + if _min != _val: + _ptext += f"{_min} < " + _ptext += f"{_val}" + _max = param.max + if _max != _val: + _ptext += f" > {_max}" + text += f", center : {_ptext}" + return text diff --git a/tests/delegating/test_main_delegator.py b/tests/delegating/test_main_delegator.py index bf9041a..5b4369b 100644 --- a/tests/delegating/test_main_delegator.py +++ b/tests/delegating/test_main_delegator.py @@ -23,7 +23,5 @@ def test_delegator_index(delegator): assert len(delegator.index.raman_files) == len(selection) -@pytest.mark.skip(reason="enable main_run before release.") -def test_main_run(): - delegator.main_run() +def test_main_run(delegator): assert delegator.results diff --git a/tests/processing/test_spectrum_constructor.py b/tests/processing/test_spectrum_constructor.py index b178592..ee33ce0 100644 --- a/tests/processing/test_spectrum_constructor.py +++ b/tests/processing/test_spectrum_constructor.py @@ -1,18 +1,18 @@ -from raman_fitting.imports.spectrum.spectrum_constructor import ( - SpectrumDataLoader, -) +import pytest + +from raman_fitting.imports.spectrumdata_parser import SpectrumReader +from raman_fitting.models.deconvolution.spectrum_regions import RegionNames def test_spectrum_data_loader_empty(): - spd = SpectrumDataLoader("empty.txt") - assert spd.file == "empty.txt" - assert spd.clean_spectrum is None + with pytest.raises(ValueError): + SpectrumReader("empty.txt") def test_spectrum_data_loader_file(example_files): for file in example_files: - spd = SpectrumDataLoader( - file, run_kwargs=dict(sample_id=file.stem, sample_pos=1) - ) - assert len(spd.clean_spectrum.spectrum) == 1600 - assert len(spd.clean_spectrum.spec_regions) >= 5 + sprdr = SpectrumReader(file) + assert len(sprdr.spectrum.intensity) == 1600 + assert len(sprdr.spectrum.ramanshift) == 1600 + assert sprdr.spectrum.source == file + assert sprdr.spectrum.region_name == RegionNames.full diff --git a/tests/test_fixtures/__init__.py b/tests/test_fixtures/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py deleted file mode 100644 index 792d600..0000000 --- a/tests/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#