diff --git a/.github/workflows/build-test-codecov.yml b/.github/workflows/build-test-codecov.yml index 6102852..b4695fd 100644 --- a/.github/workflows/build-test-codecov.yml +++ b/.github/workflows/build-test-codecov.yml @@ -17,7 +17,7 @@ jobs: fail-fast: true matrix: os: [ ubuntu-latest, macos-latest, windows-latest ] - python-version: [3.8, 3.9, "3.10", 3.11] + python-version: [3.11] steps: @@ -32,8 +32,7 @@ jobs: run: | python3 -m pip install -U pip python3 -m pip install -U build - python3 -m pip install flake8 pytest - python3 -m pip install -r requirements.txt + python3 -m pip install flake8 - name: Lint with flake8 run: | @@ -46,11 +45,7 @@ jobs: run: | python3 -m build # install the package in editable mode for the coverage report - python3 -m pip install -e . - - - name: Install dependencies for coverage report - run: | - python3 -m pip install pytest pytest-cov coverage + python3 -m pip install -e .["pytest"] - name: Generate coverage report run: | @@ -58,6 +53,8 @@ jobs: - name: Upload Coverage to Codecov uses: codecov/codecov-action@v1 - - name: raman_fitting -M make_examples + - name: raman_fitting run examples + + run: | - raman_fitting -M make_examples + raman_fitting run examples diff --git a/.github/workflows/test-release-candidate.yaml b/.github/workflows/test-release-candidate.yaml index c9bcb9e..c1b4963 100644 --- a/.github/workflows/test-release-candidate.yaml +++ b/.github/workflows/test-release-candidate.yaml @@ -53,8 +53,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - # windows-latest !=> gives error in 'if' statetement - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.11"] env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} @@ -71,10 +70,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 - python -m pip install pytest pytest-cov coverage python -m pip install -U build - python -m pip install -r requirements.txt - python -m pip install --editable .[test] + python -m pip install --editable .["pytest"] - name: Sanity check with flake8 run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 023aad1..a58889b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ # Temporary disabling hooks: SKIP=flake8 git commit -m "foo" repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files name: Check for files larger than 5 MB @@ -13,23 +13,16 @@ repos: exclude: '\.Rd' - id: trailing-whitespace name: Check for trailing whitespaces (auto-fixes) - # - repo: https://github.com/pycqa/isort # raises an error disabled for now - # rev: 5.11.2 - # hooks: - # - id: isort - # name: isort (python) - - repo: https://github.com/psf/black - rev: 23.3.0 # Replace by any tag/version: https://github.com/psf/black/tags + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.3.2 hooks: - - id: black - name: black - consistent Python code formatting (auto-fixes) - language_version: python # Should be a command that runs python3.6+ - # - repo: https://github.com/pycqa/flake8 - # rev: 6.0.0 - # hooks: - # - id: flake8 - # name: flake8 - Python linting + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. 
+ - id: ruff-format - repo: https://github.com/gitleaks/gitleaks - rev: v8.16.1 + rev: v8.18.2 hooks: - id: gitleaks diff --git a/Dockerfile b/Dockerfile index 6aa9095..f0b78df 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,16 @@ # set base image (host OS) FROM python:3.11 +RUN addgroup -S nonroot \ + && adduser -S nonroot -G nonroot + +USER nonroot + # set the working directory in the container WORKDIR /code # copy the dependencies file to the working directory -COPY . . +COPY ./raman-fitting ./raman-fitting # copy setup.cfg to work dir # COPY setup.cfg . @@ -25,5 +30,4 @@ RUN pip install -e ./ #COPY src/ . # command to run on container start -CMD [ "raman_fitting -M make_examples" ] -# CMD [ "python", "./raman_fitting/docker/run_make_examples.py" ] +CMD [ "raman_fitting run examples" ] diff --git a/README.md b/README.md index 41c263f..1462471 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ In batch processing mode this package will index the raman data files in a chosen folder. First, it will try to extract a sample ID and position number from the filenames and create an index of the files in a dataframe. Over this index a preprocessing, fitting and exporting loop will start. -There are several models, each with a different combination of typical peaks, used for fitting. Each individual typical peak is defined as a class in the deconvolution/default_peaks folder with some added literature reference in the docstring. Here, the individual peak parameter settings can also be easily adjusted for initial values, limits, shape (eg. Lorentzian, Gaussian and Voigt) or be fixed at certain initial values. +There are several models, each with a different combination of typical peaks, used for fitting. Each individual typical peak is defined as a class in the deconvolution/default_models folder with some added literature reference in the docstring. Here, the individual peak parameter settings can also be easily adjusted for initial values, limits, shape (eg. Lorentzian, Gaussian and Voigt) or be fixed at certain initial values. Export is done with plots and excel files for the spectral data and fitting parameters for further analysis. @@ -57,7 +57,7 @@ python -m pip install -e raman-fitting/ In order to test the package after installation, please try the following command in a terminal CLI. ``` bash -raman_fitting -M make_examples +raman_fitting run examples ``` or these commands in the Python interpreter or in a Jupyter Notebook. ``` python @@ -86,7 +86,7 @@ raman_fitting # If you add a lot of files, try to check if the index is properly constructed # before fitting them. -raman_fitting -M make_index +raman_fitting make index # Location of index home/.raman_fitting/datafiles/results/raman_fitting_index.csv @@ -106,7 +106,7 @@ sample2-100_3.txt => sampleID = 'sample2-100', position = 3 ``` ### Version -The current version is v0.7.0 +The current version is v0.8.0 ### Dependencies diff --git a/pyproject.toml b/pyproject.toml index 741c1d6..98c53a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,17 +10,12 @@ authors = [ ] description = "Python framework for the batch processing and deconvolution of raman spectra." 
readme = {file = "README.md", content-type = "text/markdown"} -long_description = {file = "README.md", content-type = "text/markdown"} keywords = ["spectroscopy", "Raman", "fitting", "deconvolution", "batch processing", "carbonaceous materials"] classifiers = [ "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", @@ -28,7 +23,40 @@ classifiers = [ "Topic :: Scientific/Engineering :: Physics", "Topic :: Scientific/Engineering :: Chemistry", ] -dynamic = ["version", "long_description"] +dynamic = ["version"] +dependencies = [ + "pandas~=2.1.2", + "scipy~=1.11.3", + "lmfit~=1.2.2", + "matplotlib~=3.8.0", + "numpy~=1.26.1", + "tablib~=3.5.0", + "pydantic>=2.5", + "pydantic-settings>=2.1", + "pydantic_numpy>=4.1", + "loguru>=0.7", + "typer[all]", + "mpire[dill]~=2.10.0", +] + +[project.optional-dependencies] +pytest = [ + "pytest", + "pytest-cov", + "pytest-flake8", + "mock", + "wheel" + ] +dev = [ + "isort", + "pylint", + "flake8", + "autopep8", + "pydocstyle", + "black", + "bump2version", + "raman_fitting[pytest]" + ] [project.urls] homepage = "https://pypi.org/project/raman-fitting/" @@ -36,7 +64,8 @@ repository = "https://github.com/MyPyDavid/raman-fitting.git" # documentation = "https://raman-fitting.readthedocs.io/en/latest/" [project.scripts] -raman_fitting = "raman_fitting.interfaces:main" +raman_fitting = "raman_fitting.interfaces.typer_cli:app" + [tool.hatch.version] source = "vcs" @@ -53,7 +82,7 @@ sources = ["src"] # testing [tool.pytest.ini_options] -minversion = "6.0" +minversion = "7.0" pythonpath = "src" addopts = [ "--import-mode=importlib", @@ -65,36 +94,58 @@ testpaths = [ "tests", ] +[tool.coverage.run] +source = ["src"] +omit = [ + "*/example_fixtures/*", + ] + +[tool.coverage.report] +# Regexes for lines to exclude from consideration +exclude_also = [ + # Don't complain about missing debug-only code: + "def __repr__", + "if self\\.debug", + + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + + # Don't complain if non-runnable code isn't run: + "if 0:", + "if __name__ == .__main__.:", + + # Don't complain about abstract methods, they aren't run: + "@(abc\\.)?abstractmethod", + ] + +ignore_errors = true + +[tool.coverage.html] +directory = "coverage_html_report" + + [tool.bumpversion] -current_version = "0.7.0" +current_version = "0.8.0" commit = true commit_args = "--no-verify" +message = "Bump version: {current_version} → {new_version}" tag = true allow_dirty = true tag_name = "{new_version}" +tag_message = "Bump version: {current_version} → {new_version}" parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(\\.dev(?P\\d+))?" 
serialize =[ "{major}.{minor}.{patch}.dev{dev}", "{major}.{minor}.{patch}" ] -message = "Version updated from {current_version} to {new_version}" -[[bumpversion.files]] +[[tool.bumpversion.files]] filename= "README.md" search = "The current version is v{current_version}" replace = "The current version is v{new_version}" -# [[bumpversion.files]] -# filename= "src/raman_fitting/__init__.py" -# search = "__version__ = '{current_version}'" -# replace = "__version__ = '{new_version}'" - -# [[bumpversion.files]] -# filename= "src/raman_fitting/_version.py" -# search = "version = '{current_version}'" -# replace = "version = '{new_version}'" - -[[bumpversion.files]] +[[tool.bumpversion.files]] filename= "pyproject.toml" search = "current_version = '{current_version}'" replace = "current_version = '{new_version}'" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 26aabde..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -pandas >= 1.0.0 -scipy >= 1.6.0 -lmfit >= 1.2.0 -matplotlib >= 3.5.0 -numpy >= 1.19.2 -openpyxl >= 3.1.2 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2e115cb..0000000 --- a/setup.cfg +++ /dev/null @@ -1,91 +0,0 @@ - -[options] -python_requires = >=3.7.0 -include_package_data = True -package_dir = - = src -packages = find: -# setup_requires = setuptools_scm -tests_require = - pytest - pytest-cov - coverage[toml] - nose -install_requires = - pandas >= 1.0.0 - scipy >= 1.5.2 - lmfit >= 1.0.0 - matplotlib >= 3.1.2 - numpy >= 1.19.2 - openpyxl >= 3.0.1 - -[options.package_data] -* = *.txt - -[options.packages.find] -where = src -exclude = - build* - dist* - docs* - tests* - *.tests - tools* - - -[options.extras_require] -testing = - pytest - pytest-cov - pytest-flake8 - mock - wheel -dev = - isort - pylint - flake8 - autopep8 - pydocstyle - black - bump2version - -[check-manifest] -ignore = - .github - .github/* - - - - -[pylint] -extension-pkg-whitelist = numpy, pandas, matplotlib, lmfit - -[flake8] -ignore = - E402, - W503, - W504, - E126 -max-line-length = 140 -exclude = .tox,.eggs,ci/templates,build,dist - -[pydocstyle] -convention = numpy -match-dir = 'src\/.*' - -[yapf] -based_on_style = pep8 -dedent_closing_brackets = true -coalesce_brackets = true -split_complex_comprehension = true -split_before_dot = true - -[tool:isort] -force_single_line = False -profile = black -src_paths = ["src", "test"] -line_length = 140 -known_first_party = raman_fitting -default_section = THIRDPARTY -forced_separate = test_raman_fitting -skip = .tox,.eggs,ci/templates,build,dist diff --git a/src/raman_fitting/MANIFEST.in b/src/raman_fitting/MANIFEST.in deleted file mode 100644 index 4c11fe7..0000000 --- a/src/raman_fitting/MANIFEST.in +++ /dev/null @@ -1,15 +0,0 @@ -include *.txt -include *.md -include *.cfg -include *.pkl - -recursive-include ./raman_fitting/* - -include raman_fitting/datafiles/example_files/*.txt - - -include ./requirements.txt -exclude *.log - -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] diff --git a/src/raman_fitting/__init__.py b/src/raman_fitting/__init__.py index 0d07c6a..186796f 100644 --- a/src/raman_fitting/__init__.py +++ b/src/raman_fitting/__init__.py @@ -1,7 +1,3 @@ -# pylint: disable=W0614,W0611,W0622 -# flake8: noqa -# isort:skip_file - __author__ = "David Wallace" __docformat__ = "restructuredtext" __status__ = "Development" @@ -9,6 +5,7 @@ __current_package_name__ = "raman_fitting" __package_name__ = __current_package_name__ +import importlib.util try: from ._version import __version__ 
@@ -22,59 +19,26 @@ __version__ = _gv(_path.join(_path.dirname(__file__), _path.pardir)) except ModuleNotFoundError: __version__ = "importerr_modulenotfound_version" - except Exception as e: + except Exception: __version__ = "importerr_exception_version" -except Exception as e: +except Exception: __version__ = "catch_exception_version" - -import logging import sys import warnings -# Configure logger for use in package -logger = logging.getLogger(__package_name__) - -log_format = ( - "[%(asctime)s] — %(name)s — %(levelname)s —" - "%(funcName)s:%(lineno)d—12s %(message)s" -) -# '[%(asctime)s] %(levelname)-8s %(name)-12s %(message)s') - -# Define basic configuration -logging.basicConfig( - # Define logging level - level=logging.DEBUG, - # Define the format of log messages - format=log_format, - # Provide the filename to store the log messages - filename=("debug.log"), -) - -formatter = logging.Formatter(log_format) -from raman_fitting.config import logging_config - -logger.addHandler(logging_config.get_console_handler()) - -# create console handler -ch = logging.StreamHandler(stream=sys.stdout) -ch.setLevel(logging.INFO) -ch.setFormatter(formatter) +from loguru import logger -# add the handlers to the logger -logger.addHandler(ch) - -# This code is written for Python 3. -if sys.version_info.major < 3 and sys.version_info.minor < 7: - logger.error(f"{__package_name__} requires Python 3.7 or higher.") +# This code is written for Python 3.11 and higher +if sys.version_info.major < 3 and sys.version_info.minor < 11: + logger.error(f"{__package_name__} requires Python 3.11 or higher.") sys.exit(1) # Let users know if they're missing any hard dependencies -hard_dependencies = ("numpy", "pandas", "scipy", "matplotlib", "lmfit") +hard_dependencies = ("numpy", "pandas", "scipy", "matplotlib", "lmfit", "pydantic") soft_dependencies = {} missing_dependencies = [] -import importlib for dependency in hard_dependencies: if not importlib.util.find_spec(dependency): @@ -90,23 +54,3 @@ ) del hard_dependencies, soft_dependencies, dependency, missing_dependencies - -# Main Loop Delegator -from raman_fitting.delegating.main_delegator import MainDelegator, make_examples - -# Indexer -from raman_fitting.indexing.indexer import MakeRamanFilesIndex as make_index - -# Processing -from raman_fitting.processing.spectrum_template import SpectrumTemplate -from raman_fitting.processing.spectrum_constructor import ( - SpectrumDataLoader, - SpectrumDataCollection, -) - -# Modelling / fitting -from raman_fitting.deconvolution_models.fit_models import InitializeModels, Fitter - -# Exporting / Plotting -from raman_fitting.exporting.exporter import Exporter -from raman_fitting.config import filepath_settings diff --git a/src/raman_fitting/config/__init__.py b/src/raman_fitting/config/__init__.py index e69de29..07f035c 100644 --- a/src/raman_fitting/config/__init__.py +++ b/src/raman_fitting/config/__init__.py @@ -0,0 +1,3 @@ +from raman_fitting.config.base_settings import Settings + +settings = Settings() \ No newline at end of file diff --git a/src/raman_fitting/config/base_settings.py b/src/raman_fitting/config/base_settings.py new file mode 100644 index 0000000..f8b6d6b --- /dev/null +++ b/src/raman_fitting/config/base_settings.py @@ -0,0 +1,48 @@ +from typing import Dict +from pathlib import Path + +from pydantic import ( + Field, +) + +from pydantic_settings import BaseSettings + +from raman_fitting.models.deconvolution.base_model import BaseLMFitModel +from raman_fitting.models.deconvolution.base_model import ( + 
    get_models_and_peaks_from_definitions,
+)
+from raman_fitting.models.deconvolution.spectrum_regions import (
+    get_default_regions_from_toml_files,
+)
+from .default_models import load_config_from_toml_files
+from .path_settings import create_default_package_dir_or_ask, InternalPathSettings
+from types import MappingProxyType
+
+
+def get_default_models_and_peaks_from_definitions():
+    models_and_peaks_definitions = load_config_from_toml_files()
+    return get_models_and_peaks_from_definitions(models_and_peaks_definitions)
+
+
+class Settings(BaseSettings):
+    default_models: Dict[str, Dict[str, BaseLMFitModel]] = Field(
+        default_factory=get_default_models_and_peaks_from_definitions,
+        alias="my_default_models",
+        init_var=False,
+        validate_default=False,
+    )
+    default_regions: Dict[str, Dict[str, float]] | None = Field(
+        default_factory=get_default_regions_from_toml_files,
+        alias="my_default_regions",
+        init_var=False,
+        validate_default=False,
+    )
+    default_definitions: MappingProxyType | None = Field(
+        default_factory=load_config_from_toml_files,
+        alias="my_default_definitions",
+        init_var=False,
+        validate_default=False,
+    )
+
+    destination_dir: Path = Field(default_factory=create_default_package_dir_or_ask)
+    internal_paths: InternalPathSettings = Field(default_factory=InternalPathSettings)
diff --git a/src/raman_fitting/config/constants.py b/src/raman_fitting/config/constants.py
deleted file mode 100644
index 6c78a25..0000000
--- a/src/raman_fitting/config/constants.py
+++ /dev/null
@@ -1,5 +0,0 @@
-"""
-Created on Tue Jun 22 10:36:02 2021
-
-@author: DW
-"""
diff --git a/src/raman_fitting/config/default_models/__init__.py b/src/raman_fitting/config/default_models/__init__.py
new file mode 100644
index 0000000..508bf66
--- /dev/null
+++ b/src/raman_fitting/config/default_models/__init__.py
@@ -0,0 +1,14 @@
+from pathlib import Path
+from types import MappingProxyType
+import tomllib
+
+
+def load_config_from_toml_files() -> MappingProxyType:
+    current_parent_dir = Path(__file__).resolve().parent
+    default_peak_settings = {}
+    for i in current_parent_dir.glob("*.toml"):
+        default_peak_settings.update(tomllib.loads(i.read_bytes().decode()))
+    if not default_peak_settings:
+        raise ValueError("default models should not be empty.")
+
+    return MappingProxyType(default_peak_settings)
diff --git a/src/raman_fitting/config/default_models/first_order.toml b/src/raman_fitting/config/default_models/first_order.toml
new file mode 100644
index 0000000..321d650
--- /dev/null
+++ b/src/raman_fitting/config/default_models/first_order.toml
@@ -0,0 +1,112 @@
+[first_order]
+
+[first_order.models]
+1peak = "G"
+2peaks = "G+D"
+3peaks = "G+D+D3"
+4peaks = "G+D+D3+D4"
+5peaks = "G+D+D2+D3+D4"
+6peaks = "G+D+D2+D3+D4+D5"
+
+[first_order.peaks]
+
+[first_order.peaks.G]
+docstring = """
+Graphite belongs to the P63/mmc (D46h) space group. If considering only a graphene plane, at
+the Γ point of the Brillouin zone, there are six normal modes that possess only one mode (doubly
+degenerate in plane) with an E2g representation, which is Raman active.
+G ; Ideal graphitic lattice (E2g symmetry)
+G peak center stable over different laser wavelengths.
+Influenced by potential, HSO4 adsorption (or ionization of G- and G+),
+magnetic fields, pressure.
+For G: 1580-1590; for D5 and D2 this is not known.
+"""
+peak_name = "G"
+peak_type = "Lorentzian"
+[first_order.peaks.G.param_hints]
+center = {value = 1571, min = 1545, max = 1595}
+sigma = {value = 30, min = 5, max = 150}
+amplitude = {value = 100, min = 1e-05, max = 500}
+
+
+[first_order.peaks.D]
+docstring = """
+D or D1 ; Disordered graphitic lattice (graphene layer edges, A1g symmetry)
+A defective graphite presents other bands that can be as intense as the G band, at D=1350 and D'=1615 cm-1.
+These bands are activated by defects due to the breaking of the crystal symmetry that relax the Raman selection rules.
+For D1: 1340-1350
+"""
+peak_name = "D"
+peak_type = "Lorentzian"
+[first_order.peaks.D.param_hints]
+center = {value = 1350, min = 1330, max = 1380}
+sigma = {value = 35, min = 1, max = 150}
+amplitude = {value = 120, min = 1e-05, max = 500}
+
+[first_order.peaks.D2]
+docstring = """
+D2 or D' ; Right next to the G peak, sometimes not obvious as a G peak split.
+Disordered graphitic lattice (surface graphene layers, E2g symmetry)
+j.molstruc.2010.12.065
+"""
+peak_name = "D2"
+peak_type = "Lorentzian"
+[first_order.peaks.D2.param_hints]
+center = {value = 1606, min = 1592, max = 1635}
+sigma = {value = 30, min = 5, max = 150}
+amplitude = {value = 35, min = 5, max = 500}
+
+
+[first_order.peaks.D3]
+docstring = """
+D3 or D'' or A or Am ; Between the D and G peak, sometimes too broad.
+For amorphous carbon (Gaussian [26] or Lorentzian [3,18,27] line shape).
+For D3: 1495-1515
+"""
+peak_name = "D3"
+peak_type = "Lorentzian"
+[first_order.peaks.D3.param_hints]
+center = {value = 1480, min = 1450, max = 1525}
+sigma = {value = 25, min = 1, max = 150}
+amplitude = {value = 25, min = 1e-02, max = 500}
+
+[first_order.peaks.D4]
+docstring = """
+D4 or I ; Below the D band, a shoulder sometimes split with the D5 band.
+Disordered graphitic lattice (A1g symmetry) [10], polyenes [3,27], ionic impurities.
+D4 peak at 1212 cm−1
+Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. https://doi.org/10.1007/s10853-017-1753-7
+For D4: 1185-1210, but depends on whether D5 is present or not.
+"""
+peak_name = "D4"
+peak_type = "Lorentzian"
+[first_order.peaks.D4.param_hints]
+center = {value = 1230, min = 1180, max = 1310}
+sigma = {value = 40, min = 1, max = 150}
+amplitude = {value = 20, min = 1e-02, max = 200}
+
+[first_order.peaks.D5]
+docstring = """
+D5 peak at 1110 cm−1. At the lowest shoulder of the D peak, below D4.
+Ref: Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. https://doi.org/10.1007/s10853-017-1753-7
+"""
+peak_name = "D5"
+peak_type = "Lorentzian"
+[first_order.peaks.D5.param_hints]
+center = {value = 1110, min = 1080, max = 1150}
+sigma = {value = 40, min = 1, max = 150}
+amplitude = {value = 20, min = 1e-02, max = 200}
+
+[first_order.peaks.Si1]
+docstring = """
+===== Extra peak at ca. 960 cm-1 presumably from Si substrate 2nd order === not from Nafion...
+=> Either cut the Spectra 1000-2000 +=> Place an extra Gaussian peak at 960 in the fit +""" +peak_name = "Si1" +peak_type = "Gaussian" +is_substrate = true +[first_order.peaks.Si1.param_hints] +center = {value = 960, min = 900, max = 980} +sigma = {value = 10, min = 0, max = 150} +amplitude = {value = 10, min = 0, max = 200} diff --git a/src/raman_fitting/config/default_models/normalization.toml b/src/raman_fitting/config/default_models/normalization.toml new file mode 100644 index 0000000..01dcf2e --- /dev/null +++ b/src/raman_fitting/config/default_models/normalization.toml @@ -0,0 +1,30 @@ +[normalization] + +[normalization.models] +norm = "norm_G+norm_D" + +[normalization.peaks] + +[normalization.peaks.norm_G] +docstring = """ +G_peak used for normalization +""" +peak_name = "norm_G" +peak_type = "Lorentzian" +is_for_normalization = true +[normalization.peaks.norm_G.param_hints] +center = {"value" = 1581, "min" = 1500, "max" = 1600} +sigma = {"value" = 40, "min" = 1e-05, "max" = 1e3} +amplitude = {"value" = 8e4, "min" = 1e2} + +[normalization.peaks.norm_D] +docstring = """ +D_peak for normalization +""" +peak_name = "norm_D" +peak_type = "Lorentzian" +is_for_normalization = true +[normalization.peaks.norm_D.param_hints] +center = {"value" = 1350, "min" = 1300, "max" = 1400} +sigma = {"value" = 90, "min" = 1e-05} +amplitude = {"value" = 10e5, "min" = 1e2} diff --git a/src/raman_fitting/config/default_models/second_order.toml b/src/raman_fitting/config/default_models/second_order.toml new file mode 100644 index 0000000..1697d02 --- /dev/null +++ b/src/raman_fitting/config/default_models/second_order.toml @@ -0,0 +1,38 @@ +[second_order] + +[second_order.models] +2nd_4peaks = "D4D4+D1D1+GD1+D2D2" + +[second_order.peaks] + +[second_order.peaks.D4D4] +peak_name = "D4D4" +peak_type = "Lorentzian" +[second_order.peaks.D4D4.param_hints] +center = {value = 2435, min = 2400, max = 2550} +sigma = {value = 30, min = 1, max = 200} +amplitude = {value = 2, min = 1e-03, max = 100} + +[second_order.peaks.D1D1] +peak_name = "D1D1" +peak_type = "Lorentzian" +[second_order.peaks.D1D1.param_hints] +center = {value = 2650, min = 2600, max = 2750} +sigma = {value = 60, min = 1, max = 200} +amplitude = {value = 14, min = 1e-03, max = 100} + +[second_order.peaks.GD1] +peak_name = "GD1" +peak_type = "Lorentzian" +[second_order.peaks.GD1.param_hints] +center = {value = 2900, min = 2800, max = 2950} +sigma = {value = 50, min = 1, max = 200} +amplitude = {value = 10, min = 1e-03, max = 100} + +[second_order.peaks.D2D2] +peak_type = "Lorentzian" +peak_name = "D2D2" +[second_order.peaks.D2D2.param_hints] +center = {value = 3250, min = 3000, max = 3400} +sigma = {value = 60, min = 20, max = 200} +amplitude = {value = 1, min = 1e-03, max = 100} diff --git a/src/raman_fitting/config/default_models/spectrum_regions.toml b/src/raman_fitting/config/default_models/spectrum_regions.toml new file mode 100644 index 0000000..61ad1f8 --- /dev/null +++ b/src/raman_fitting/config/default_models/spectrum_regions.toml @@ -0,0 +1,10 @@ +[spectrum] + +[spectrum.regions] +full = {"min" = 200, "max" = 3600} +full_first_and_second = {"min" = 800, "max" = 3500} +low = {"min" = 150, "max" = 850, "extra_margin" = 10} +first_order = {"min" = 900, "max" = 2000} +mid = {"min" = 1850, "max" = 2150, "extra_margin" = 10} +normalization = {"min" = 1500, "max" = 1675, "extra_margin" = 10} +second_order = {"min" = 2150, "max" = 3380} \ No newline at end of file diff --git a/src/raman_fitting/config/filepath_helper.py 
b/src/raman_fitting/config/filepath_helper.py index a4ff004..7a44138 100644 --- a/src/raman_fitting/config/filepath_helper.py +++ b/src/raman_fitting/config/filepath_helper.py @@ -1,140 +1,48 @@ """ this module prepares the local file paths for data and results""" -from typing import Dict -import logging -import sys -from pathlib import Path - -# from .. import __package_name__ - -logger = logging.getLogger(__name__) - -from raman_fitting.config import filepath_settings as config - -# %% - - -def get_directory_paths_for_run_mode(run_mode: str = "", **kwargs) -> Dict: - """ - Parameters - ---------- - run_mode : str, optional - this is name of the run mode. The default is ''. - **kwargs : TYPE - kwargs can contain keys such as DATASET_DIR to overwrite the standard config paths. - - Returns - ------- - dest_dirs : dict - dict containing 3 keys [RESULTS_DIR, DATASET_DIR, INDEX_FILE] - - """ - dest_dirs = {} - DATASET_DIR = None - RESULTS_DIR = None - - if run_mode in ("DEBUG", "testing"): - # self.debug = True - RESULTS_DIR = config.TESTS_RESULTS_DIR - DATASET_DIR = config.TESTS_DATASET_DIR - elif run_mode == "make_examples": - RESULTS_DIR = config.PACKAGE_HOME.joinpath("example_results") - DATASET_DIR = config.TESTS_DATASET_DIR - # self._kwargs.update({'default_selection' : 'all'}) - - elif run_mode in ("normal", "make_index"): - RESULTS_DIR = config.RESULTS_DIR - DATASET_DIR = config.DATASET_DIR - # INDEX_FILE = config.INDEX_FILE - else: - logger.warning(f"Run mode {run_mode} not recognized. Exiting...") - - INDEX_FILE = RESULTS_DIR / config.INDEX_FILE_NAME +from pathlib import Path - dest_dirs = { - "RESULTS_DIR": Path(RESULTS_DIR), - "DATASET_DIR": Path(DATASET_DIR), - "INDEX_FILE": Path(INDEX_FILE), - } +from loguru import logger - if kwargs: - dest_dirs = override_from_kwargs(dest_dirs, **kwargs) - check_and_make_dirs(dest_dirs) +def check_and_make_dirs(destdir: Path) -> None: + _destfile = None + if destdir.suffix: + _destfile = destdir + destdir = _destfile.parent - return dest_dirs + if not destdir.is_dir(): + destdir.mkdir(exist_ok=True, parents=True) + logger.info( + f"check_and_make_dirs the results directory did not exist and was created at:\n{destdir}\n" + ) + if _destfile: + _destfile.touch() -def check_and_make_dirs(dest_dirs: dict = {}): - DATASET_DIR = dest_dirs.get("DATASET_DIR", None) - if DATASET_DIR: - create_dataset_dir(DATASET_DIR) - else: - logger.warning(f"No datafiles directory was set for . Exiting...") - RESULTS_DIR = dest_dirs.get("RESULTS_DIR", None) - if RESULTS_DIR: - if not RESULTS_DIR.is_dir(): - RESULTS_DIR.mkdir(exist_ok=True, parents=True) - logger.info( - f"check_and_make_dirs the results directory did not exist and was created at:\n{RESULTS_DIR}\n" +def create_dir_or_ask_user_input(destdir: Path, ask_user=True): + counter, max_attempts = 0, 10 + while not destdir.exists() and counter < max_attempts: + answer = "y" + if ask_user: + answer = input( + f"Directory to store files raman_fitting:\n{destdir}\nCan this be folder be created? 
(y/n)" ) - """ returns index file path """ - + if "y" in answer.lower(): + destdir.mkdir(exist_ok=True, parents=True) -def override_from_kwargs(_dict, **kwargs): - _kwargs = kwargs - if _kwargs: - _keys = [i for i in _dict.keys() if i in _kwargs.keys()] - _new_dict = { - k: Path(val) if not _kwargs.get(k, None) else _kwargs[k] - for k, val in _dict.items() - } - if _new_dict != _dict: - logger.debug(f"override_from_kwargs keys {_keys} were overwritten") - return _new_dict - else: - return _dict - - -def create_dataset_dir(DATASET_DIR): # pragma: no cover - if not DATASET_DIR.is_dir(): - logger.warning( - f"The datafiles directory does not exist yet, the program will now try to create this folder.\n{DATASET_DIR}" - # therefore {config.__package_name__} can not find any files. - # The program will now try to create this folder. - ) - try: - DATASET_DIR.mkdir() - logger.warning( - f"""The datafiles directory has now been created at: -{DATASET_DIR} -please place your raman datafiles in this folder and run {config.__package_name__} again. -{config.__package_name__} exits now. -""" - ) - sys.exit() - # IDEA build in daemon version with folder watcher.... - except Exception as exc: - logger.warning( - f"""The datafiles directory could not be created at: -{DATASET_DIR} -An unexpected error ocurred: -{exc} -please redefine the path for dataset_dir in the config settings. -""" - ) - else: - # Check if dir is not empty else raise a warning - _diter = DATASET_DIR.iterdir() - try: - next(_diter) - except StopIteration: - logger.warning( - f"""The datafiles directory is empty: -{DATASET_DIR} -please place your files in here or -change this path in the config settings. -""" + if "y" not in answer.lower(): + new_path_user = input( + "Please provide the directory to store files raman_fitting:" ) + try: + new_path = Path(new_path_user).resolve() + except Exception as e: + print(f"Exception: {e}") + counter += 1 + destdir = new_path + + logger.info(f"Directory created: {destdir}") + return destdir diff --git a/src/raman_fitting/config/filepath_settings.py b/src/raman_fitting/config/filepath_settings.py deleted file mode 100644 index dd72cab..0000000 --- a/src/raman_fitting/config/filepath_settings.py +++ /dev/null @@ -1,62 +0,0 @@ -# from .logging_config import get_console_handler -import logging -import pathlib -from sys import exit - -from raman_fitting import __package_name__ - -logger = logging.getLogger(__name__) - - -# import pandas as pd -# pd.options.display.max_rows = 10 -# pd.options.display.max_columns = 10 -# __package_name__ = 'raman_fitting' - -CONFIG_FILE = pathlib.Path(__file__).resolve() -PACKAGE_ROOT = CONFIG_FILE.parent.parent -MODEL_DIR = PACKAGE_ROOT / "deconvolution_models" - -# TESTS_ROOT_DIR = PACKAGE_ROOT.parent.parent / "tests" -# TESTS_ROOT_DIR = -TESTS_DATASET_DIR = PACKAGE_ROOT / "datafiles" / "example_files" - -# Home dir from pathlib.Path for storing the results -PACKAGE_HOME = ( - pathlib.Path.home() / f".{__package_name__}" -) # pyramdeconv is the new version package name - - -try: - if not PACKAGE_HOME.is_dir(): - try: - logger.warning( - f"Package home directory did not exist, will now be created at:\n{PACKAGE_HOME}\n--------------------" - ) - PACKAGE_HOME.mkdir() - except Exception as exc: - logger.warning( - f"Package home mkdir unexpected error\n{exc}.\nFolder{PACKAGE_HOME} could not be created, exiting." 
- ) - exit() - else: - logger.info( - f"Package home directory exists at:\n{PACKAGE_HOME}\n--------------------" - ) -except Exception as exc: - logger.warning( - f"Unexpected error with checking for package home folder:\nFolder:{PACKAGE_HOME}\nError:\n{exc}\n {__package_name__} can not run." - ) - exit() - - -TESTS_RESULTS_DIR = PACKAGE_HOME / "test_results" - -DATASET_DIR = PACKAGE_HOME / "datafiles" -RESULTS_DIR = PACKAGE_HOME / "results" - -# Optional local configuration file -LOCAL_CONFIG_FILE = PACKAGE_HOME / "local_config.py" - -# Storage file of the index -INDEX_FILE_NAME = f"{__package_name__}_index.csv" diff --git a/src/raman_fitting/config/logging_config.py b/src/raman_fitting/config/logging_config.py index 05a9dc1..360bfe3 100644 --- a/src/raman_fitting/config/logging_config.py +++ b/src/raman_fitting/config/logging_config.py @@ -7,7 +7,7 @@ # it is in the same Python interpreter process. FORMATTER = logging.Formatter( - "%(asctime)s — %(name)s — %(levelname)s —" "%(funcName)s:%(lineno)d — %(message)s" + "%(asctime)s — %(name)s — %(levelname)s —%(funcName)s:%(lineno)d — %(message)s" ) diff --git a/src/raman_fitting/config/path_settings.py b/src/raman_fitting/config/path_settings.py new file mode 100644 index 0000000..6f3c18a --- /dev/null +++ b/src/raman_fitting/config/path_settings.py @@ -0,0 +1,148 @@ +from pathlib import Path +import tempfile +from enum import StrEnum, auto + + +from pydantic import ( + BaseModel, + DirectoryPath, + FilePath, + ConfigDict, + Field, + model_validator, +) + + +from .filepath_helper import check_and_make_dirs + + +PACKAGE_NAME = "raman_fitting" +CURRENT_FILE: Path = Path(__file__).resolve() +PACKAGE_ROOT: Path = CURRENT_FILE.parent.parent +REPO_ROOT: Path = PACKAGE_ROOT.parent +INTERNAL_DEFAULT_MODELS: Path = CURRENT_FILE.parent / "default_models" +# MODEL_DIR: Path = PACKAGE_ROOT / "deconvolution_models" +INTERNAL_EXAMPLE_FIXTURES: Path = PACKAGE_ROOT / "example_fixtures" +INTERNAL_PYTEST_FIXTURES: Path = REPO_ROOT / "tests" / "test_fixtures" + +# Home dir from pathlib.Path for storing the results +USER_HOME_PACKAGE: Path = Path.home() / PACKAGE_NAME +# pyramdeconv is the new version package name + +# Optional local configuration file +USER_LOCAL_CONFIG_FILE: Path = USER_HOME_PACKAGE / f"{PACKAGE_NAME}/toml" + +INDEX_FILE_NAME = f"{PACKAGE_NAME}_index.csv" +# Storage file of the index +USER_INDEX_FILE_PATH: Path = USER_HOME_PACKAGE / INDEX_FILE_NAME + +TEMP_DIR = Path(tempfile.mkdtemp(prefix="raman-fitting-")) +TEMP_RESULTS_DIR: Path = TEMP_DIR / "results" + +CLEAN_SPEC_REGION_NAME_PREFIX = "savgol_filter_raw_region_" + +ERROR_MSG_TEMPLATE = "{sample_group} {sampleid}: {msg}" + + +class InternalPathSettings(BaseModel): + settings_file: FilePath = Field(CURRENT_FILE) + package_root: DirectoryPath = Field(PACKAGE_ROOT) + default_models_dir: DirectoryPath = Field(INTERNAL_DEFAULT_MODELS) + example_fixtures: DirectoryPath = Field(INTERNAL_EXAMPLE_FIXTURES) + pytest_fixtures: DirectoryPath = Field(INTERNAL_PYTEST_FIXTURES) + temp_dir: DirectoryPath = Field(TEMP_RESULTS_DIR) + temp_index_file: FilePath = Field(TEMP_DIR / INDEX_FILE_NAME) + + +EXPORT_FOLDER_NAMES = { + "plots": "fitting_plots", + "components": "fitting_components", + "raw_data": "raw_data", +} + + +class RunModes(StrEnum): + NORMAL = auto() + PYTEST = auto() + EXAMPLES = auto() + DEBUG = auto() + + +def get_run_mode_paths(run_mode: RunModes, user_package_home: Path = None): + if user_package_home is None: + user_package_home = USER_HOME_PACKAGE + if isinstance(run_mode, str): + 
run_mode = RunModes(run_mode) + + RUN_MODE_PATHS = { + RunModes.PYTEST.name: { + "RESULTS_DIR": TEMP_RESULTS_DIR, + "DATASET_DIR": INTERNAL_EXAMPLE_FIXTURES, + "USER_CONFIG_FILE": INTERNAL_EXAMPLE_FIXTURES / f"{PACKAGE_NAME}.toml", + "INDEX_FILE": TEMP_RESULTS_DIR / f"{PACKAGE_NAME}_index.csv", + }, + RunModes.EXAMPLES.name: { + "RESULTS_DIR": user_package_home / "examples", + "DATASET_DIR": INTERNAL_EXAMPLE_FIXTURES, + "USER_CONFIG_FILE": INTERNAL_EXAMPLE_FIXTURES / f"{PACKAGE_NAME}.toml", + "INDEX_FILE": user_package_home / "examples" / f"{PACKAGE_NAME}_index.csv", + }, + RunModes.NORMAL.name: { + "RESULTS_DIR": user_package_home / "results", + "DATASET_DIR": user_package_home / "datafiles", + "USER_CONFIG_FILE": user_package_home / "raman_fitting.toml", + "INDEX_FILE": user_package_home / f"{PACKAGE_NAME}_index.csv", + }, + } + if run_mode.name not in RUN_MODE_PATHS: + raise ValueError(f"Choice of run_mode {run_mode.name} not supported.") + return RUN_MODE_PATHS[run_mode.name] + + +class ExportPathSettings(BaseModel): + results_dir: Path + plots: DirectoryPath = Field(None, validate_default=False) + components: DirectoryPath = Field(None, validate_default=False) + raw_data: DirectoryPath = Field(None, validate_default=False) + + @model_validator(mode="after") + def set_export_path_settings(self) -> "ExportPathSettings": + if not self.results_dir.is_dir(): + self.results_dir.mkdir(exist_ok=True, parents=True) + + plots: DirectoryPath = self.results_dir.joinpath(EXPORT_FOLDER_NAMES["plots"]) + self.plots = plots + components: DirectoryPath = self.results_dir.joinpath( + EXPORT_FOLDER_NAMES["components"] + ) + self.components = components + raw_data: DirectoryPath = self.results_dir.joinpath( + EXPORT_FOLDER_NAMES["raw_data"] + ) + self.raw_data = raw_data + return self + + +class RunModePaths(BaseModel): + model_config = ConfigDict(alias_generator=str.upper) + + run_mode: RunModes + results_dir: DirectoryPath + dataset_dir: DirectoryPath + user_config_file: Path + index_file: Path + + +def initialize_run_mode_paths( + run_mode: RunModes, user_package_home: Path = None +) -> RunModePaths: + run_mode_paths = get_run_mode_paths(run_mode, user_package_home=user_package_home) + + for destname, destdir in run_mode_paths.items(): + destdir = Path(destdir) + check_and_make_dirs(destdir) + return RunModePaths(RUN_MODE=run_mode, **run_mode_paths) + + +def create_default_package_dir_or_ask(): + return USER_HOME_PACKAGE diff --git a/src/raman_fitting/datafiles/__init__.py b/src/raman_fitting/datafiles/__init__.py deleted file mode 100644 index 9b0cb91..0000000 --- a/src/raman_fitting/datafiles/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Created on Mon Jun 28 10:06:40 2021 - -@author: DW -""" diff --git a/src/raman_fitting/datafiles/example_files/__init__.py b/src/raman_fitting/datafiles/example_files/__init__.py deleted file mode 100644 index f5f6efd..0000000 --- a/src/raman_fitting/datafiles/example_files/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Created on Mon Jun 28 10:06:52 2021 - -@author: DW -""" diff --git a/src/raman_fitting/deconvolution_models/base_model.py b/src/raman_fitting/deconvolution_models/base_model.py deleted file mode 100644 index 06d1324..0000000 --- a/src/raman_fitting/deconvolution_models/base_model.py +++ /dev/null @@ -1,270 +0,0 @@ -""" The members of the validated collection of BasePeaks are assembled here into fitting Models""" -import logging -from warnings import warn - -from lmfit import Model - -from raman_fitting.deconvolution_models.peak_validation import 
PeakModelValidator - -logger = logging.getLogger(__name__) - -_SUBSTRATE_PEAK = "Si1_peak" - - -# ====== InitializeMode======= # -class InitializeModels: - """ - This class will initialize and validate the different fitting models. - The models are of type lmfit.model.CompositeModel and stored in a dict with names - for the models as keys. - """ - - _standard_1st_order_models = { - "2peaks": "G+D", - "3peaks": "G+D+D3", - "4peaks": "G+D+D3+D4", - "5peaks": "G+D+D2+D3+D4", - "6peaks": "G+D+D2+D3+D4+D5", - } - _standard_2nd_order_models = {"2nd_4peaks": "D4D4+D1D1+GD1+D2D2"} - - def __init__(self, standard_models=True): - self._cqnm = self.__class__.__name__ - - self.peak_collection = self.get_peak_collection(PeakModelValidator) - - self.all_models = {} - self.construct_standard_models() - - def get_peak_collection(self, func): - try: - peak_collection = func() - logger.debug( - f"{self._cqnm} collection of peaks validated with {func}:\n{peak_collection}" - ) - - except Exception as e: - logger.error(f"{self._cqnm} failure in call {func}.\n\t{e}") - peak_collection = [] - return peak_collection - - def construct_standard_models(self): - _models = {} - _models_1st = { - f"1st_{key}": BaseModel( - peak_collection=self.peak_collection, model_name=value - ) - for key, value in self._standard_1st_order_models.items() - } - _models.update(_models_1st) - _models_1st_no_substrate = { - f"1st_{key}": BaseModel( - peak_collection=self.peak_collection, model_name=value - ) - for key, value in self._standard_1st_order_models.items() - } - _models.update(_models_1st_no_substrate) - self.first_order = {**_models_1st, **_models_1st_no_substrate} - - _models_2nd = { - key: BaseModel(peak_collection=self.peak_collection, model_name=value) - for key, value in self._standard_2nd_order_models.items() - } - _models.update(_models_2nd) - self.second_order = _models_2nd - self.all_models = _models - - def __repr__(self): - _t = "\n".join(map(str, self.all_models.values())) - return _t - - -class BaseModelWarning(UserWarning): - pass - - -class BaseModel: - """ - This Model class combines the collection of valid peaks from BasePeak into a regression model of type lmfit.model.CompositeModel - that is compatible with the lmfit Model and fit functions. - The model_name, include_substrate and lmfit_model attributes are kept consistent w.r.t. their meaning when they are set. 
- - Parameters - -------- - model_name: string ==> is converted to lmfit Model object - include_substrate: bool ==> toggle between True and False to include a substrate peak - - """ - - _SEP = "+" - _SUFFIX = "_" - - # IDEA change include substrate to has substrate and remove from init - def __init__( - self, - model_name: str = "", - peak_collection=PeakModelValidator(), - substrate_peak_name: str = _SUBSTRATE_PEAK, - ): - self.peak_collection = peak_collection - self.peak_options = self.set_peak_options() - self.substrate_peak_name = substrate_peak_name - self._substrate_name = self.substrate_peak_name.split(self._SUFFIX)[0] - self.model_name = model_name - self.lmfit_model = self.model_constructor_from_model_name(self.model_name) - - def set_peak_options(self): - _opts = {} - for _pk in self.peak_collection.options: - try: - _prefix = _pk.split(self._SUFFIX)[0] - if _prefix: - _opts.update({_prefix: _pk}) - except Exception as e: - warn( - f'Peak {_pk} not valid name "{self._SUFFIX}, error:\n{e}', - BaseModelWarning, - ) - return _opts - - @property - def model_name(self): - return self._model_name - - @model_name.setter - def model_name(self, name): - """Model name for str => model conversion""" - _ch = True - name = self.validate_model_name_input(name) - if hasattr(self, "_model_name"): - if name == self._model_name: - _ch = False - if _ch: - self.lmfit_model = self.model_constructor_from_model_name(name) - self._model_name = name - - @property - def has_substrate(self): - _has = False - if hasattr(self, "model_name"): - _has = self.name_contains_substrate(self.model_name) - - return _has - - @has_substrate.setter - def has_substrate(self, value): - raise AttributeError( - f'{self.__class__.__name__} this property can not be set "{value}", use add_ or remove_ substrate function.' - ) - - def name_contains_substrate(self, _name): - """Checks if name contains the substrate name, returns bool""" - _name_contains_any = False - if type(_name) == str: - _name_contains_any = any( - i == self._substrate_name for i in _name.split("+") - ) - return _name_contains_any - - def remove_substrate(self): - if hasattr(self, "model_name"): - _name = self.model_name - if self.name_contains_substrate(_name): - warn( - f'\n{self.__class__.__name__} remove substrate is called so "{self._substrate_name}" is removed from {_name}.\n', - BaseModelWarning, - ) - _new_name = "+".join( - i for i in _name.split("+") if i not in self._substrate_name - ) # remove substr name - if _new_name != _name: - self.model_name = _new_name - - def add_substrate(self): - if hasattr(self, "model_name"): - _name = self.model_name - if not self.name_contains_substrate(_name): - _new_name = _name + f"+{self._substrate_name}" # add substr name - if _new_name != _name: - self.model_name = _new_name - - def validate_model_name_input(self, value): - """checks if given input name is valid""" - if type(value) != str: - raise TypeError( - f'Given name "{value}" for model_name should be a string insteady of type({type(value).__name__})' - ) - elif not value: - warn(f'\n\tThis name "{value}" is an empty string', BaseModelWarning) - return value - elif "+" not in value: - warn( - f'\n\tThis name "{value}" does not contain the separator "+". 
(could be just 1 Peak)', - BaseModelWarning, - ) - return value - else: - _clean_string = "".join([i for i in value if i.isalnum() or i == "+"]) - _splitname = _clean_string.split("+") - if not _splitname or not any(bool(i) for i in _splitname): - raise ValueError(f'The split with sep "+" of name {value} is empty') - else: - return "+".join([i for i in _splitname if i]) - - def model_constructor_from_model_name(self, _name): - """Construct a lmfit.Model from the string model name""" - - _discarded_terms = [] - _peak_names = [] - if _name: - for _peak in _name.split(self._SEP): # filter model name for last time - _peak_from_opts = self.peak_options.get(_peak, None) - if _peak_from_opts: - _peak_names.append(_peak_from_opts) - else: - _discarded_terms.append(_peak) - - _peak_models = [ - self.peak_collection.model_dict.get(i) for i in _peak_names if i - ] - if _discarded_terms: - warn( - f'Model evalution for "{_name}" discarded terms {",".join(_discarded_terms)} => clean: {_peak_names}', - BaseModelWarning, - ) - - if not _peak_models: - _lmfit_model = None - elif len(_peak_models) == 1: - _lmfit_model = _peak_models[0].peak_model - elif len(_peak_models) >= 2: - _composite_model = None - for _pkmod in _peak_models: - _mod = _pkmod.peak_model - if not _composite_model: - _composite_model = _mod - else: - try: - _composite_model += _mod - except Exception as e: - warn( - f"Model add operation failed for constructing Composite Model {_pkmod.name}.\n {e}", - BaseModelWarning, - ) - _lmfit_model = _composite_model - - if not issubclass(type(_lmfit_model), Model): - warn( - f"Model constructor does not yield type ({type(Model)} {type(_lmfit_model)}.", - BaseModelWarning, - ) - return _lmfit_model - - def __repr__(self): - _choice = "no" if not self.has_substrate else "yes" - _txt = f"{self.model_name}, substrate ({_choice}): " - if hasattr(self, "lmfit_model"): - _txt += "\n\t" + repr(self.lmfit_model) - else: - _txt += "empty model" - return _txt diff --git a/src/raman_fitting/deconvolution_models/default_peaks/__init__.py b/src/raman_fitting/deconvolution_models/default_peaks/__init__.py deleted file mode 100644 index 27ebb8d..0000000 --- a/src/raman_fitting/deconvolution_models/default_peaks/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base_peak import BasePeak -from .first_order_peaks import * -from .normalization_peaks import * -from .second_order_peaks import * diff --git a/src/raman_fitting/deconvolution_models/default_peaks/base_peak.py b/src/raman_fitting/deconvolution_models/default_peaks/base_peak.py deleted file mode 100644 index 9836164..0000000 --- a/src/raman_fitting/deconvolution_models/default_peaks/base_peak.py +++ /dev/null @@ -1,574 +0,0 @@ -import inspect -from functools import partialmethod -from keyword import iskeyword as _iskeyword -from warnings import warn - -from lmfit import Parameter, Parameters - -from lmfit.models import GaussianModel, LorentzianModel, Model, VoigtModel - -if __name__ in ("__main__",): - from raman_fitting.utils.coordinators import FieldsTracker -else: - from ...utils.coordinators import FieldsTracker - - -class BasePeakWarning(UserWarning): # pragma: no cover - pass - - -class BasePeak(type): - """ - Base class for easier definition of typical intensity peaks found in the - raman spectra. - - The goal of is this metaclass is to be able to more easily write - peak class definitions (for possible user input). 
It tries to find three - fields in the definition, which are required for a LMfit model creation, - namely: peak_name, peak_type and the param hints. - - peak_name: - arbitrary name as prefix for the peak - peak_type: - defines the lineshape of the peak, the following options are implemented: - "Lorentzian", "Gaussian", "Voigt" - params_hints: - initial values for the parameters of the peak, at least - a value for the center position of the peak should be given. - - It tries to find these fields in different sources such as: the class definition - with only class attributes, init attributes or even in the keywords arguments. - The FieldsTracker class instance (fco) keeps track of the definition in different - sources and can check when all are ready. If there are multiple sources with definitions - for the same field than the source with highest priority will be chosen (based on tuple order). - Each field is a propery which validates the assigments. - - Sort of wrapper for lmfit.model definition. - Several of these peaks combined are used to make the lmfit CompositeModel - (composed in the fit_models module), which will be used for the fit. - - -------- - Example usage - -------- - - "Example class definition with attribute definitions" - class New_peak(metaclass=BasePeak): - "New peak child class for easier definition" - - param_hints = { 'center': {'value': 2435,'min': 2400, 'max': 2550}} - peak_type = 'Voigt' #'Voigt' - peak_name ='R2D2' - - New_peak().peak_model == - - "Example class definition with keyword arguments" - - New_peak = BasePeak('new', - peak_name='D1', - peak_type= 'Lorentzian', - param_hints = { 'center': {'value': 1500}} - ) - New_peak() - - - """ - - _fields = ["peak_name", "peak_type", "param_hints"] - _sources = ("user_input", "kwargs", "cls_dict", "init", "class_name") - _synonyms = { - "peak_name": [], - "peak_type": [], - "param_hints": ["input_param_settings"], - } - - PEAK_TYPE_OPTIONS = ["Lorentzian", "Gaussian", "Voigt"] - - # ('value', 'vary', 'min', 'max', 'expr') # optional - default_settings = {"gamma": {"value": 1, "min": 1e-05, "max": 70, "vary": False}} - subclasses = [] - - _debug = False - - def __prepare__(name, bases, **kwargs): - """prepare method only for debugging""" - if "debug" in kwargs.keys(): - if kwargs.get("debug", False): - print(f"__prepare ,name {name}, bases{bases}, kwargs {kwargs}") - return kwargs - - def __new__(mcls, name, *args, **kwargs): - if len(args) == 2: - bases, cls_dict = args - else: - bases, cls_dict = (), {} - - if kwargs.get("debug", False): - mcls._debug = True - if cls_dict.get("debug", False): - mcls._debug = True - if mcls._debug: - print( - f"Called __new ({mcls} ),name {name}, bases{bases}, cls_dict {cls_dict.keys()} kwargs {kwargs}" - ) - for name_ in [name] + list(kwargs.keys()): - if type(name_) is not str: - raise TypeError("Class name and keywords names must be strings") - if not name_.isidentifier(): - raise ValueError( - "Class name and keywords names must be valid " - f"identifiers: {name_!r}" - ) - if _iskeyword(name_): - raise ValueError( - "Class name and keywords names cannot be a " f"keyword: {name_!r}" - ) - - # Init the fco which check and stores the values for each field - # when fco.status == True then the green light for class initialization is given - fco = FieldsTracker(fields=mcls._fields, sources=mcls._sources) - - # First thing add input from source kwargs to fco - fco.multi_store("kwargs", **kwargs) - - # Also add input class name to fco - fco.multi_store("class_name", **{"peak_name": 
name}) - # kwargs_ = {k : val for k,val in cls_dict.items() if k in cls_object._fields} - _cls_dict_field_kwargs = {} - # Second thing, check class dict for field values, - # store and replace field values with properties from metaclass - for field in mcls._fields: - if field in cls_dict.keys(): # delete field from cls_dict and store in fco - value = cls_dict[field] - fco.store("cls_dict", field, value) - _cls_dict_field_kwargs.update({field: value}) - del cls_dict[field] - if hasattr(mcls, field): # store new field property in cls dict - _obj = getattr(mcls, field) - if isinstance( - _obj, property - ): # if mcls has property function with name == field - cls_dict[ - field - ] = _obj # stores property in cls_dict before init so it will be set as a property for cls instance - - # Third: Define the 'new' init for the class, which sets the values from the fco results - def _init_subclass_replacement(self, *args, **kwargs): - if self._debug: - print(f"child __init__ mod called {self}, {kwargs}") - # super() - if hasattr(self, "fco"): - for k, val in self.fco.results.items(): - setattr(self, k, val["value"]) - if self._debug: - print(f"fco child __init__ setattr called {k}, {val}") - - # if hasattr(self, 'peak_model'): - setattr(self, "create_peak_model", self.create_peak_model) - setattr(self, "peak_model", self.create_peak_model()) - - # Fourth thing, check class __init__ for setting of field values and delete from cls dict - if "__init__" in cls_dict.keys(): - cls_init_part_obj = partialmethod(cls_dict["__init__"]) - - sig = inspect.signature(cls_dict["__init__"]) - _cls_init_part_obj_funcs = { - k: val - for k, val in cls_dict.items() - if inspect.isfunction(val) and k != "__init__" - } - - for fname, func in _cls_init_part_obj_funcs.items(): - setattr(cls_init_part_obj, fname, func) - cls_init_part_obj_dct_keys = set(cls_init_part_obj.__dict__.keys()) - sig = inspect.signature(func) - try: - func(cls_init_part_obj) - except Exception as e: - warn( - f"Definition of the __init__ {fname} fails, please redefine init in class, \n{e}", - BasePeakWarning, - ) - try: - cls_init_part_obj.func(cls_init_part_obj) - except AttributeError: - warn( - f"Definition of the __init__ {name} fails, please redefine {fco.missing} in class", - BasePeakWarning, - ) - _cls_init_part_fco_dict = mcls._cleanup_init_dict( - cls_init_part_obj.__dict__ - ) - fco.multi_store("init", **_cls_init_part_fco_dict) - - cls_dict["_original_init_"] = cls_dict["__init__"] - - del cls_dict["__init__"] - else: - pass - - cls_dict["__init__"] = _init_subclass_replacement - - if fco.status: - pass - else: - warn( - f"Definition for {name} is not complete, please redefine {fco.missing} in class", - BasePeakWarning, - ) - - if mcls._debug: - print( - f"Calling super__new__() ({mcls} ),name {name}, bases{bases}, cls_dict {cls_dict.keys()}" - ) - cls_object = super().__new__( - mcls, name, bases, cls_dict - ) # ,*args, **{**_attrs_found, **kwargs}) - if mcls._debug: - print(f"Called super__new__() ({mcls} ),cls_object: {cls_object}") - # setattr(cls_object, "__init__", init_) - setattr(cls_object, "_fields", mcls._fields) - setattr(cls_object, "_debug", mcls._debug) - setattr(cls_object, "fco", fco) - setattr(cls_object, "PEAK_TYPE_OPTIONS", mcls.PEAK_TYPE_OPTIONS) - cls_object = mcls._set_other_methods(cls_object) - return cls_object - - def __init__(self, name, *args, **kwargs): - # subclassess are appended here - if self not in self.subclasses: - self.subclasses.append(self) - - @classmethod - def _cleanup_init_dict(cls, _dict): - 
"""cleans up the __init__ dictionary from defined class""" - _dkeys = list(_dict.keys()) - _result = {} - while _dkeys: - _dk = _dkeys.pop() - _kmatch = [ - (i, _dk) for i in cls._synonyms.keys() if i in _dk - ] # clean field match - _synmatch = [ - (k, syn) - for k, val in cls._synonyms.items() - for syn in val - if syn in _dk - ] # synonym field match - if _kmatch: - _result.update({i[0]: _dict[i[1]] for i in _kmatch}) - elif not _kmatch and _synmatch: - _result.update({i[0]: _dict[i[1]] for i in _synmatch}) - return _result - - @classmethod - def _set_other_methods(cls, cls_object): - """sets other methods found in this baseclass on the defined cls object""" - _other_methods = [ - i for i in dir(cls) if not i.startswith("_") and not i == "mro" - ] - for method in _other_methods: - _mcls_obj = getattr(cls, method) - if method.endswith("__") and not method.startswith("__"): - method = f"__{method}" - - if not isinstance(_mcls_obj, property): - setattr(cls_object, method, _mcls_obj) - return cls_object - - @property - def peak_type(self): - """This property (str) should be assigned and in self.PEAK_TYPE_OPTIONS""" - return self._peak_type - - @peak_type.setter - def peak_type(self, value: str): - """The peak type property should be in PEAK_TYPE_OPTIONS""" - if not isinstance(value, str): - raise TypeError(f'The value "{value}" is not a string.') - value_ = None - if any([value.upper() == i.upper() for i in self.PEAK_TYPE_OPTIONS]): - value_ = value - elif any([i.upper() in value.upper() for i in self.PEAK_TYPE_OPTIONS]): - _opts = [i for i in self.PEAK_TYPE_OPTIONS if i.upper() in value.upper()] - if len(_opts) == 1: - value_ = _opts[0] - - warn( - f"Peak type misspelling mistake check {value}, forgiven and fixed with {value_}", - BasePeakWarning, - ) - elif len(_opts) > 1: - raise ValueError( - f'Multiple options {_opts} for misspelled value "{value}" in {self.PEAK_TYPE_OPTIONS}.' - ) - else: - raise ValueError( - f'Multiple options {_opts} for misspelled value "{value}" in {self.PEAK_TYPE_OPTIONS}.' - ) - else: - raise ValueError( - f'The value "{value}" for "peak_type" is not in {self.PEAK_TYPE_OPTIONS}.' 
- ) - if value_: - self._peak_type = value_ - self.fco.store("user_input", "peak_type", value) - self.peak_model = self.create_peak_model() - - @property - def peak_model(self): - if not hasattr(self, "_peak_model"): - self.create_peak_model() - else: - return self._peak_model - - @peak_model.setter - def peak_model(self, value): - """ - This property is an instance of lmfit.Model, - constructed from peak_type, peak_name and param_hints setters - """ - if not isinstance(value, Model): - self._peak_model = self.create_peak_model() - else: - self._peak_model = value - - def create_peak_model(self): - _peak_model = None - if self.fco.status: - if all(hasattr(self, field) for field in self._fields): - try: - create_model_kwargs = dict( - peak_name=self.peak_name, - peak_type=self.peak_type, - param_hints=self.param_hints, - ) - - if hasattr(self, "create_model_kwargs"): - _orig_kwargs = self.create_model_kwargs - - _peak_model = LMfitModelConstructorMethods.create_peak_model_from_name_type_param_hints( - **create_model_kwargs - ) - self.create_model_kwargs = create_model_kwargs - except Exception as e: - print(f"try make models:\n{self}, \n\t {e}") - else: - pass - else: - pass - print(f"missing field {self.fco.missing} {self},\n") - return _peak_model - - @property - def param_hints(self): - """This property is dict of dicts and sets the initial values for the parameters""" - if hasattr(self, "_param_hints"): - if isinstance(self._param_hints, Parameters): - return self._param_hints - else: - raise TypeError( - f"{self.__class__.__name__} self._param_hints is not instance of Parameters" - ) - - @param_hints.setter - def param_hints(self, value, **kwargs): - if isinstance(value, Parameters): - param_hints_ = value - else: - dict_ = {} - if isinstance(value, dict): - dict_ = {**dict_, **value} - if kwargs: - dict_ = {**dict_, **kwargs} - param_hints_ = LMfitModelConstructorMethods.param_hints_constructor( - param_hints=dict_, default_settings=self.default_settings - ) - self._param_hints = param_hints_ - self.fco.store("user_input", "param_hints", param_hints_) - if not isinstance(self, BasePeak): - self.peak_model = self.create_peak_model() - - @property - def peak_name(self): - """This is the name that the peak_model will get as prefix""" - if self._peak_name: - if not self._peak_name.endswith("_"): - self._peak_name = self._peak_name + "_" - return self._peak_name - - @peak_name.setter - def peak_name(self, value: str, maxlen=20): - if len(value) < maxlen: - prefix_set = value + "_" - self._peak_name = value - self.fco.store("user_input", "peak_name", value) - self.peak_model = self.create_peak_model() - else: - raise ValueError( - f'The value "{value}" for peak_name is too long({len(value)}) (max. {maxlen}).' 
- ) - - def repr__(self): - _repr = f"{self.__class__.__name__}" - if hasattr(self, "peak_model"): - _repr += f", {self.peak_model}" - _param_center = "" - if self.peak_model: - _param_center = self.peak_model.param_hints.get("center", {}) - if _param_center: - _center_txt = "" - _center_val = _param_center.get("value") - _center_min = _param_center.get("min", _center_val) - if _center_min != _center_val: - _center_txt += f"{_center_min} < " - _center_txt += f"{_center_val}" - _center_max = _param_center.get("max", _center_val) - if _center_max != _center_val: - _center_txt += f" > {_center_max}" - _repr += f", center : {_center_txt}" - else: - _repr += ": no Model set" - return _repr - - def print_params(self): - if self.peak_model: - self.peak_model.print_param_hints() - else: - print(f"No model set for: {self}") - - -# %% - - -# %% -# LMfitModelConstructorMethods.create_peak_model_from_name_type_param_hints(peak_model= new.peak_model) -# new.peak_name -# %% -class LMfitModelConstructorMethods: - PARAMETER_ARGS = inspect.signature(Parameter).parameters.keys() - - @classmethod - def create_peak_model_from_name_type_param_hints( - cls, - peak_model: Model = None, - peak_name: str = None, - peak_type: str = None, - param_hints: Parameter = None, - ): - if peak_model: - param_hints_ = peak_model.make_params() - peak_name_ = peak_model.prefix - peak_type_ = peak_model.func.__name__ - if peak_name: - if peak_name != peak_name_: - raise Warning("changed name of peak model") - peak_model.prefix = peak_name - else: - peak_name = peak_name_ - if peak_type: - if peak_type != peak_type_: - raise Warning("changed type of peak model") - peak_model = cls.make_model_from_peak_type_and_name( - peak_name=peak_name, peak_type=peak_type - ) - if param_hints: - if param_hints != param_hints_: - peak_model = cls.set_params_hints_on_model(peak_model, param_hints) - else: - peak_model = cls.set_params_hints_on_model(peak_model, param_hints_) - else: - if peak_name: - pass - else: - raise Warning( - "no peak_name given for create_peak_model, peak_name will be default" - ) - if peak_type: - peak_model = cls.make_model_from_peak_type_and_name( - peak_name=peak_name, peak_type=peak_type - ) - if param_hints: - peak_model = cls.set_params_hints_on_model(peak_model, param_hints) - return peak_model - - def make_model_from_peak_type_and_name(peak_type="Lorentzian", peak_name=""): - """returns the lmfit model instance according to the chosen peak type and sets the prefix from peak_name""" - model = None - if peak_type: - _val_upp = peak_type.upper() - if "Lorentzian".upper() in _val_upp: - model = LorentzianModel(prefix=peak_name) - elif "Gaussian".upper() in _val_upp: - model = GaussianModel(prefix=peak_name) - elif "Voigt".upper() in _val_upp: - model = VoigtModel(prefix=peak_name) - else: - raise NotImplementedError( - f'This peak type or model "{peak_type}" has not been implemented.' - ) - return model - - def param_hints_constructor(param_hints: dict = {}, default_settings: dict = {}): - """ - This method validates and converts the input parameter settings (dict) argument - into a lmfit Parameters class instance. 
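# Illustrative sketch (not part of the removed sources): the constructor helpers above
# boil down to standard lmfit calls -- pick a model class from the peak type, set the
# prefix from the peak name, and seed parameter hints of the shape
# {"center": {"value": ..., "min": ..., "max": ...}}. The helper name and the example
# values (taken from the D peak defaults further below) are only for illustration.
from lmfit.models import LorentzianModel


def build_peak_model(peak_name: str, param_hints: dict):
    model = LorentzianModel(prefix=f"{peak_name}_")
    for pname, hints in param_hints.items():
        # each hint dict maps directly onto Model.set_param_hint keyword arguments
        model.set_param_hint(pname, **hints)
    return model


d_model = build_peak_model("D", {"center": {"value": 1350, "min": 1330, "max": 1380}})
d_params = d_model.make_params()  # parameter names carry the prefix, e.g. "D_center"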
- """ - params = Parameters() - - if default_settings: - try: - _default_params = [ - Parameter(k, **val) for k, val in default_settings.items() - ] - params.add_many(*_default_params) - except Exception as e: - raise ValueError( - f"Unable to create a Parameter from default_parameters {default_settings}:\n{e}" - ) - if not isinstance(param_hints, dict): - raise TypeError( - f"input_param_hints should be of type dictionary not {type(param_hints)}" - ) - else: - _valid_parlst = [] - for k, val in param_hints.items(): - try: - _par = Parameter(k, **val) - _valid_parlst.append(_par) - except Exception as e: - raise ValueError( - f"Unable to create a Parameter from {k} and {val}:\n{e}" - ) - if _valid_parlst: - try: - params.add_many(*_valid_parlst) - except Exception as e: - raise ValueError( - f"Unable to add many Parameters from {_valid_parlst}:\n{e}" - ) - return params - - @classmethod - def set_params_hints_on_model(cls, model, param_hints_): - _error = "" - if issubclass(model.__class__, Model) and issubclass( - param_hints_.__class__, Parameters - ): - try: - for pname, par in param_hints_.items(): - try: - _par_hint_dict = { - pn: getattr(par, pn, None) - for pn in cls.PARAMETER_ARGS - if getattr(par, pn, None) - } - model.set_param_hint(**_par_hint_dict) - except Exception as e: - _error += f"Error in make_model_hints, check param_hints for {pname} with {par}, {e}" - except Exception as e: - _error += f"Error in make_model_hints, check param_hints \n{e}" - else: - _error += f"TypeError in make_model_hints, check types of model {type(model)} param_hints_{type(param_hints_)}" - if _error: - warn("Errors found in setting of param hints: {_error}", BasePeakWarning) - return model diff --git a/src/raman_fitting/deconvolution_models/default_peaks/first_order_peaks.py b/src/raman_fitting/deconvolution_models/default_peaks/first_order_peaks.py deleted file mode 100644 index c93a076..0000000 --- a/src/raman_fitting/deconvolution_models/default_peaks/first_order_peaks.py +++ /dev/null @@ -1,143 +0,0 @@ -""" Default peaks used for 1st order deconvolution""" - -if __name__ == "__main__": - from raman_fitting.deconvolution_models.default_peaks.base_peak import BasePeak -else: - from .base_peak import BasePeak - -__all__ = ["G_peak", "D_peak", "D2_peak", "D3_peak", "D5_peak", "Si1_peak"] - -# ====== FIRST ORDER PEAKS ======= # - - -class G_peak(metaclass=BasePeak): - - """ - Graphite belongs to the P63/mmc (D46h) space group. If considering only a graphene plane, at - the à point of the Brillouin zone, there are six normal modes that possess only one mode (doubly - degenerate in plane) with a E2g representation, which is Raman active - G ; Ideal graphitic lattice (E2g-symmetry) - G peak center stable over different laser wavelengths. - Influenced by potential, HSO4 adsorption (or ionization of G- and G+), - magnetic fields, pressure - Für G: 1580-1590 D5 und D2 weiß ich nicht - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "G" - self.input_param_settings = { - "center": {"value": 1571, "min": 1545, "max": 1595}, - "sigma": {"value": 30, "min": 5, "max": 150}, - "amplitude": {"value": 35, "min": 5, "max": 500}, - } - - -class D_peak(metaclass=BasePeak): - """ - D or D1 ; Disordered graphitic lattice (graphene layer edges,A1gsymmetry) - A defective graphite presents other bands that can be as intense as the G band at D=1350 and D'=1615 cm-1 - These bands are activated by defects due to the breaking of the crystal symmetry that relax the Raman selection rules. 
- Für D1: 1340-1350 - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D" - self.input_param_settings = { - "center": {"value": 1350, "min": 1330, "max": 1380}, - "sigma": {"value": 35, "min": 1, "max": 150}, - "amplitude": {"value": 120, "min": 1e-05, "max": 500}, - } - - -class D2_peak(metaclass=BasePeak): - """ - D2 or D' ; Right next to the G peak, sometimes not obvious as G peak split. - Disordered graphitic lattice (surface graphene layers,E2g-symmetry) - j.molstruc.2010.12.065 - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D2" - self.input_param_settings = { - "center": {"value": 1606, "min": 1592, "max": 1635}, - "sigma": {"value": 30, "min": 5, "max": 150}, - "amplitude": {"value": 35, "min": 5, "max": 500}, - } - - -class D3_peak(metaclass=BasePeak): - """ - D3 or D'' or A or Am ; Between the D and G peak, sometimes too broad. - For amorphous carbon (Gaussian[26]or Lorentzian[3,18,27]line shape). - Für D3: 1495-1515 - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D3" - self.input_param_settings = { - "center": {"value": 1480, "min": 1450, "max": 1525}, - "sigma": {"value": 25, "min": 1, "max": 150}, - "amplitude": {"value": 25, "min": 1e-02, "max": 500}, - } - - -class D4_peak(metaclass=BasePeak): - """ - D4 or I ; Below D band, a shoulder sometimes split with D5 band. - Disordered graphitic lattice (A1gsymmetry)[10],polyenes[3,27], ionic impurities - D4 peak at 1212 cm−1 - Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. https://doi.org/10.1007/s10853-017-1753-7 - Für D4: 1185-1210, but depends on if there is D5 or not. - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D4" - self.input_param_settings = { - "center": {"value": 1230, "min": 1180, "max": 1310}, - "sigma": {"value": 40, "min": 1, "max": 150}, - "amplitude": {"value": 20, "min": 1e-02, "max": 200}, - } - - -class D5_peak(metaclass=BasePeak): - """ - D5 peak at 1110 cm−1. At lowest should of D peak, below D4. - Ref: Jurkiewicz, K., Pawlyta, M., Zygadło, D. et al. J Mater Sci (2018) 53: 3509. https://doi.org/10.1007/s10853-017-1753-7 - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D5" - self.input_param_settings = { - "center": {"value": 1150, "min": 1100, "max": 1200}, - "sigma": {"value": 40, "min": 1, "max": 250}, - "amplitude": {"value": 20, "min": 1e-02, "max": 200}, - } - - -class Si1_peak(metaclass=BasePeak): - """ - ===== Extra peak at ca. 960 cm-1 presumably from Si substrate 2nd order === not from Nafion... 
- => Either cut the Spectra 1000-2000 - => Place an extra Gaussian peak at 960 in the fit - """ - - def __init__(self): - self.peak_type = "Gaussian" - self.peak_name = "Si1" - self.input_param_settings = { - "center": {"value": 960, "min": 900, "max": 980}, - "sigma": {"value": 10, "min": 0, "max": 150}, - "amplitude": {"value": 10, "min": 0, "max": 200}, - } - - -def test_for_Si_substrate( - model, -): # IDEA test fit on spectrum to decide wether Si substrate is required or not - """make test fit for only slice 900-1000""" diff --git a/src/raman_fitting/deconvolution_models/default_peaks/normalization_peaks.py b/src/raman_fitting/deconvolution_models/default_peaks/normalization_peaks.py deleted file mode 100644 index 47be679..0000000 --- a/src/raman_fitting/deconvolution_models/default_peaks/normalization_peaks.py +++ /dev/null @@ -1,36 +0,0 @@ -""" Peaks used for normalization""" - -if __name__ == "__main__": - from raman_fitting.deconvolution_models.base_peak import BasePeak -else: - from .base_peak import BasePeak - -__all__ = ["norm_G_peak", "norm_D_peak"] - -# ====== PEAKS USED FOR NORMALIZATION ======= # - - -class norm_G_peak(metaclass=BasePeak): - """G_peak used for normalization""" - - def __init__(self, *args, **kwargs): - self.peak_name = "norm_G" - self.peak_type = "Lorentzian" - self.input_param_settings = { - "center": {"value": 1581, "min": 1500, "max": 1600}, - "sigma": {"value": 40, "min": 1e-05, "max": 1e3}, - "amplitude": {"value": 8e4, "min": 1e2}, - } - - -class norm_D_peak(metaclass=BasePeak): - """D_peak for normalization""" - - def __init__(self, *args, **kwargs): - self.peak_name = "norm_D" - self.peak_type = "Lorentzian" - self.input_param_settings = { - "center": {"value": 1350, "min": 1300, "max": 1400}, - "sigma": {"value": 90, "min": 1e-05}, - "amplitude": {"value": 10e5, "min": 1e2}, - } diff --git a/src/raman_fitting/deconvolution_models/default_peaks/second_order_peaks.py b/src/raman_fitting/deconvolution_models/default_peaks/second_order_peaks.py deleted file mode 100644 index 05e8124..0000000 --- a/src/raman_fitting/deconvolution_models/default_peaks/second_order_peaks.py +++ /dev/null @@ -1,73 +0,0 @@ -if __name__ == "__main__": - from raman_fitting.deconvolution_models.base_peak import BasePeak -else: - from .base_peak import BasePeak - -__all__ = ["D4D4_peak", "D1D1_peak", "GD1_peak", "D2D2_peak"] - - -# ====== SECOND ORDER PEAKS ======= # - - -class D4D4_peak(metaclass=BasePeak): - """ - 2nd order D4 peak - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D4D4" - self.input_param_settings = { - "center": {"value": 2435, "min": 2400, "max": 2550}, - "sigma": {"value": 30, "min": 1, "max": 200}, - "amplitude": {"value": 2, "min": 1e-03, "max": 100}, - } - - -class D1D1_peak(metaclass=BasePeak): - """ - 2nd order D(1) peak, aka 2D - 2450 cm􀀀1 band, which has been attributed recently to a D + D” - band by Couzi et al. [61], the D + D’ (in literature, the wrong D + G label is often found [62]), the - 2D’ bands and the 2D + G band. 
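# Illustrative sketch (not part of the removed sources): the peak classes in these
# modules are ultimately combined into composite lmfit models by summation (compare the
# "2peaks"/"3peaks"/"2nd_4peaks" model names used elsewhere in the package); which peaks
# go into which model is configured elsewhere, so the selection below is only an example.
from lmfit.models import LorentzianModel

first_order_3peaks = (
    LorentzianModel(prefix="G_")
    + LorentzianModel(prefix="D_")
    + LorentzianModel(prefix="D3_")
)
params = first_order_3peaks.make_params()  # prefixed params: G_center, D_center, D3_center, ...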
- 1627 and 2742 cm􀀀1 bands as the D’ and 2D - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D1D1" - self.input_param_settings = { - "center": {"value": 2650, "min": 2600, "max": 2750}, - "sigma": {"value": 60, "min": 1, "max": 200}, - "amplitude": {"value": 14, "min": 1e-03, "max": 100}, - } - - -class GD1_peak(metaclass=BasePeak): - """ - 2nd order G+D(1) peak - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "GD1" - self.input_param_settings = { - "center": {"value": 2900, "min": 2800, "max": 2950}, - "sigma": {"value": 50, "min": 1, "max": 200}, - "amplitude": {"value": 10, "min": 1e-03, "max": 100}, - } - - -class D2D2_peak(metaclass=BasePeak): - """ - 2nd order D2 peak, aka 2D2 - """ - - def __init__(self): - self.peak_type = "Lorentzian" - self.peak_name = "D2D2" - self.input_param_settings = { - "center": {"value": 3250, "min": 3000, "max": 3400}, - "sigma": {"value": 60, "min": 20, "max": 200}, - "amplitude": {"value": 1, "min": 1e-03, "max": 100}, - } diff --git a/src/raman_fitting/deconvolution_models/fit_models.py b/src/raman_fitting/deconvolution_models/fit_models.py deleted file mode 100644 index 9687c96..0000000 --- a/src/raman_fitting/deconvolution_models/fit_models.py +++ /dev/null @@ -1,365 +0,0 @@ -import datetime as dt -import logging -from collections import OrderedDict, namedtuple - -import pandas as pd - -from ..processing.spectrum_constructor import SpectrumDataCollection, SpectrumDataLoader -from .base_model import InitializeModels - -logger = logging.getLogger(__name__) - - -class Fitter: - """ - Fitter class for executing the fitting functions and optimizations - - IDEA: implement sensing of spectrum for Si samples - """ - - fit_windows = ["1st_order", "2nd_order"] - - def __init__(self, spectra_arg, RamanModels=InitializeModels(), start_fit=True): - self._qcnm = self.__class__.__qualname__ - logger.debug(f"{self._qcnm} is called with spectrum\n\t{spectra_arg}\n") - self.start_fit = start_fit - self.models = RamanModels - - self.spectra_arg = spectra_arg - self.spectra = spectra_arg - self.fit_delegator() - - @property - def spectra(self): - return self._spectra - - @spectra.setter - def spectra(self, value): - """Checks if value is dict or else takes a dict from class instance value""" - - _errtxt = f"This assignment {value} does not contain valid spectra" - if isinstance(value, dict): - _data = value - elif isinstance(value, SpectrumDataCollection): - _data = value.mean_data - _fit_lbl = "mean" - elif isinstance(value, SpectrumDataLoader): - _data = value.clean_df - _fit_lbl = "int" - elif isinstance(value, pd.DataFrame): - raise AttributeError - # IDEA implement self.sense_windowname(value) - else: - raise ValueError(_errtxt) - - _specs = { - k: val - for k, val in _data.items() - if k in self.fit_windows and type(val) == pd.DataFrame - } - # assert bool(_specs), _errtxt - if not _specs: - self.start_fit = False - - self._spectra = _specs - self.FitResults = {} - info = {} - if hasattr(value, "info"): - info = {**info, **value.info} - self.info = info - - def fit_delegator(self): - if self.start_fit: - logger.info( - f"\n{self._qcnm} is starting to fit the models on spectrum:\n\t{self.info.get('SampleID','no name')}" - ) - - self.fit_models(self.models.second_order) # second order should go first - logger.info( - f"\t - second order finished, {len(self.models.second_order)} model" - ) - # rum:\t{self.info.get('SampleID','no name')}\n") - self.fit_models(self.models.first_order) - logger.info( - 
f"\t - first order finished, {len(self.models.first_order)} models" - ) - - def fit_models(self, model_selection): - _fittings = {} - logger.debug(f"{self._qcnm} fit_models starting.") - for modname, model in model_selection.items(): - modname, model - _windowname = [i for i in self.fit_windows if modname[0:3] in i][0] - _data = self.spectra.get(_windowname) - _int_lbl = self.get_int_label(_data) - try: - out = self.run_fit( - model.lmfit_model, - _data, - _int_lbl=_int_lbl, - _modelname=modname, - _info=self.info, - ) - prep = PrepareParams(out, extra_fit_results=self.FitResults) - _fittings.update({modname: prep.FitResult}) - except Exception as e: - logger.warning( - f"{self._qcnm} fit_model failed for {modname}: {model}, because:\n {e}" - ) - - self.FitResults.update(**_fittings) - - def run_fit(self, model, _data, method="leastsq", **kws): - # ideas: improve fitting loop so that starting parameters from modelX and modelX+Si are shared, faster... - _fit_res, _param_res = {}, {} - init_params = model.make_params() - x, y = _data.ramanshift, _data[kws.get("_int_lbl")] - out = model.fit(y, init_params, x=x, method=method) # 'leastsq' - for k, val in kws.items(): - if not hasattr(out, k): - _attrkey = k - elif not hasattr(out, f"_{k}"): - _attrkey = f"_{k}" - else: - _attrkey = None - if _attrkey: - setattr(out, _attrkey, val) - return out - - def get_int_label(self, value): - _lbl = "" - if isinstance(value, pd.DataFrame): - cols = [i for i in value.columns if "ramanshift" not in i] - if len(cols) == 0: - _lbl = "" - if len(cols) == 1: - _lbl = cols[0] - elif len(cols) > 1: - if any("mean" in i for i in cols): - _lbl = [i for i in cols if "mean" in i][0] - elif any("int" in i for i in cols): - _lbl = [i for i in cols if "int" in i][0] - - return _lbl - - -class PrepareParams: - fit_attr_export_lst = ( - "chisqr", - "redchi", - "bic", - "aic", - "method", - "message", - "success", - "nfev", - ) - fit_result_template = namedtuple( - "FitResult", - [ - "FitComponents", - "FitParameters", - "FitReport", - "extrainfo", - "model_name", - "raw_data_col", - ], - ) - ratio_params = [("I", "_height"), ("A", "_amplitude")] - _standard_2nd_order = "2nd_4peaks" - - def __init__(self, model_result, extra_fit_results={}): - self._qcnm = self.__class__.__qualname__ - logger.debug(f"{self._qcnm} is called with model_result\n\t{model_result}\n") - self.extra_fit_results = extra_fit_results - self.model_result = model_result - - @property - def model_result(self): - return self._model_result - - @model_result.setter - def model_result(self, value): - """ - Takes the ModelResult class instance from lmfit. - Optional extra functionality with a list of instances. 
- """ - self.result = {} - - if "ModelResult" in type(value).__name__: - self.result.update(value.params.valuesdict()) - self.comps = value.model.components - elif ("list" or "tuple") in type(value).__name__: - assert all("ModelResult" in type(i).__name__ for i in value) - [self.result.update(mod.params.valuesdict()) for mod in value] - self.comps = [i for mod in value for i in mod.model.components] - - self.peaks = set( - [i.prefix for i in self.comps] - ) # peaks is prefix from components - - _mod_lbl = "Model" - if hasattr(value, "_modelname"): - _mod_lbl = f'Model_{getattr(value,"_modelname")}' - self.model_name_lbl = _mod_lbl - - self.raw_data_lbl = value._int_lbl - - self._model_result = value - - self.make_result() - - def make_result(self): - self.prep_params() - self.prep_components() - self.FitReport = self.model_result.fit_report(show_correl=False) - - self.extra_info = {} - self.prep_extra_info() - self.FitResult = self.fit_result_template( - self.FitComponents, - self.FitParameters, - self.FitReport, - self.extra_info, - self.model_name_lbl, - self.raw_data_lbl, - ) - - def prep_extra_info(self): - self.extra_info = {} - _destfitcomps = self.model_result._info["DestFittingComps"] - _model_destdir = _destfitcomps.joinpath( - f'{self.model_name_lbl}_{self.model_result._info["SampleID"]}' - ) - self.extra_info = { - **self.model_result._info, - **{"DestFittingModel": _model_destdir}, - } - - def prep_params(self): - fit_attrs = OrderedDict( - zip( - [f"lmfit_{i}" for i in self.fit_attr_export_lst], - [getattr(self.model_result, i) for i in self.fit_attr_export_lst], - ) - ) - self.result.update(fit_attrs) - try: - self.add_ratio_params() - except Exception as e: - logger.error(f"{self._qcnm} extra prep params failed\n\t{e}\n") - - self.result.update( - {"_run_date_YmdH": dt.datetime.now().strftime(format="%Y-%m-%d %H:00")} - ) - self.FitParameters = pd.DataFrame(self.result, index=[self.model_name_lbl]) - - def add_ratio_params(self): - # peaks = [i.prefix for i in self.out.model.components] - RatioParams = {} - for a, t in self.ratio_params: - if {"G_", "D_"}.issubset(self.peaks): - RatioParams.update( - {f"{a}D/{a}G": self.result["D" + t] / self.result["G" + t]} - ) - RatioParams.update( - {f"La_{a}G": 4.4 * RatioParams.get(f"{a}D/{a}G") ** -1} - ) - # , 'ID/IG' : fit_params_od['D_height']/fit_params_od['G_height']} - if "D2_" in self.peaks: - RatioParams.update( - { - f"{a}D/({a}G+{a}D2)": self.result["D" + t] - / (self.result["G" + t] + self.result["D2" + t]) - } - ) - RatioParams.update( - { - f"La_{a}G+D2": 4.4 - * RatioParams.get(f"{a}D/({a}G+{a}D2)") ** -1 - } - ) - # : fit_params_od['D'+t]/(fit_params_od['G'+t]+fit_params_od['D2'+t])}) - if "D3_" in self.peaks: - RatioParams.update( - { - f"{a}D3/({a}G+{a}D2": self.result["D3" + t] - / (self.result["G" + t] + self.result["D2" + t]) - } - ) - if "D3_" in self.peaks: - RatioParams.update( - {f"{a}D3/{a}G": self.result["D3" + t] / self.result["G" + t]} - ) - if "D4_" in self.peaks: - RatioParams.update( - {f"{a}D4/{a}G": self.result["D4" + t] / self.result["G" + t]} - ) - - if {"D1D1_", "GD1_"}.issubset(self.peaks): - RatioParams.update( - { - f"{a}D1D1/{a}GD1": self.result["D1D1" + t] - / self.result["GD1" + t] - } - ) - if self.extra_fit_results: - RatioParams.update(self.add_ratio_combined_params(a, t)) - self.ratio_params = RatioParams - self.result.update(RatioParams) - - def add_ratio_combined_params(self, a, t): - _2nd = self._standard_2nd_order - if ( - self.model_result._modelname.startswith("1st") - and _2nd in 
self.extra_fit_results.keys() - ): - _D1D1 = self.extra_fit_results[_2nd].FitParameters.loc[ - f"Model_{_2nd}", "D1D1" + t - ] - self.result.update({"D1D1" + t: _D1D1}) - return {f"Leq_{a}": 8.8 * _D1D1 / self.result["D" + t]} - else: - return {} - - def prep_components(self): - # FittingParams = pd.DataFrame(fit_params_od,index=[peak_model]) - _fit_comps_data = OrderedDict({"RamanShift": self.model_result.userkws["x"]}) - _fit_comps_data.update(self.model_result.eval_components()) - - # IDEA take out - # print('===/n',self.model_result, '/n') - # print('===/n',self.model_result.__dict__.keys(), '/n') - - _fit_comps_data.update( - { - self.model_name_lbl: self.model_result.best_fit, - "residuals": self.model_result.residual, - self.model_result._int_lbl: self.model_result.data, - } - ) - FittingComps = pd.DataFrame(_fit_comps_data) - self.FitComponents = FittingComps - - -def NormalizeFit(norm_cleaner, plotprint=False): # pragma: no cover - # IDEA: optional add normalization seperately to Fitter - x, y = norm_cleaner.spec.ramanshift, norm_cleaner.blcorr_desp_intensity - Model = InitializeModels("2peaks normalization Lorentzian") - params = Model.make_params() - pre_fit = Model.fit(y, params, x=x) # 'leastsq' - IG, ID = pre_fit.params["G_height"].value, pre_fit.params["D_height"].value - output = { - "factor": 1 / IG, - "ID/IG": ID / IG, - "ID": ID, - "IG": IG, - "G_center": pre_fit.params["G_center"].value, - "D_center": pre_fit.params["D_center"].value, - "Model": Model, - } - # pre_fit = Model.fit(y,params ,x=x,method='differential-evolution') # 'leastsq' - if plotprint: - pre_fit.plot() - print(pre_fit.fit_report()) - return output diff --git a/src/raman_fitting/deconvolution_models/model_config.py b/src/raman_fitting/deconvolution_models/model_config.py deleted file mode 100644 index f790143..0000000 --- a/src/raman_fitting/deconvolution_models/model_config.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Created on Sun May 30 12:35:58 2021 - -@author: DW -""" -if __name__ == "__main__": - from model_validation import PeakModelValidator - - from raman_fitting.config.config import PACKAGE_HOME - -else: - from model_validation import PeakModelValidator - - from ..config.config import PACKAGE_HOME - - -class ModelConfigurator: - standard_config_file = "model_config_standard.cfg" - - def __init__(self, **kwargs): - self._kwargs = kwargs - - def find_user_config_files(self): - pass - - def file_handler(self): - pass - - def standard_valid_models(self): - peak_collection = PeakModelValidator() diff --git a/src/raman_fitting/deconvolution_models/peak_validation.py b/src/raman_fitting/deconvolution_models/peak_validation.py deleted file mode 100644 index 3e8307e..0000000 --- a/src/raman_fitting/deconvolution_models/peak_validation.py +++ /dev/null @@ -1,330 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Apr 28 15:08:26 2021 - -@author: zmg -""" - -import inspect -import logging -from collections import namedtuple -from itertools import groupby -from pathlib import Path -from typing import Tuple -from warnings import warn - -import matplotlib.pyplot as plt -import pandas as pd -from lmfit import Parameters - -if __name__ == "__main__": # or _file_parent_name == 'deconvolution_models': - # import first_order_peaks - # import second_order_peaks - # import normalization_peaks - from default_peaks import BasePeak - - __package_name__ = __name__ -else: - from .. 
import __package_name__ - from .default_peaks.base_peak import BasePeak - -logger = logging.getLogger(__package_name__) - - -# %% -class PeakValidationWarning(UserWarning): - pass - - -class NotFoundAnyModelsWarning(PeakValidationWarning): - pass - - -class CanNotInitializeModelWarning(PeakValidationWarning): - pass - - -class PeakModelValidator: - """ - This class collects all BasePeak (=BASE_PEAK) type classes, which are costum lmfit type models, and - constructs an iterable collection of all defined Child class. - Each subclass of BasePeak is: - - validated: instance check - - filtered: optional - - sorted: sorting for valid models based on defined center position of BasePeak - - Followed by color assignment to each BasePeak and collection of lmfit_models - - """ - - # _standard_modules = [first_order_peaks, second_order_peaks, normalization_peaks] - BASE_PEAK = BasePeak - - ModelValidation = namedtuple( - "ModelValidation", "valid peak_group model_inst message" - ) - - CMAP_OPTIONS_DEFAULT = ("Dark2", "tab20") - fallback_color = (0.4, 0.4, 0.4, 1.0) - - debug = False - - def __init__(self, *args, cmap_options=CMAP_OPTIONS_DEFAULT, **kwargs): - self.debug = self._set_debug(**kwargs) - self._cmap_options = cmap_options - - self._inspect_models = self.get_subclasses_from_base(self.BASE_PEAK) - - self.valid_models = [] - self._invalid_models = [] - self.valid_models, self._invalid_models = self.validation_inspect_models( - inspect_models=self._inspect_models - ) - self.selected_models = self.filter_valid_models(self.valid_models) - self.selected_models = self.sort_selected_models(self.selected_models) - - self.lmfit_models = self.assign_colors_to_lmfit_mod_inst(self.selected_models) - self.add_model_names_var_names(self.lmfit_models) - - self.model_dict = self.get_model_dict(self.lmfit_models) - self.options = self.model_dict.keys() - - def _set_debug(self, **value): - _debug = self.debug - if isinstance(value, dict): - if "debug" in value.keys(): - _debug = bool(value.get("debug", False)) - return _debug - - def get_subclasses_from_base(self, _BaseClass): - """Finds subclasses of the BasePeak metaclass, these should give already valid models""" - - _all_subclasses = [] - if inspect.isclass(_BaseClass): - if hasattr(_BaseClass, "subclasses"): - _all_subclasses = _BaseClass.subclasses - elif hasattr(_BaseClass, "__subclassess__"): - _all_subclasses = _BaseClass.__subclasses__ - else: - warn( - f"\nNo baseclasses were found for {str(_BaseClass)}:\n missing attributes", - NotFoundAnyModelsWarning, - ) - else: - warn( - f"\nNo baseclasses were found for {str(_BaseClass)}:\n is not a class", - NotFoundAnyModelsWarning, - ) - - if not _all_subclasses: - warn( - f"\nNo baseclasses were found in inspected modules for {str(_BaseClass)}:\n", - NotFoundAnyModelsWarning, - ) - - return _all_subclasses - - def _inspect_modules_for_classes(self): - """Optional method Inspect other modules for subclasses""" - pass - - def validation_inspect_models(self, inspect_models: list = []): - """Validates each member of a list for making a valid model instance""" - _model_validations = [] - valid_models = [] - - for model in inspect_models: - _module = model.__module__ - try: - _succes, _inst, _msg = self.validate_model_instance(model) - except Exception as e: - _succes, _inst = False, model - _msg = f"Unexpected error for validate model instance : {e}\n" - finally: - _args = (_succes, _module, _inst, _msg) - _model_validations.append(self.ModelValidation(*_args)) - if self.debug: - print(_args) - - 
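# Illustrative sketch (not part of the removed sources): get_subclasses_from_base above
# relies on the BasePeak metaclass recording every class that is defined with it (its
# __init__ appends the new class to a shared `subclasses` list). A stripped-down version
# of that registry pattern, with hypothetical names:
class RegisteringMeta(type):
    registry = []

    def __init__(cls, name, bases, namespace):
        super().__init__(name, bases, namespace)
        # every class created with this metaclass is appended to the shared registry
        RegisteringMeta.registry.append(cls)


class ExamplePeak(metaclass=RegisteringMeta):
    pass


assert ExamplePeak in RegisteringMeta.registry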
_invalid_models = [i for i in _model_validations if not i.valid] - valid_models = [i for i in _model_validations if i.valid] - - if not valid_models: - warn( - f'\nNo valid models were found in:\n {", ".join([str(i) for i in inspect_models])}\ - \t\nOnly invalid models: {", ".join([str(i) for i in _invalid_models])}.\n', - NotFoundAnyModelsWarning, - ) - - return valid_models, _invalid_models - - def filter_valid_models(self, value): - """Optional method for extra filters on valid model selection""" - return value - - def sort_selected_models(self, value): - """Sorting the selected valid models for color assigment etc..""" - _sorted = value - _setting_key = None - try: - _setting_key = [i for i in self.BASE_PEAK._fields if "param_hints" in i] - if value: - if _setting_key: - _sorted = sorted( - value, - key=lambda x: getattr(x.model_inst, _setting_key[0]).get( - "center", 0 - ), - ) - except Exception as e: - raise (f"Unable to sort:\n {value}\n{e}") - finally: - _sorted = sorted(_sorted, key=lambda x: x.peak_group) - return _sorted - - def validate_model_instance(self, value): - """ - Returns a boolean, model and message depending on the validation of the model class. - Invalid classes can raise warnings, but exception only when no valid models are found. - """ - - try: - if self.debug: - print(f"validate model inst value:", value) - _inst = value() - if self.debug: - print(f"validate model inst:", _inst) - except Exception as e: - _err = f"Unable to initialize model {value},\n{e}" - warn(f"{_err}", CanNotInitializeModelWarning) - return (False, value, _err) - - for field in self.BASE_PEAK._fields: - if not hasattr(_inst, field): - return (False, value, f"instance {_inst} has no attr {field}.\n") - if not getattr(_inst, field): - return (False, value, f"instance {_inst}, {field} is None.\n") - if "param_hints" in field: - _settings = getattr(_inst, field) - _center = _settings.get("center", None) - if not _center: - return ( - False, - value, - f"instance {_inst}, settings {_settings} center is None.\n", - ) - return (True, _inst, f"{_inst} is a valid model") - - @staticmethod - def get_cmap_list( - lst, cmap_options: Tuple = (), fallback_color: Tuple = () - ) -> Tuple: - cmap = [(0, 0, 0, 1) for i in lst] # black as fallback default color - - # set fallback color from class - if isinstance(fallback_color, tuple): - if len(fallback_color) == 4: - cmap = [fallback_color for i in lst] - - # set cmap colors from cmap options - if cmap_options: - try: - pltcmaps = [plt.get_cmap(cmap) for cmap in cmap_options] - # Take shortest colormap but not - cmap = min( - [i for i in pltcmaps if len(lst) <= len(i.colors)], - key=lambda x: len(x.colors), - default=cmap, - ) - # if succesfull - if "ListedColormap" in str(type(cmap)): - cmap = cmap.colors - - except Exception as exc: - logger.warning(f"get_cmap_list error setting cmap colors:{exc}") - - return cmap - - def assign_colors_to_lmfit_mod_inst(self, selected_models: list): - cmap_get = self.get_cmap_list( - selected_models, - cmap_options=self._cmap_options, - fallback_color=self.fallback_color, - ) - lmfit_models = [] - for n, _arg in enumerate(selected_models): - _m_inst = _arg.model_inst - _m_inst._modelvalidation = _arg - _m_inst.color = ", ".join([str(i) for i in cmap_get[n]]) - _m_inst._lenpars = len(_m_inst.peak_model.param_names) - lmfit_models.append(_m_inst) - return lmfit_models - - def add_standard_init_params(self): - self.standard_init_params = Parameters() - self.standard_init_params.add_many(*BasePeak._params_guesses_base) - - def 
add_model_names_var_names(self, lmfit_models): - _mod_param_names = { - i.peak_model.name: i.peak_model.param_names for i in lmfit_models - } - return _mod_param_names - - def get_df_models_parameters(self): - _models = pd.DataFrame( - [ - ( - i.model.name, - len(i.peak_model.param_names), - ", ".join(i.peak_model.param_names), - ) - for i in self.lmfit_models - ], - columns=["Model_EEC", "model_lenpars", "model_parnames"], - ) - return _models - - def get_model_dict(self, lmfit_models): - model_dict = {i.__class__.__name__: i for i in lmfit_models} - return model_dict - - def get_dict(self): - return { - i.__module__ + ", " + i.__class__.__name__: i for i in self.lmfit_models - } - - def __getattr__(self, name): - try: - _options = self.__getattribute__("options") - if name in _options: - return self.model_dict.get(name, None) - raise AttributeError( - f'Chosen name "{name}" not in options: "{", ".join(_options)}".' - ) - except AttributeError: - raise AttributeError(f'Chosen name "{name}" not in attributes') - - def normalization_model(self): - pass # IDEA separate peaks in groups - - def __iter__(self): - for mod_inst in self.lmfit_models: - yield mod_inst - - def __repr__(self): - _repr = "Validated Peak model collection" - if self.selected_models: - _selmods = f", {len(self.selected_models)} models from: " + "\n\t- " - _repr += _selmods - _joinmods = "\n\t- ".join( - [f"{i.peak_group}: {i.model_inst} \t" for i in self.selected_models] - ) - _repr += _joinmods - else: - _repr += ", empty selected models" - return _repr - - -if __name__ == "__main__": - a = PeakModelValidator(debug=True) diff --git a/src/raman_fitting/delegating/__init__.py b/src/raman_fitting/delegating/__init__.py index d76ea1d..e69de29 100644 --- a/src/raman_fitting/delegating/__init__.py +++ b/src/raman_fitting/delegating/__init__.py @@ -1 +0,0 @@ -# import main_delegator# diff --git a/src/raman_fitting/delegating/main_delegator.py b/src/raman_fitting/delegating/main_delegator.py index 2889070..c740497 100644 --- a/src/raman_fitting/delegating/main_delegator.py +++ b/src/raman_fitting/delegating/main_delegator.py @@ -1,284 +1,197 @@ # pylint: disable=W0614,W0401,W0611,W0622,C0103,E0401,E0402 -import logging -import sys -from pathlib import Path +from dataclasses import dataclass, field +from typing import Dict, List, Sequence, Any -import pandas as pd +from raman_fitting.config.path_settings import ( + RunModes, + ERROR_MSG_TEMPLATE, + initialize_run_mode_paths, +) +from raman_fitting.config import settings + +from raman_fitting.imports.models import RamanFileInfo + +from raman_fitting.models.deconvolution.base_model import BaseLMFitModel +from raman_fitting.models.splitter import RegionNames +from raman_fitting.exports.exporter import ExportManager +from raman_fitting.imports.files.file_indexer import ( + RamanFileIndex, + groupby_sample_group, + groupby_sample_id, + IndexSelector, + initialize_index_from_source_files, +) -from raman_fitting.config.filepath_helper import get_directory_paths_for_run_mode -from raman_fitting.deconvolution_models.fit_models import Fitter -from raman_fitting.deconvolution_models.base_model import InitializeModels -from raman_fitting.exporting.exporter import Exporter -from raman_fitting.indexing.indexer import MakeRamanFilesIndex -from raman_fitting.processing.spectrum_constructor import ( - SpectrumDataCollection, - SpectrumDataLoader, +from raman_fitting.delegating.models import ( + AggregatedSampleSpectrumFitResult, ) -from raman_fitting.utils.exceptions import MainDelegatorError 
+from raman_fitting.delegating.pre_processing import ( + prepare_aggregated_spectrum_from_files, +) +from raman_fitting.types import LMFitModelCollection +from raman_fitting.delegating.run_fit_spectrum import run_fit_over_selected_models -from raman_fitting.processing.spectrum_template import SpectrumTemplate -from raman_fitting.interfaces.cli import RUN_MODES -if __name__ == "__main__": - pass - -logger = logging.getLogger(__name__) +from loguru import logger +@dataclass class MainDelegator: # IDEA Add flexible input handling for the cli, such a path to dir, or list of files # or create index when no kwargs are given. """ Main delegator for the processing of files containing Raman spectra. - Input parameters is DataFrame of index Creates plots and files in the config RESULTS directory. """ - def __init__(self, run_mode="normal", **kwargs): - self.kwargs = kwargs - self._cqnm = __class__.__qualname__ - - self.run_mode = run_mode - if run_mode not in RUN_MODES: - logger.warning( - f"{self}\n\twarning run_mode choice {run_mode} not in\n\t{RUN_MODES}" - ) - - self.dest_dirs = get_directory_paths_for_run_mode(run_mode=run_mode) - self.RESULTS_DIR = self.dest_dirs["RESULTS_DIR"] - self.DATASET_DIR = self.dest_dirs["DATASET_DIR"] - self.INDEX_FILE = self.dest_dirs["INDEX_FILE"] - - self.spectrum = SpectrumTemplate() - - self.run_delegator(**self.kwargs) - - def index_delegator(self, **kwargs): - RF_index = MakeRamanFilesIndex( - **kwargs, - ) - logger.info(f"index_delegator index prepared with len {len(RF_index)}") - return RF_index - - def run_delegator(self, **kwargs): - # IDEA remove self.set_models() removed InitModels - self._failed_samples = [] - self.export_collect = [] - - # assert type(self.index) == type(pd.DataFrame()) - if self.run_mode in ("normal", "DEBUG", "make_index", "make_examples"): - RF_indexer = self.index_delegator( - run_mode=self.run_mode, dataset_dirs=self.dest_dirs, **kwargs + run_mode: RunModes + use_multiprocessing: bool = False + lmfit_models: LMFitModelCollection = field( + default_factory=lambda: settings.default_models + ) + fit_model_region_names: Sequence[RegionNames] = field( + default=(RegionNames.first_order, RegionNames.second_order) + ) + fit_model_specific_names: Sequence[str] | None = None + sample_ids: Sequence[str] = field(default_factory=list) + sample_groups: Sequence[str] = field(default_factory=list) + index: RamanFileIndex = None + selection: Sequence[RamanFileInfo] = field(init=False) + selected_models: Sequence[RamanFileInfo] = field(init=False) + + results: Dict[str, Any] | None = field(default=None, init=False) + export: bool = True + + def __post_init__(self): + run_mode_paths = initialize_run_mode_paths(self.run_mode) + if self.index is None: + raman_files = run_mode_paths.dataset_dir.glob("*.txt") + index_file = run_mode_paths.index_file + self.index = initialize_index_from_source_files( + files=raman_files, index_file=index_file, force_reindex=True ) - self.index = RF_indexer.index_selection - - if self.index.empty: - logger.warning(f"{self._cqnm} index selection empty") - - if self.run_mode == "make_index": - logger.info( - f"{self._cqnm} Debug run mode {self}. 
Index loaded {RF_indexer}" - ) - sys.exit(0) - models = self.initialize_default_models() - self.kwargs.update({"models": models}) - # IDEA built in a model selection keyword, here or at fitting level and for the cli - logger.info( - f"\n{self._cqnm} models initialized for run mode ({self.run_mode}):\n\n{repr(models)}" + self.selection = self.select_samples_from_index() + self.selected_models = self.select_models_from_provided_models() + self.main_run() + if self.export: + self.exports = self.call_export_manager() + + def select_samples_from_index(self) -> Sequence[RamanFileInfo]: + index = self.index + # breakpoint() + index_selector = IndexSelector( + **dict( + raman_files=index.raman_files, + sample_groups=self.sample_groups, + sample_ids=self.sample_ids, ) - - if self.run_mode in ("normal", "make_examples"): - if not self.index.empty: - logger.debug(f"{self._cqnm}. starting run generator.") - - self._run_gen(**self.kwargs) - else: - pass - # info raman loop finished because index is empty - elif self.run_mode == "DEBUG": - logger.debug(f"Debug run mode {self}. Models initialized {models}") - - try: - # self._run_gen() # IDEA add extra test runs in tests dir - pass - except Exception as e: - raise MainDelegatorError( - "The debug run failed. " f" on {self} because {e}" - ) - # raise('Error in DEBUG run: ', e) - else: - logger.warning(f"Debug run mode {self.run_mode} not recognized") - # IDEA get testing from index and run - else: - logger.warning(f'Debug run mode "{self.run_mode}" not recognized not in ') - # warning run mode not recognized - - def initialize_default_models(self): - try: - return InitializeModels() - except Exception as e: - raise MainDelegatorError( - "The initialization of models failed. " f" on {self} with excp: {e}" - ) - return None - - def sample_group_gen(self): - """Generator for Sample Groups, yields the name of group and group of the index SampleGroup""" - for grpnm, sGrp_grp in self.index.groupby( - self.spectrum.grp_names.sGrp_cols[0] - ): # Loop over SampleGroups - yield grpnm, sGrp_grp - - def _sID_gen(self, grpnm, sGrp_grp): - """Generator for SampleIDs, yields the name of group, name of SampleID and group of the index of the SampleID""" - for nm, sID_grp in sGrp_grp.groupby( - list(self.spectrum.grp_names.sGrp_cols[1:]) - ): # Loop over SampleIDs within SampleGroup - yield (grpnm, nm, sID_grp) - - def _run_gen(self, **kwargs): - # #IDEA sort of coordinator coroutine, can implement still deque - sgrp_grpby = self.index.groupby(self.spectrum.grp_names.sGrp_cols[0]) - logger.info(f"{self._cqnm} _run_gen starting: {kwargs}") - _mygen = self._generator(sgrp_grpby, **kwargs) - Exporter(self.export_collect) # clean up and - logger.info( - f"\n{self._cqnm} run finished.\n Results saved in {self.RESULTS_DIR}" ) - - def _generator(self, sgrp_grpby, **kwargs): - export_collect = [] - for sgrpnm, sgrp_grp in sgrp_grpby: - sID_grpby = sgrp_grp.groupby(list(self.spectrum.grp_names.sGrp_cols[1:])) - logger.info(f"{self._cqnm} _generator starting group: {sgrpnm}") - exporter_sample = None - for sIDnm, sIDgrp in sID_grpby: - try: - exporter_sample = self.simple_process_sample_wrapper( - sgrpnm, sIDnm, sIDgrp, **kwargs - ) - except GeneratorExit: - logger.warning(f"{self._cqnm} _generator closed.") - return () - except Exception as e: - logger.warning(f"{self._cqnm} _generator exception: {e}") - export_collect.append(exporter_sample) - return export_collect - - def coordinator(self): - pass - - def simple_process_sample_wrapper(self, *sID_args, **kwargs): - logger.info( - 
f"{self._cqnm} starting simple process_sample_wrapper args:\n\t - {sID_args[0]}\n\t - {kwargs.keys()}" - ) - exporter_sample = None + selection = index_selector.selection + if not selection: + logger.info("Selection was empty.") + return selection + + def call_export_manager(self): + # breakpoint() + export = ExportManager(self.run_mode, self.results) + exports = export.export_files() + return exports + + # region_names:List[RegionNames], model_names: List[str] + def select_models_from_provided_models(self) -> LMFitModelCollection: + selected_region_names = self.fit_model_region_names + selected_model_names = self.fit_model_specific_names + selected_models = {} + for region_name, all_region_models in self.lmfit_models.items(): + if region_name not in selected_region_names: + continue + if not selected_model_names: + selected_models[region_name] = all_region_models + continue + selected_region_models = {} + for mod_name, mod_val in all_region_models.items(): + if mod_name not in selected_model_names: + continue + selected_region_models[mod_name] = mod_val + + selected_models[region_name] = selected_region_models + return selected_models + + def select_fitting_model( + self, region_name: RegionNames, model_name: str + ) -> BaseLMFitModel: try: - logger.debug( - f"{self._cqnm} simple process_sample_wrapper starting:\n\t - {sID_args[1]}" - ) - exporter_sample = self.process_sample(*sID_args, **kwargs) - if exporter_sample: - logger.debug( - f"{self._cqnm} simple process_sample_wrapper appending export:\n\t - {exporter_sample}" + return self.lmfit_models[region_name][model_name] + except KeyError as exc: + raise KeyError(f"Model {region_name} {model_name} not found.") from exc + + def main_run(self): + selection = self.select_samples_from_index() + if not self.fit_model_region_names: + logger.info("No model region names were selected.") + if not self.selected_models: + logger.info("No fit models were selected.") + + results = {} + + for group_name, grp in groupby_sample_group(selection): + results[group_name] = {} + for sample_id, sample_grp in groupby_sample_id(grp): + sgrp = list(sample_grp) + results[group_name][sample_id] = {} + _error_msg = None + + if not sgrp: + _err = "group is empty" + _error_msg = ERROR_MSG_TEMPLATE.format(group_name, sample_id, _err) + logger.debug(_error_msg) + results[group_name][sample_id]["errors"] = _error_msg + continue + + unique_positions = {i.sample.position for i in sgrp} + if len(unique_positions) <= len(sgrp): + # handle edge-case, multiple source files for a single position on a sample + _error_msg = f"Handle multiple source files for a single position on a sample, {group_name} {sample_id}" + results[group_name][sample_id]["errors"] = _error_msg + logger.debug(_error_msg) + model_result = run_fit_over_selected_models( + sgrp, + self.selected_models, + use_multiprocessing=self.use_multiprocessing, ) - self.export_collect.append(exporter_sample) - except Exception as e: - logger.warning( - f"{self._cqnm} simple process_sample_wrapper exception on call process sample: {e}" - ) - self._failed_samples.append((e, sID_args, kwargs)) - return exporter_sample + results[group_name][sample_id]["fit_results"] = model_result + self.results = results - def test_positions( - self, sGrp_grp, sIDnm, grp_cols=["FileStem", "SamplePos", "FilePath"] - ): - if sGrp_grp.FileStem.nunique() != sGrp_grp.SamplePos.nunique(): - logger.warning( - f"{sGrp_grp[grp_cols]} Unique files and positions not matching for {sIDnm}" - ) - return sGrp_grp.groupby(grp_cols), grp_cols - def 
process_sample(self, sgrpnm, sIDnm, sID_grp, **kwargs): - """ - Loops over individual Sample positions (files) from a SampleID and performs the - fitting, plotting and exporting. - """ - logger.info( - f"{self._cqnm} process_sample called:\n\t - {sgrpnm}, {sIDnm}\n\t - {kwargs.keys()}" +def get_results_over_selected_models( + raman_files: List[RamanFileInfo], models: LMFitModelCollection, fit_model_results +) -> Dict[RegionNames, AggregatedSampleSpectrumFitResult]: + results = {} + for region_name, region_grp in models.items(): + aggregated_spectrum = prepare_aggregated_spectrum_from_files( + region_name, raman_files ) - - models = kwargs.get("models", None) - - sGr_out = dict(zip(self.spectrum.grp_names.sGrp_cols, (sgrpnm,) + sIDnm)) - export_info_out = add_make_sample_group_destdirs(sID_grp) - sample_pos_grp, sPos_cols = self.test_positions( - sID_grp, sIDnm, list(self.spectrum.grp_names.sPos_cols) - ) - - sample_spectra = [] - for meannm, meangrp in sample_pos_grp: - logger.info(f"{self._cqnm} process sample mean loop file: {meannm}.") - sPos_out = dict(zip(self.spectrum.grp_names.sPos_cols, meannm)) - _spectrum_position_info_kwargs = {**sGr_out, **export_info_out, **sPos_out} - spectrum_data = SpectrumDataLoader( - file=meannm[-1], run_kwargs=_spectrum_position_info_kwargs, ovv=meangrp - ) - sample_spectra.append(spectrum_data) - if sample_spectra: - spectra_collection = SpectrumDataCollection(sample_spectra) - ft = Fitter(spectra_collection, RamanModels=models) - rex = Exporter(ft) - return rex - else: - logger.info( - f"{self._cqnm} process sample spectra empty {','.join(map(str,[sgrpnm, sIDnm]))}." - ) - return None - - def __repr__(self): - return f'Maindelegator: run_mode = {self.run_mode}, {", ".join([f"{k} = {str(val)}" for k,val in self.kwargs.items()])}' - - -def add_make_sample_group_destdirs(sample_grp: pd.DataFrame): - dest_grp_dir = Path( - sample_grp.DestDir.unique()[0] - ) # takes one destination directory from Sample Groups - dest_fit_plots = dest_grp_dir.joinpath("Fitting_Plots") - dest_fit_comps = dest_grp_dir.joinpath("Fitting_Components") - dest_fit_comps.mkdir(parents=True, exist_ok=True) - - dest_raw_data_dir = dest_grp_dir.joinpath("Raw_Data") - dest_raw_data_dir.mkdir(parents=True, exist_ok=True) - - export_info = { - "DestGrpDir": dest_grp_dir, - "DestFittingPlots": dest_fit_plots, - "DestFittingComps": dest_fit_comps, - "DestRaw": dest_raw_data_dir, - } - return export_info - - -def process_sample_wrapper(fn, *args, **kwargs): - def wrapper(*args, **kwargs): - logger.debug( - f"process_sample_wrapper args:\n\t - {fn}\n\t - {args}\n\t - {kwargs.keys()}" + if aggregated_spectrum is None: + continue + fit_region_results = AggregatedSampleSpectrumFitResult( + region_name=region_name, + aggregated_spectrum=aggregated_spectrum, + fit_model_results=fit_model_results, ) - exp_sample = None - try: - exp_sample = fn(*args, **kwargs) - except Exception as e: - logger.error( - f"process_sample_wrapper process_sample_wrapper exception on call {fn}: {e}" - ) - exp_sample = (e, args, kwargs) - - return exp_sample + results[region_name] = fit_region_results + return results def make_examples(): - _main_run = MainDelegator(run_mode="make_examples") + # breakpoint() + _main_run = MainDelegator( + run_mode="pytest", fit_model_specific_names=["2peaks", "3peaks", "2nd_4peaks"] + ) + _main_run.main_run() return _main_run + + +if __name__ == "__main__": + example_run = make_examples() diff --git a/src/raman_fitting/delegating/models.py b/src/raman_fitting/delegating/models.py new 
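# Illustrative sketch (not part of the diff): the refactored MainDelegator is a dataclass
# that indexes the data files, selects samples and models, runs the fits and (optionally)
# exports, all from __post_init__. A minimal invocation, assuming the bundled example
# fixtures are available for the chosen run mode, could look like:
from raman_fitting.delegating.main_delegator import MainDelegator

delegator = MainDelegator(
    run_mode="pytest",  # a RunModes value, as in make_examples() above
    fit_model_specific_names=["2peaks", "2nd_4peaks"],
    use_multiprocessing=False,
)
fit_results = delegator.results  # nested dict: sample group -> sample id -> {"fit_results": ...}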
file mode 100644 index 0000000..5e59730 --- /dev/null +++ b/src/raman_fitting/delegating/models.py @@ -0,0 +1,29 @@ +# pylint: disable=W0614,W0401,W0611,W0622,C0103,E0401,E0402 +from typing import Dict, Sequence + +from pydantic import BaseModel + +from raman_fitting.imports.models import RamanFileInfo + +from raman_fitting.models.spectrum import SpectrumData +from raman_fitting.models.fit_models import SpectrumFitModel +from raman_fitting.models.splitter import RegionNames +from raman_fitting.imports.spectrumdata_parser import SpectrumReader +from raman_fitting.processing.post_processing import SpectrumProcessor + + +class PreparedSampleSpectrum(BaseModel): + file_info: RamanFileInfo + read: SpectrumReader + processed: SpectrumProcessor + + +class AggregatedSampleSpectrum(BaseModel): + sources: Sequence[PreparedSampleSpectrum] + spectrum: SpectrumData + + +class AggregatedSampleSpectrumFitResult(BaseModel): + region_name: RegionNames + aggregated_spectrum: AggregatedSampleSpectrum + fit_model_results: Dict[str, SpectrumFitModel] diff --git a/src/raman_fitting/delegating/pre_processing.py b/src/raman_fitting/delegating/pre_processing.py new file mode 100644 index 0000000..f58b63c --- /dev/null +++ b/src/raman_fitting/delegating/pre_processing.py @@ -0,0 +1,44 @@ +from typing import List + +from raman_fitting.models.splitter import RegionNames +from raman_fitting.imports.spectrumdata_parser import SpectrumReader +from raman_fitting.processing.post_processing import SpectrumProcessor +from raman_fitting.imports.models import RamanFileInfo +from .models import ( + AggregatedSampleSpectrum, + PreparedSampleSpectrum, +) + +from loguru import logger + +from raman_fitting.config.path_settings import CLEAN_SPEC_REGION_NAME_PREFIX +from ..imports.spectrum.spectra_collection import SpectraDataCollection + + +def prepare_aggregated_spectrum_from_files( + region_name: RegionNames, raman_files: List[RamanFileInfo] +) -> AggregatedSampleSpectrum | None: + select_region_key = f"{CLEAN_SPEC_REGION_NAME_PREFIX}{region_name}" + clean_data_for_region = [] + data_sources = [] + for i in raman_files: + read = SpectrumReader(i.file) + processed = SpectrumProcessor(read.spectrum) + prepared_spec = PreparedSampleSpectrum( + file_info=i, read=read, processed=processed + ) + data_sources.append(prepared_spec) + selected_clean_data = processed.clean_spectrum.spec_regions[select_region_key] + clean_data_for_region.append(selected_clean_data) + if not clean_data_for_region: + logger.warning( + f"prepare_mean_data_for_fitting received no files. 
{region_name}" + ) + return + spectra_collection = SpectraDataCollection( + spectra=clean_data_for_region, region_name=region_name + ) + aggregated_spectrum = AggregatedSampleSpectrum( + sources=data_sources, spectrum=spectra_collection.mean_spectrum + ) + return aggregated_spectrum diff --git a/src/raman_fitting/delegating/run_fit_multi.py b/src/raman_fitting/delegating/run_fit_multi.py new file mode 100644 index 0000000..9396a0e --- /dev/null +++ b/src/raman_fitting/delegating/run_fit_multi.py @@ -0,0 +1,54 @@ +from typing import Dict, List + +from loguru import logger +from mpire import WorkerPool + +from raman_fitting.models.fit_models import SpectrumFitModel + + +def run_fit_multi(**kwargs) -> SpectrumFitModel: + # include optional https://lmfit.github.io/lmfit-py/model.html#saving-and-loading-modelresults + spectrum = kwargs.pop("spectrum") + model = kwargs.pop("model") + lmfit_model = model["lmfit_model"] + region = kwargs.pop("region") + import time + + lmfit_kwargs = {} + if "method" not in kwargs: + lmfit_kwargs["method"] = "leastsq" + + init_params = lmfit_model.make_params() + start_time = time.time() + x, y = spectrum["ramanshift"], spectrum["intensity"] + out = lmfit_model.fit(y, init_params, x=x, **lmfit_kwargs) # 'leastsq' + end_time = time.time() + elapsed_seconds = abs(start_time - end_time) + elapsed_time = elapsed_seconds + logger.debug( + f"Fit with model {model['name']} on {region} success: {out.success} in {elapsed_time:.2f}s." + ) + return out + + +def run_fit_multiprocessing( + spec_fits: List[SpectrumFitModel], +) -> Dict[str, SpectrumFitModel]: + spec_fits_dumps = [i.model_dump() for i in spec_fits] + + with WorkerPool(n_jobs=4, use_dill=True) as pool: + results = pool.map( + run_fit_multi, spec_fits_dumps, progress_bar=True, progress_bar_style="rich" + ) + # patch spec_fits, setattr fit_result + fit_model_results = {} + for result in results: + _spec_fit_search = [ + i for i in spec_fits if i.model.lmfit_model.name == result.model.name + ] + if len(_spec_fit_search) != 1: + continue + _spec_fit = _spec_fit_search[0] + _spec_fit.fit_result = result + fit_model_results[_spec_fit.model.name] = _spec_fit + return fit_model_results diff --git a/src/raman_fitting/delegating/run_fit_spectrum.py b/src/raman_fitting/delegating/run_fit_spectrum.py new file mode 100644 index 0000000..2f16f76 --- /dev/null +++ b/src/raman_fitting/delegating/run_fit_spectrum.py @@ -0,0 +1,65 @@ +from typing import List, Dict + +from raman_fitting.delegating.run_fit_multi import run_fit_multiprocessing +from raman_fitting.models.spectrum import SpectrumData +from raman_fitting.types import LMFitModelCollection +from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult +from raman_fitting.delegating.pre_processing import ( + prepare_aggregated_spectrum_from_files, +) +from raman_fitting.imports.models import RamanFileInfo +from raman_fitting.models.deconvolution.spectrum_regions import RegionNames +from raman_fitting.models.fit_models import SpectrumFitModel + +from loguru import logger + + +def run_fit_over_selected_models( + raman_files: List[RamanFileInfo], + models: LMFitModelCollection, + use_multiprocessing: bool = False, +) -> Dict[RegionNames, AggregatedSampleSpectrumFitResult]: + results = {} + for region_name, model_region_grp in models.items(): + aggregated_spectrum = prepare_aggregated_spectrum_from_files( + region_name, raman_files + ) + if aggregated_spectrum is None: + continue + spec_fits = prepare_spec_fit_regions( + aggregated_spectrum.spectrum, 
model_region_grp + ) + if use_multiprocessing: + fit_model_results = run_fit_multiprocessing(spec_fits) + else: + fit_model_results = run_fit_loop(spec_fits) + fit_region_results = AggregatedSampleSpectrumFitResult( + region_name=region_name, + aggregated_spectrum=aggregated_spectrum, + fit_model_results=fit_model_results, + ) + results[region_name] = fit_region_results + return results + + +def prepare_spec_fit_regions( + spectrum: SpectrumData, model_region_grp +) -> List[SpectrumFitModel]: + spec_fits = [] + for model_name, model in model_region_grp.items(): + region = model.region_name.name + spec_fit = SpectrumFitModel(spectrum=spectrum, model=model, region=region) + spec_fits.append(spec_fit) + return spec_fits + + +def run_fit_loop(spec_fits: List[SpectrumFitModel]) -> Dict[str, SpectrumFitModel]: + fit_model_results = {} + for spec_fit in spec_fits: + # include optional https://lmfit.github.io/lmfit-py/model.html#saving-and-loading-modelresults + spec_fit.run_fit() + logger.debug( + f"Fit with model {spec_fit.model.name} on {spec_fit.region} success: {spec_fit.fit_result.success} in {spec_fit.elapsed_time:.2f}s." + ) + fit_model_results[spec_fit.model.name] = spec_fit + return fit_model_results diff --git a/src/raman_fitting/docker/run_make_examples.py b/src/raman_fitting/docker/run_make_examples.py deleted file mode 100644 index 2452896..0000000 --- a/src/raman_fitting/docker/run_make_examples.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -Created on Thu Jul 22 11:45:34 2021 - -@author: DW - -For this to work, the app needs be installed inside the docker container - -""" -from time import sleep - -# import raman_fitting -from .api import make_examples - -# from ..api import make_examples - -if __name__ == "__main__": - print( - 'Hello Docker World, the raman_fitting "make examples" command from the containter is starting in 3 seconds...' 
- ) - sleep(2) - print("\n...and Go!.....\n") - make_examples() diff --git a/src/raman_fitting/datafiles/example_files/Si_spectrum01.txt b/src/raman_fitting/example_fixtures/Si_spectrum01.txt similarity index 100% rename from src/raman_fitting/datafiles/example_files/Si_spectrum01.txt rename to src/raman_fitting/example_fixtures/Si_spectrum01.txt diff --git a/src/raman_fitting/deconvolution_models/__init__.py b/src/raman_fitting/example_fixtures/__init__.py similarity index 100% rename from src/raman_fitting/deconvolution_models/__init__.py rename to src/raman_fitting/example_fixtures/__init__.py diff --git a/src/raman_fitting/datafiles/example_files/testDW38C_pos1.txt b/src/raman_fitting/example_fixtures/testDW38C_pos1.txt similarity index 100% rename from src/raman_fitting/datafiles/example_files/testDW38C_pos1.txt rename to src/raman_fitting/example_fixtures/testDW38C_pos1.txt diff --git a/src/raman_fitting/datafiles/example_files/testDW38C_pos2.txt b/src/raman_fitting/example_fixtures/testDW38C_pos2.txt similarity index 100% rename from src/raman_fitting/datafiles/example_files/testDW38C_pos2.txt rename to src/raman_fitting/example_fixtures/testDW38C_pos2.txt diff --git a/src/raman_fitting/datafiles/example_files/testDW38C_pos3.txt b/src/raman_fitting/example_fixtures/testDW38C_pos3.txt similarity index 100% rename from src/raman_fitting/datafiles/example_files/testDW38C_pos3.txt rename to src/raman_fitting/example_fixtures/testDW38C_pos3.txt diff --git a/src/raman_fitting/datafiles/example_files/testDW38C_pos4.txt b/src/raman_fitting/example_fixtures/testDW38C_pos4.txt similarity index 100% rename from src/raman_fitting/datafiles/example_files/testDW38C_pos4.txt rename to src/raman_fitting/example_fixtures/testDW38C_pos4.txt diff --git a/src/raman_fitting/exporting/__init__.py b/src/raman_fitting/exporting/__init__.py deleted file mode 100644 index 792d600..0000000 --- a/src/raman_fitting/exporting/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# diff --git a/src/raman_fitting/exporting/database.py b/src/raman_fitting/exporting/database.py deleted file mode 100644 index 3dfebbc..0000000 --- a/src/raman_fitting/exporting/database.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Mon May 10 16:15:25 2021 - -@author: zmg -""" - -import sqlite3 - -from raman_fitting.config import filepath_settings - - -class RamanDB: - def __init__(self): - self.dbpath = filepath_settings.RESULTS_DIR.joinpath("sqlite.db") - - def conn(self): - self.conn = sqlite3.connect(self.dbpath) diff --git a/src/raman_fitting/exporting/exporter.py b/src/raman_fitting/exporting/exporter.py deleted file mode 100644 index 965603f..0000000 --- a/src/raman_fitting/exporting/exporter.py +++ /dev/null @@ -1,126 +0,0 @@ -import pandas as pd - -from raman_fitting.exporting.plotting import fit_spectrum_plot, raw_data_export - -import logging - -logger = logging.getLogger(__name__) - - -class ExporterError(Exception): - """Error occured during the exporting functions""" - - -class Exporter: - """ - The Exporter class handles all the exporting of spectra and models - into figures and xlsx files. 
- - """ - - def __init__(self, arg, raw_out=True, plot=True, model_names_prefix=["1st", "2nd"]): - self.raw_out = raw_out - self.plot = plot - try: - self.delegator(arg) - except ExporterError: - logger.warning( - f"{self.__class__.__qualname__} failed export from {type(arg)}" - ) - except Exception as e: - logger.error( - f"{self.__class__.__qualname__} failed export with unexpected error {e}" - ) - - # Exporting and Plotting - def delegator(self, arg): - self.fitter = arg - if "Fitter" in type(arg).__name__: - self.fitter = arg - self.split_results() - - if self.raw_out: - self.raw_export() - - if self.plot: - self.export_fitting_plotting_models() - elif isinstance(arg, list): - # "list" in type([]).__name__: - # FIXME - try: - self.export_from_list(arg) - except Exception as e: - logger.error( - "f{self.__class__.__qualname__} failed export from list", e - ) - else: - logger.warning( - "f{self.__class__.__qualname__} failed export from unknown arg type {type(arg)}" - ) - raise ExporterError - - def export_from_list(self, arg): - fitter_args = [i for i in arg if hasattr(arg, "fitter")] - if fitter_args: - FitRes = pd.concat( - [ - val.FitParameters - for exp in fitter_args - for k, val in exp.fitter.FitResults.items() - ] - ) - _info = fitter_args[0].fitter.info - self.export_fitparams_grp_per_model(FitRes, _info) - - def export_fitparams_grp_per_model(self, FitRes, _info): - DestGrpDir = _info.get("DestGrpDir") - grpnm = _info["SampleGroup"] - for pknm, pkgrp in FitRes.groupby(level=0): - peak_destpath = DestGrpDir.joinpath(f"{grpnm}_FitParameters_{pknm}") - pkgrp.dropna(axis=1).to_excel( - peak_destpath.with_suffix(".xlsx"), index=False - ) - - def raw_export(self): - raw_data_export(self.fitter.spectra_arg.fitting_spectra) - - def export_fitting_plotting_models(self): - pars1, pars2 = [], [] - - _1st = { - k: val for k, val in self.fitter.FitResults.items() if k.startswith("1st") - } - _2nd = { - k: val for k, val in self.fitter.FitResults.items() if k.startswith("2nd") - } - - for modname_2, fitres_2 in _2nd.items(): - self.export_xls_from_spec(fitres_2) - pars2.append(fitres_2.FitParameters) - for modname_1, fitres_1 in _1st.items(): - self.export_xls_from_spec(fitres_1) - try: - fit_spectrum_plot( - modname_1, - modname_2, - fitres_1, - fitres_2, - plot_Annotation=True, - plot_Residuals=True, - ) - except Exception as e: - print( - f"Error fit_spectrum_plot:{modname_1}, {fitres_1.raw_data_col}.\n {e}" - ) - pars1.append(fitres_1.FitParameters) - return pd.concat(pars1, sort=False), pd.concat(pars2, sort=False) - - def export_xls_from_spec(self, res_peak_spec): - try: - res_peak_spec.FitComponents.to_excel( - res_peak_spec.extrainfo["DestFittingModel"].with_suffix(".xlsx"), - index=False, - ) - - except Exception as e: - print("Error export_xls_from_spec", e) diff --git a/src/raman_fitting/exporting/plotting.py b/src/raman_fitting/exporting/plotting.py deleted file mode 100644 index 74b6e7a..0000000 --- a/src/raman_fitting/exporting/plotting.py +++ /dev/null @@ -1,296 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Wed Jan 29 14:49:50 2020 - -@author: DW -""" -import matplotlib -import matplotlib.lines as mlines -import matplotlib.pyplot as plt -from matplotlib import gridspec -from matplotlib.ticker import AutoMinorLocator, FormatStrFormatter, MultipleLocator - -matplotlib.rcParams.update({"font.size": 14}) - -# %% - - -# IDEA PLOTTING PER PEAK MODEL -def plotting_info(windowname): # pragma: no cover - axes = { - "full": (0, 0), - "low": (0, 1), - 
"1st_order": (0, 2), - "mid": (1, 1), - "2nd_order": (1, 2), - "normalization": (1, 0), - } - return axes.get(windowname) - - -def raw_data_export(fitting_specs): # pragma: no cover - current_sample = fitting_specs[0].windowname, fitting_specs[0].sIDmean_col - try: - raw_data_spectra_plot(fitting_specs) - except Exception as e: - print("no extra Raw Data plots for {1} \n {0}".format(e, current_sample)) - try: - raw_data_spectra_export(fitting_specs) - except Exception as e: - print("no extra Raw Data plots for {1} \n {0}".format(e, current_sample)) - - -def raw_data_spectra_plot(fitting_specs): # pragma: no cover - # fitting_specs - try: - fig, ax = plt.subplots(2, 3, figsize=(18, 12)) - ax_wn = [] - - for spec in fitting_specs: - try: - ax_wn = ax[plotting_info(spec.windowname)] - _legend = True if "full" == spec.windowname else False - spec.mean_spec.plot( - x="ramanshift", - y=spec.sID_rawcols, - ax=ax_wn, - alpha=0.5, - legend=_legend, - ) - spec.mean_spec.plot( - x="ramanshift", - y=spec.sIDmean_col, - ax=ax_wn, - c="k", - alpha=0.7, - lw=3, - legend=_legend, - ) - - ax_wn.set_title(spec.windowname) - if _legend: - ax_wn.legend(fontsize=10) - - except: - pass - - plt.suptitle(spec.sIDmean_col, fontsize=16) - plt.savefig( - spec.mean_info.DestRaw.unique()[0].joinpath(f"{spec.sIDmean_col}.png"), - dpi=300, - bbox_inches="tight", - ) - plt.close() - except Exception as e: - print("no extra Raw Data plots: {0}".format(e)) - - -def raw_data_spectra_export(fitting_specs): - try: - for spec in fitting_specs: - wnxl_outpath_spectra = spec.mean_info.DestRaw.unique()[0].joinpath( - f"spectra_{spec.sIDmean_col}_{spec.windowname}.xlsx" - ) - spec.mean_spec.to_excel(wnxl_outpath_spectra) - - _0_spec = fitting_specs[0] - wnxl_outpath_info = _0_spec.mean_info.DestRaw.unique()[0].joinpath( - f"info_{_0_spec.sIDmean_col}.xlsx" - ) - _0_spec.mean_info.to_excel(wnxl_outpath_info) - except Exception as e: - print("no extra Raw Data plots: {0}".format(e)) - - -def fit_spectrum_plot( - peak1, - peak2, - res1_peak_spec, - res2_peak_spec, - plot_Annotation=True, - plot_Residuals=True, -): # pragma: no cover - modname_2 = peak2 - # %% - sID = res1_peak_spec.extrainfo["SampleID"] - SampleBgmean_col = res1_peak_spec.raw_data_col - - FitData_1st = res1_peak_spec.FitComponents - Model_peak_col_1st = res1_peak_spec.model_name - Model_data_col_1st = res1_peak_spec.model_name - compscols_1st = [ - i for i in FitData_1st.columns if i.endswith("_") and not i.startswith("Si") - ] - - FitData_2nd = res2_peak_spec.FitComponents - Model_peak_col_2nd = res2_peak_spec.model_name - Model_data_col_2nd = res2_peak_spec.model_name - compscols_2nd = [i for i in FitData_2nd.columns if i.endswith("_")] - - FitPars, FitPars_2nd = res1_peak_spec.FitParameters, res2_peak_spec.FitParameters - - fig = plt.figure(figsize=(28, 24)) - gs = gridspec.GridSpec(4, 1, height_ratios=[4, 1, 4, 1]) - ax = plt.subplot(gs[0]) - axRes = plt.subplot(gs[1]) - ax2nd = plt.subplot(gs[2]) - ax2ndRes = plt.subplot(gs[3]) - ax2ndRes.grid(True), axRes.grid(True, "both") - ax2nd.grid(True), ax.grid(True, "both") - ax.get_yaxis().set_tick_params(direction="in") - ax.get_xaxis().set_tick_params(direction="in") - ax.set_title(SampleBgmean_col) - # '%s' %FileName) - ax.xaxis.set_minor_locator(AutoMinorLocator(2)) - ax.yaxis.set_minor_locator(AutoMinorLocator(2)) - ax.tick_params(which="both", direction="in") - ax2nd.xaxis.set_minor_locator(AutoMinorLocator(2)) - ax2nd.yaxis.set_minor_locator(AutoMinorLocator(2)) - ax2nd.tick_params(which="both", direction="in") 
- ax.set_facecolor("oldlace"), ax2nd.set_facecolor("oldlace") - axRes.set_facecolor("oldlace"), ax2ndRes.set_facecolor("oldlace") - ax2nd.plot( - FitData_2nd["RamanShift"], - FitData_2nd[Model_data_col_2nd], - label=Model_data_col_2nd, - lw=3, - c="r", - ) - ax2nd.plot( - FitData_2nd["RamanShift"], - FitData_2nd[res2_peak_spec.raw_data_col], - label="Data", - lw=3, - c="grey", - alpha=0.5, - ) - if plot_Residuals: - ax2ndRes.plot( - FitData_2nd["RamanShift"], - FitData_2nd[res2_peak_spec.raw_data_col] - FitData_2nd[Model_data_col_2nd], - label="Residual", - lw=3, - c="k", - alpha=0.8, - ) - - for fit_comp_col_2nd in compscols_2nd: # automatic color cycle 'cyan' ... - ax2nd.plot( - FitData_2nd["RamanShift"], - FitData_2nd[fit_comp_col_2nd], - ls="--", - lw=4, - label=fit_comp_col_2nd, - ) - center_col, height_col = ( - fit_comp_col_2nd + "center", - fit_comp_col_2nd + "height", - ) - ax2nd.annotate( - f"{fit_comp_col_2nd}\n {FitPars_2nd[center_col].round(0).iloc[0]:.0f}", - xy=( - FitPars_2nd[center_col].iloc[0] * 0.97, - 0.8 * FitPars_2nd[height_col].iloc[0], - ), - xycoords="data", - ) - ax2nd.set_ylim(-0.02, FitData_2nd[Model_data_col_2nd].max() * 1.5) - ax.plot( - FitData_1st["RamanShift"], - FitData_1st[Model_data_col_1st], - label=Model_data_col_1st, - lw=3, - c="r", - ) - ax.plot( - FitData_1st["RamanShift"], - FitData_1st[res1_peak_spec.raw_data_col], - label="Data", - lw=3, - c="grey", - alpha=0.8, - ) - - if plot_Residuals: - axRes.plot( - FitData_1st["RamanShift"], - FitData_1st[res1_peak_spec.raw_data_col] - FitData_1st[Model_data_col_1st], - label="Residual", - lw=3, - c="k", - alpha=0.8, - ) - - for fit_comp_col_1st in compscols_1st: # automatic color cycle 'cyan' ... - ax.plot( - FitData_1st["RamanShift"], - FitData_1st[fit_comp_col_1st], - ls="--", - lw=4, - label=fit_comp_col_1st, - ) - center_col, height_col = ( - fit_comp_col_1st + "center", - fit_comp_col_1st + "height", - ) - ax.annotate( - f"{fit_comp_col_1st}:\n {FitPars[center_col].round(0).iloc[0]:.0f}", - xy=(FitPars[center_col].iloc[0] * 0.97, 0.7 * FitPars[height_col].iloc[0]), - xycoords="data", - ) - - if "peaks" in peak1 and peak1.endswith("+Si"): - ax.plot( - FitData_1st["RamanShift"], - FitData_1st["Si1_"], - "b--", - lw=4, - label="Si_substrate", - ) - if FitPars["Si1_fwhm"].iloc[0] > 1: - ax.annotate( - "Si_substrate:\n %.0f" % FitPars["Si1_center"], - xy=(FitPars["Si1_center"] * 0.97, 0.8 * FitPars["Si1_height"]), - xycoords="data", - ) - if plot_Annotation: - frsplit = res1_peak_spec.FitReport.split() - if len(frsplit) > 200: - fr = res1_peak_spec.FitReport.replace("prefix='D3_'", "prefix='D3_' \n") - else: - fr = res1_peak_spec.FitReport - props = dict(boxstyle="round", facecolor="wheat", alpha=0.5) - Report1 = ax.text( - 1.01, - 1, - fr, - transform=ax.transAxes, - fontsize=11, - verticalalignment="top", - bbox=props, - ) - Report2 = ax2nd.text( - 1.01, - 0.7, - res2_peak_spec.FitReport, - transform=ax2nd.transAxes, - fontsize=11, - verticalalignment="top", - bbox=props, - ) - - ax.legend(loc=1), ax.set_xlabel("Raman shift (cm$^{-1}$)"), ax.set_ylabel( - "normalized I / a.u." - ) - ax2nd.legend(loc=1), ax2nd.set_xlabel("Raman shift (cm$^{-1}$)"), ax2nd.set_ylabel( - "normalized I / a.u." 
- ) - - plt.savefig( - res1_peak_spec.extrainfo["DestFittingModel"].with_suffix(".png"), - dpi=100, - bbox_extra_artists=(Report1, Report2), - bbox_inches="tight", - ) - plt.close() diff --git a/src/raman_fitting/docker/__init__.py b/src/raman_fitting/exports/__init__.py similarity index 100% rename from src/raman_fitting/docker/__init__.py rename to src/raman_fitting/exports/__init__.py diff --git a/src/raman_fitting/exports/exporter.py b/src/raman_fitting/exports/exporter.py new file mode 100644 index 0000000..af66816 --- /dev/null +++ b/src/raman_fitting/exports/exporter.py @@ -0,0 +1,57 @@ +from dataclasses import dataclass +from typing import Dict, Any +from raman_fitting.config.path_settings import ( + RunModes, + initialize_run_mode_paths, + ExportPathSettings, +) +from raman_fitting.config import settings + +from raman_fitting.exports.plotting_fit_results import fit_spectrum_plot +from raman_fitting.exports.plotting_raw_data import raw_data_spectra_plot + + +from loguru import logger + + +class ExporterError(Exception): + """Error occured during the exporting functions""" + + +@dataclass +class ExportManager: + run_mode: RunModes + results: Dict[str, Any] | None = None + + def __post_init__(self): + self.paths = initialize_run_mode_paths( + self.run_mode, user_package_home=settings.destination_dir + ) + + def export_files(self): + # breakpoint() self.results + exports = [] + for group_name, group_results in self.results.items(): + for sample_id, sample_results in group_results.items(): + export_dir = self.paths.results_dir / group_name / sample_id + export_paths = ExportPathSettings(results_dir=export_dir) + try: + raw_data_spectra_plot( + sample_results["fit_results"], export_paths=export_paths + ) + except Exception as exc: + logger.error(f"Plotting error, raw_data_spectra_plot: {exc}") + try: + fit_spectrum_plot( + sample_results["fit_results"], export_paths=export_paths + ) + except Exception as exc: + logger.error(f"plotting error fit_spectrum_plot: {exc}") + raise exc from exc + exports.append( + { + "sample": sample_results["fit_results"], + "export_paths": export_paths, + } + ) + return exports diff --git a/src/raman_fitting/exports/file_table.py b/src/raman_fitting/exports/file_table.py new file mode 100644 index 0000000..661beb7 --- /dev/null +++ b/src/raman_fitting/exports/file_table.py @@ -0,0 +1,31 @@ +from typing import List + +from raman_fitting.models.spectrum import SpectrumData + + +def raw_data_spectra_export(spectra: List[SpectrumData]): + try: + for spec in spectra: + wnxl_outpath_spectra = spec.mean_info.DestRaw.unique()[0].joinpath( + f"spectra_{spec.sIDmean_col}_{spec.regionname}.xlsx" + ) + spec.mean_spec.to_excel(wnxl_outpath_spectra) + + _0_spec = spectra[0] + wnxl_outpath_info = _0_spec.mean_info.DestRaw.unique()[0].joinpath( + f"info_{_0_spec.sIDmean_col}.xlsx" + ) + _0_spec.mean_info.to_excel(wnxl_outpath_info) + except Exception as e: + print("no extra Raw Data plots: {0}".format(e)) + + +def export_xls_from_spec(self, res_peak_spec): + try: + res_peak_spec.FitComponents.to_excel( + res_peak_spec.extrainfo["DestFittingModel"].with_suffix(".xlsx"), + index=False, + ) + + except Exception as e: + print("Error export_xls_from_spec", e) diff --git a/src/raman_fitting/exports/plot_formatting.py b/src/raman_fitting/exports/plot_formatting.py new file mode 100644 index 0000000..4e044b7 --- /dev/null +++ b/src/raman_fitting/exports/plot_formatting.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Apr 28 15:08:26 2021 + 
+@author: zmg +""" + +from collections import namedtuple +from typing import Sequence, Tuple + +from raman_fitting.models.splitter import RegionNames + +import matplotlib.pyplot as plt +from lmfit import Model as LMFitModel + +from loguru import logger + + +CMAP_OPTIONS_DEFAULT = ("Dark2", "tab20") +DEFAULT_COLOR = (0.4, 0.4, 0.4, 1.0) +COLOR_BLACK = (0, 0, 0, 1) # black as fallback default color + +ModelValidation = namedtuple("ModelValidation", "valid peak_group model_inst message") + + +PLOT_REGION_AXES = { + RegionNames.full: (0, 0), + RegionNames.low: (0, 1), + RegionNames.first_order: (0, 2), + RegionNames.mid: (1, 1), + RegionNames.second_order: (1, 2), + RegionNames.normalization: (1, 0), +} + + +class PeakValidationWarning(UserWarning): + pass + + +class NotFoundAnyModelsWarning(PeakValidationWarning): + pass + + +class CanNotInitializeModelWarning(PeakValidationWarning): + pass + + +def get_cmap_list( + length: int, + cmap_options: Tuple = CMAP_OPTIONS_DEFAULT, + default_color: Tuple = DEFAULT_COLOR, +) -> Tuple | None: + lst = list(range(length)) + if not lst: + return None + + # set fallback color from class + if isinstance(default_color, tuple) and default_color is not None: + if len(default_color) == 4: + cmap = [default_color for _ in lst] + return cmap + elif default_color is None: + cmap = [DEFAULT_COLOR for _ in lst] + else: + raise ValueError(f"default color is not tuple but {type(default_color)}") + + # set cmap colors from cmap options + if cmap_options: + try: + pltcmaps = [plt.get_cmap(cmap) for cmap in cmap_options] + # Take shortest colormap but not + cmap = min( + [i for i in pltcmaps if len(lst) <= len(i.colors)], + key=lambda x: len(x.colors), + default=cmap, + ) + # if succesfull + if "ListedColormap" in str(type(cmap)): + cmap = cmap.colors + + except Exception as exc: + logger.warning(f"get_cmap_list error setting cmap colors:{exc}") + + return cmap + + +def assign_colors_to_peaks(selected_models: Sequence[LMFitModel]) -> dict: + cmap_get = get_cmap_list(len(selected_models)) + annotated_models = {} + for n, peak in enumerate(selected_models): + color = ", ".join([str(i) for i in cmap_get[n]]) + lenpars = len(peak.param_names) + res = {"index": n, "color": color, "lenpars": lenpars, "peak": peak} + annotated_models[peak.prefix] = res + return annotated_models + + +def __repr__(self): + _repr = "Validated Peak model collection" + if self.selected_models: + _selmods = f", {len(self.selected_models)} models from: " + "\n\t- " + _repr += _selmods + _joinmods = "\n\t- ".join( + [f"{i.peak_group}: {i.model_inst} \t" for i in self.selected_models] + ) + _repr += _joinmods + else: + _repr += ", empty selected models" + return _repr diff --git a/src/raman_fitting/exports/plotting_fit_results.py b/src/raman_fitting/exports/plotting_fit_results.py new file mode 100644 index 0000000..5fe473e --- /dev/null +++ b/src/raman_fitting/exports/plotting_fit_results.py @@ -0,0 +1,301 @@ +from typing import Dict + +import matplotlib +import matplotlib.pyplot as plt +from matplotlib import gridspec +from matplotlib.axes import Axes + +from matplotlib.text import Text +from matplotlib.ticker import AutoMinorLocator + +from raman_fitting.imports.samples.models import SampleMetaData +from raman_fitting.models.fit_models import SpectrumFitModel + + +from raman_fitting.config.path_settings import ExportPathSettings +from raman_fitting.models.splitter import RegionNames +from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult + +from loguru import logger + + 
+matplotlib.rcParams.update({"font.size": 14}) +FIT_REPORT_MIN_CORREL = 0.7 + + +def fit_spectrum_plot( + aggregated_spectra: Dict[RegionNames, AggregatedSampleSpectrumFitResult], + export_paths: ExportPathSettings | None = None, + plot_annotation=True, + plot_residuals=True, +): # pragma: no cover + first_order = aggregated_spectra[RegionNames.first_order] + second_order = aggregated_spectra[RegionNames.second_order] + + sources = first_order.aggregated_spectrum.sources + sample = sources[0].file_info.sample + second_model_name = "2nd_4peaks" + second_model = second_order.fit_model_results.get(second_model_name) + for first_model_name, first_model in first_order.fit_model_results.items(): + prepare_combined_spectrum_fit_result_plot( + first_model, + second_model, + sample, + export_paths, + plot_annotation=plot_annotation, + plot_residuals=plot_residuals, + ) + + +def prepare_combined_spectrum_fit_result_plot( + first_model: SpectrumFitModel, + second_model: SpectrumFitModel, + sample: SampleMetaData, + export_paths: ExportPathSettings, + plot_annotation=True, + plot_residuals=True, +): + plt.figure(figsize=(28, 24)) + gs = gridspec.GridSpec(4, 1, height_ratios=[4, 1, 4, 1]) + ax = plt.subplot(gs[0]) + ax_res = plt.subplot(gs[1]) + ax.set_title(f"{sample.id}") + + first_model_name = first_model.model.name + + fit_plot_first(ax, ax_res, first_model, plot_residuals=plot_residuals) + _bbox_artists = None + if plot_annotation: + annotate_report_first = prepare_annotate_fit_report_first( + ax, first_model.fit_result + ) + _bbox_artists = (annotate_report_first,) + + if second_model is not None: + ax2nd = plt.subplot(gs[2]) + ax2nd_res = plt.subplot(gs[3]) + fit_plot_second(ax2nd, ax2nd_res, second_model, plot_residuals=plot_residuals) + if plot_annotation: + annotate_report_second = prepare_annotate_fit_report_second( + ax2nd, second_model.fit_result + ) + if annotate_report_second is not None: + _bbox_artists = (annotate_report_first, annotate_report_second) + + # set axes labels and legend + set_axes_labels_and_legend(ax) + + plot_special_si_components(ax, first_model) + if export_paths is not None: + savepath = export_paths.plots.joinpath(f"Model_{first_model_name}").with_suffix( + ".png" + ) + plt.savefig( + savepath, + dpi=100, + bbox_extra_artists=_bbox_artists, + bbox_inches="tight", + ) + logger.debug(f"Plot saved to {savepath}") + plt.close() + + +def fit_plot_first( + ax, ax_res, first_model: SpectrumFitModel, plot_residuals: bool = True +) -> matplotlib.text.Text | None: + first_result = first_model.fit_result + first_components = first_model.fit_result.components + first_eval_comps = first_model.fit_result.eval_components() + first_model_name = first_model.model.name + + ax.grid(True, "both") + ax_res.grid(True, "both") + ax.get_yaxis().set_tick_params(direction="in") + ax.get_xaxis().set_tick_params(direction="in") + + ax.xaxis.set_minor_locator(AutoMinorLocator(2)) + ax.yaxis.set_minor_locator(AutoMinorLocator(2)) + ax.tick_params(which="both", direction="in") + ax.set_facecolor("oldlace") + ax_res.set_facecolor("oldlace") + ax.plot( + first_model.spectrum.ramanshift, + first_result.best_fit, + label=first_model_name, + lw=3, + c="r", + ) + ax.plot( + first_model.spectrum.ramanshift, + first_result.data, + label="Data", + lw=3, + c="grey", + alpha=0.8, + ) + + if plot_residuals: + ax_res.plot( + first_model.spectrum.ramanshift, + first_result.residual, + label="Residual", + lw=3, + c="k", + alpha=0.8, + ) + + for _component in first_components: # automatic color cycle 'cyan' ... 
+ peak_name = _component.prefix.rstrip("_") + ax.plot( + first_model.spectrum.ramanshift, + first_eval_comps[_component.prefix], + ls="--", + lw=4, + label=peak_name, + ) + center_col = _component.prefix + "center" + ax.annotate( + f"{peak_name}:\n {first_result.best_values[center_col]:.0f}", + xy=( + first_result.best_values[center_col] * 0.97, + 0.7 * first_result.params[_component.prefix + "height"].value, + ), + xycoords="data", + ) + + +def fit_plot_second( + ax2nd, ax2nd_res, second_model: SpectrumFitModel, plot_residuals: bool = True +) -> None: + if second_model: + second_result = second_model.fit_result + second_components = second_model.fit_result.components + second_eval_comps = second_model.fit_result.eval_components() + second_model_name = second_model.model.name + else: + second_components = [] + second_result = None + second_model_name = None + second_eval_comps = None + if second_model: + ax2nd.grid(True) + ax2nd_res.grid(True) + ax2nd.xaxis.set_minor_locator(AutoMinorLocator(2)) + ax2nd.yaxis.set_minor_locator(AutoMinorLocator(2)) + ax2nd.tick_params(which="both", direction="in") + ax2nd.set_facecolor("oldlace") + ax2nd_res.set_facecolor("oldlace") + if second_result is not None: + ax2nd.plot( + second_model.spectrum.ramanshift, + second_result.best_fit, + label=second_model_name, + lw=3, + c="r", + ) + ax2nd.plot( + second_model.spectrum.ramanshift, + second_result.data, + label="Data", + lw=3, + c="grey", + alpha=0.5, + ) + if plot_residuals: + ax2nd_res.plot( + second_model.spectrum.ramanshift, + second_result.residual, + label="Residual", + lw=3, + c="k", + alpha=0.8, + ) + + for _component in second_components: # automatic color cycle 'cyan' ... + if second_eval_comps is None: + continue + + peak_name = _component.prefix.rstrip("_") + ax2nd.plot( + second_model.spectrum.ramanshift, + second_eval_comps[_component.prefix], + ls="--", + lw=4, + label=peak_name, + ) + center_col = _component.prefix + "center" + ax2nd.annotate( + f"{peak_name}\n {second_result.best_values[center_col]:.0f}", + xy=( + second_result.best_values[center_col] * 0.97, + 0.8 * second_result.params[_component.prefix + "height"].value, + ), + xycoords="data", + ) + ax2nd.set_ylim(-0.02, second_result.data.max() * 1.5) + + set_axes_labels_and_legend(ax2nd) + + +def prepare_annotate_fit_report_second(ax2nd, second_result) -> Text: + props = dict(boxstyle="round", facecolor="wheat", alpha=0.5) + annotate_report_second = ax2nd.text( + 1.01, + 0.7, + second_result.fit_report(min_correl=FIT_REPORT_MIN_CORREL), + transform=ax2nd.transAxes, + fontsize=11, + verticalalignment="top", + bbox=props, + ) + + return annotate_report_second + + +def prepare_annotate_fit_report_first(ax, first_result): + fit_report = first_result.fit_report(min_correl=FIT_REPORT_MIN_CORREL) + if len(fit_report) > -1: + fit_report = fit_report.replace("prefix='D3_'", "prefix='D3_' \n") + props = dict(boxstyle="round", facecolor="wheat", alpha=0.5) + + annotate_report_first = ax.text( + 1.01, + 1, + fit_report, + transform=ax.transAxes, + fontsize=11, + verticalalignment="top", + bbox=props, + ) + return annotate_report_first + + +def plot_special_si_components(ax, first_model): + first_result = first_model.fit_result + si_components = filter(lambda x: x.prefix.startswith("Si"), first_result.components) + first_eval_comps = first_model.fit_result.eval_components() + for si_comp in si_components: + si_result = si_comp + ax.plot( + first_model.spectrum.ramanshift, + first_eval_comps[si_comp.prefix], + "b--", + lw=4, + 
label="Si_substrate", + ) + if si_result.params[si_comp.prefix + "fwhm"] > 1: + ax.annotate( + "Si_substrate:\n %.0f" % si_result.params["Si1_center"].value, + xy=( + si_result.params["Si1_center"].value * 0.97, + 0.8 * si_result.params["Si1_height"].value, + ), + xycoords="data", + ) + + +def set_axes_labels_and_legend(ax: Axes): + # set axes labels and legend + ax.legend(loc=1) + ax.set_xlabel("Raman shift (cm$^{-1}$)") + ax.set_ylabel("normalized I / a.u.") diff --git a/src/raman_fitting/exports/plotting_raw_data.py b/src/raman_fitting/exports/plotting_raw_data.py new file mode 100644 index 0000000..cf4a74b --- /dev/null +++ b/src/raman_fitting/exports/plotting_raw_data.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Jan 29 14:49:50 2020 + +@author: DW +""" + +from typing import Dict + + +import matplotlib +import matplotlib.pyplot as plt + +from raman_fitting.models.splitter import RegionNames +from raman_fitting.config.path_settings import ( + CLEAN_SPEC_REGION_NAME_PREFIX, + ExportPathSettings, +) +from raman_fitting.exports.plot_formatting import PLOT_REGION_AXES +from raman_fitting.delegating.models import AggregatedSampleSpectrumFitResult + +from loguru import logger + +matplotlib.rcParams.update({"font.size": 14}) + + +def raw_data_spectra_plot( + aggregated_spectra: Dict[RegionNames, AggregatedSampleSpectrumFitResult], + export_paths: ExportPathSettings, +): # pragma: no cover + if not aggregated_spectra: + return + # breakpoint() + sources = list(aggregated_spectra.values())[0].aggregated_spectrum.sources + sample_id = "-".join(set(i.file_info.sample.id for i in sources)) + + destfile = export_paths.plots.joinpath(f"{sample_id}_mean.png") + destfile.parent.mkdir(exist_ok=True, parents=True) + + mean_fmt = dict(c="k", alpha=0.7, lw=3) + sources_fmt = dict(alpha=0.4, lw=2) + + _, ax = plt.subplots(2, 3, figsize=(18, 12)) + + for spec_source in sources: + for ( + source_region_label, + source_region, + ) in spec_source.processed.clean_spectrum.spec_regions.items(): + _source_region_name = source_region.region_name.split( + CLEAN_SPEC_REGION_NAME_PREFIX + )[-1] + if _source_region_name not in PLOT_REGION_AXES: + continue + ax_ = ax[PLOT_REGION_AXES[_source_region_name]] + ax_.plot( + source_region.ramanshift, + source_region.intensity, + label=f"{spec_source.file_info.file.stem}", + **sources_fmt, + ) + ax_.set_title(_source_region_name) + if _source_region_name in aggregated_spectra: + mean_spec = aggregated_spectra[ + _source_region_name + ].aggregated_spectrum.spectrum + # plot the mean aggregated spectrum + ax_.plot( + mean_spec.ramanshift, + mean_spec.intensity, + label=mean_spec.label, + **mean_fmt, + ) + + if _source_region_name == RegionNames.full: + ax_.legend(fontsize=10) + + plt.suptitle(f"Mean {sample_id}", fontsize=16) + plt.savefig( + destfile, + dpi=300, + bbox_inches="tight", + ) + plt.close() + logger.debug(f"raw_data_spectra_plot saved:\n{destfile}") diff --git a/src/raman_fitting/imports/__init__.py b/src/raman_fitting/imports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/raman_fitting/imports/collector.py b/src/raman_fitting/imports/collector.py new file mode 100644 index 0000000..e81a8cc --- /dev/null +++ b/src/raman_fitting/imports/collector.py @@ -0,0 +1,31 @@ +from pathlib import Path +from typing import List, Collection, Tuple +import logging + +from .models import RamanFileInfo + +logger = logging.getLogger(__name__) + + +def collect_raman_file_infos( + raman_files: Collection[Path], +) 
-> Tuple[List[RamanFileInfo], List[Path]]:
+    pp_collection = []
+    _files = []
+    _failed_files = []
+    for file in raman_files:
+        _files.append(file)
+        try:
+            pp_res = RamanFileInfo(**{"file": file})
+            pp_collection.append(pp_res)
+        except Exception as exc:
+            logger.warning(
+                f"{__name__} collect_raman_file_infos unexpected error when calling RamanFileInfo on\n{file}.\n{exc}"
+            )
+            _failed_files.append({"file": file, "error": exc})
+    if _failed_files:
+        logger.warning(
+            f"{__name__} collect_raman_file_infos failed for {len(_failed_files)} files."
+        )
+
+    return pp_collection, _files
diff --git a/src/raman_fitting/imports/files/file_finder.py b/src/raman_fitting/imports/files/file_finder.py
new file mode 100644
index 0000000..38137a6
--- /dev/null
+++ b/src/raman_fitting/imports/files/file_finder.py
@@ -0,0 +1,41 @@
+from typing import List
+import logging
+from pathlib import Path
+from pydantic import BaseModel, DirectoryPath, Field, model_validator
+
+logger = logging.getLogger(__name__)
+
+
+class FileFinder(BaseModel):
+    directory: DirectoryPath
+    suffixes: List[str] = Field([".txt"])
+    files: List[Path] = Field(None, init_var=False)
+
+    @model_validator(mode="after")
+    def parse_metadata_from_filepath(self) -> "FileFinder":
+        if self.files is None:
+            files = find_files(self.directory, self.suffixes)
+            self.files = files
+
+        return self
+
+
+def find_files(directory: Path, suffixes: List[str]) -> List[Path]:
+    """
+    Creates a list of all raman type files found in the DATASET_DIR which are used in the creation of the index.
+    """
+
+    raman_files = []
+
+    for suffix in suffixes:
+        files = list(directory.rglob(f"*{suffix}"))
+        raman_files += files
+
+    if not raman_files:
+        logger.warning(
+            f"find_files warning: the chosen data file dir was empty.\n{directory}\nPlease choose another directory which contains your data files."
+    )
+    logger.info(
+        f"find_files {len(raman_files)} files were found in the chosen data dir:\n\t{directory}"
+    )
+    return raman_files
diff --git a/src/raman_fitting/imports/files/file_indexer.py b/src/raman_fitting/imports/files/file_indexer.py
new file mode 100644
index 0000000..23d89b1
--- /dev/null
+++ b/src/raman_fitting/imports/files/file_indexer.py
@@ -0,0 +1,232 @@
+"""Indexer for raman data files"""
+
+from itertools import filterfalse, groupby
+from pathlib import Path
+from typing import List, Sequence, TypeAlias
+
+from loguru import logger
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    FilePath,
+    NewPath,
+    model_validator,
+)
+from raman_fitting.config import settings
+from raman_fitting.imports.collector import collect_raman_file_infos
+from raman_fitting.imports.files.utils import (
+    load_dataset_from_file,
+    write_dataset_to_file,
+)
+from raman_fitting.imports.models import RamanFileInfo
+from tablib import Dataset
+
+from raman_fitting.imports.spectrum import SPECTRUM_FILETYPE_PARSERS
+
+RamanFileInfoSet: TypeAlias = Sequence[RamanFileInfo]
+
+
+class RamanFileIndex(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    index_file: NewPath | FilePath | None = Field(None, validate_default=False)
+    raman_files: RamanFileInfoSet | None = Field(None)
+    dataset: Dataset | None = Field(None)
+    force_reindex: bool = Field(False, validate_default=False)
+    persist_to_file: bool = Field(True, validate_default=False)
+
+    @model_validator(mode="after")
+    def read_or_load_data(self) -> "RamanFileIndex":
+        if not any([self.index_file, self.raman_files, self.dataset]):
+            raise ValueError(
+                "At least one of index_file, raman_files or dataset must be provided."
+            )
+
+        reload_from_file = validate_reload_from_index_file(
+            self.index_file, self.force_reindex
+        )
+        if reload_from_file:
+            self.dataset = load_dataset_from_file(self.index_file)
+            if not self.raman_files and self.dataset:
+                self.raman_files = parse_dataset_to_index(self.dataset)
+            return self
+
+        if self.raman_files is not None:
+            dataset_rf = cast_raman_files_to_dataset(self.raman_files)
+            if self.dataset is not None:
+                assert (
+                    dataset_rf == self.dataset
+                ), "Both dataset and raman_files provided and they are different."
+            self.dataset = dataset_rf
+
+        if self.dataset is not None:
+            self.raman_files = parse_dataset_to_index(self.dataset)
+
+        if self.raman_files is None and self.dataset is None:
+            raise ValueError(
+                "Index error: neither raman_files nor dataset were provided."
+            )
+
+        if self.persist_to_file and self.index_file is not None:
+            write_dataset_to_file(self.index_file, self.dataset)
+
+        return self
+
+
+def validate_reload_from_index_file(
+    index_file: Path | None, force_reindex: bool
+) -> bool:
+    if index_file is None:
+        logger.debug(
+            "Index file not provided, index will not be reloaded or persisted."
+        )
+        return False
+    if index_file.exists() and not force_reindex:
+        return True
+    elif force_reindex:
+        logger.warning(
+            f"Index file {index_file} will be overwritten because force_reindex is set."
+        )
+    else:
+        logger.info(
+            "Index file does not exist but a reload from it was requested."
+ ) + return False + + +def cast_raman_files_to_dataset(raman_files: RamanFileInfoSet) -> Dataset: + headers = list(RamanFileInfo.model_fields.keys()) + data = Dataset(headers=headers) + for file in raman_files: + data.append(file.model_dump(mode="json").values()) + return data + + +def parse_dataset_to_index(dataset: Dataset) -> RamanFileInfoSet: + raman_files = [] + for row in dataset: + row_data = dict(zip(dataset.headers, row)) + raman_files.append(RamanFileInfo(**row_data)) + return raman_files + + +class IndexSelector(BaseModel): + raman_files: Sequence[RamanFileInfo] + sample_ids: List[str] = Field(default_factory=list) + sample_groups: List[str] = Field(default_factory=list) + selection: Sequence[RamanFileInfo] = Field(default_factory=list) + + @model_validator(mode="after") + def make_and_set_selection(self) -> "IndexSelector": + rf_index = self.raman_files + if not any([self.sample_groups, self.sample_ids]): + self.selection = rf_index + logger.debug( + f"{self.__class__.__qualname__} selected {len(self.selection)} of {len(rf_index)}. " + ) + return self + else: + rf_index_groups = list( + filter(lambda x: x.sample.group in self.sample_groups, rf_index) + ) + _pre_selected_samples = {i.sample.id for i in rf_index_groups} + selected_sample_ids = filterfalse( + lambda x: x in _pre_selected_samples, self.sample_ids + ) + rf_index_samples = list( + filter(lambda x: x.sample.id in selected_sample_ids, rf_index) + ) + rf_selection_index = rf_index_groups + rf_index_samples + self.selection = rf_selection_index + logger.debug( + f"{self.__class__.__qualname__} selected {len(self.selection)} of {rf_index}. " + ) + return self + + +def groupby_sample_group(index: RamanFileInfoSet): + """Generator for Sample Groups, yields the name of group and group of the index SampleGroup""" + grouper = groupby(index, key=lambda x: x.sample.group) + return grouper + + +def groupby_sample_id(index: RamanFileInfoSet): + """Generator for SampleIDs, yields the name of group, name of SampleID and group of the index of the SampleID""" + grouper = groupby(index, key=lambda x: x.sample.id) + return grouper + + +def iterate_over_groups_and_sample_id(index: RamanFileInfoSet): + for grp_name, grp in groupby_sample_group(index): + for sample_id, sgrp in groupby_sample_group(grp): + yield grp_name, grp, sample_id, sgrp + + +def select_index_by_sample_groups(index: RamanFileInfoSet, sample_groups: List[str]): + return filter(lambda x: x.sample.group in sample_groups, index) + + +def select_index_by_sample_ids(index: RamanFileInfoSet, sample_ids: List[str]): + return filter(lambda x: x.sample.id in sample_ids, index) + + +def select_index( + index: RamanFileInfoSet, sample_groups: List[str], sample_ids: List[str] +): + group_selection = list(select_index_by_sample_groups(index, sample_groups)) + sample_selection = list(select_index_by_sample_ids(index, sample_ids)) + selection = group_selection + sample_selection + return selection + + +def collect_raman_file_index_info( + raman_files: Sequence[Path] | None = None, **kwargs +) -> RamanFileInfoSet: + """loops over the files and scrapes the index data from each file""" + raman_files = list(raman_files) + total_files = [] + dirs = [i for i in raman_files if i.is_dir()] + files = [i for i in raman_files if i.is_file()] + total_files += files + suffixes = [i.lstrip(".") for i in SPECTRUM_FILETYPE_PARSERS.keys()] + for d1 in dirs: + paths = [path for i in suffixes for path in d1.glob(f"*.{i}")] + total_files += paths + index, files = collect_raman_file_infos(total_files, 
**kwargs)
+    logger.info(f"successfully made index {len(index)} from {len(files)} files")
+    return index
+
+
+def initialize_index_from_source_files(
+    files: Sequence[Path] | None = None,
+    index_file: Path | None = None,
+    force_reindex: bool = False,
+) -> RamanFileIndex:
+    raman_files = collect_raman_file_index_info(raman_files=files)
+    # breakpoint()
+    raman_index = RamanFileIndex(
+        index_file=index_file, raman_files=raman_files, force_reindex=force_reindex
+    )
+    logger.info(
+        f"initialize_index_from_source_files prepared an index with len {len(raman_index.raman_files)}"
+    )
+    return raman_index
+
+
+def main():
+    """test run for indexer"""
+    index_file = settings.destination_dir.joinpath("index.csv")
+    raman_files = collect_raman_file_index_info()
+    try:
+        index_data = {"index_file": index_file, "raman_files": raman_files}
+        raman_index = RamanFileIndex(**index_data)
+        logger.debug(f"Raman Index len: {len(raman_index.dataset)}")
+        select_index(raman_index.raman_files, sample_groups=["DW"], sample_ids=["DW38"])
+    except Exception as e:
+        logger.error(f"Raman Index error: {e}")
+        raman_index = None
+
+    return raman_index
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/raman_fitting/imports/files/index_funcs.py b/src/raman_fitting/imports/files/index_funcs.py
new file mode 100644
index 0000000..39bdd24
--- /dev/null
+++ b/src/raman_fitting/imports/files/index_funcs.py
@@ -0,0 +1,155 @@
+import sys
+
+from pathlib import Path
+
+from raman_fitting.imports.spectrum.datafile_parsers import load_dataset_from_file
+
+from loguru import logger
+
+
+def get_dtypes_filepath(index_file):
+    _dtypes_filepath = index_file.with_name(
+        index_file.stem + "_dtypes" + index_file.suffix
+    )
+    return _dtypes_filepath
+
+
+def export_index(index, index_file):
+    """saves the index to a defined Index file"""
+    if index.empty:
+        logger.info(f"{__name__} Empty index not exported")
+        return
+
+    if not index_file.parent.exists():
+        logger.info(f"{__name__} created parent dir: {index_file.parent}")
+        index_file.parent.mkdir(exist_ok=True, parents=True)
+
+    index.to_csv(index_file)
+
+    _dtypes = index.dtypes.to_frame("dtypes")
+    _dtypes.to_csv(get_dtypes_filepath(index_file))
+
+    logger.info(
+        f"{__name__} Successfully exported Raman Index file to:\n\t{index_file}\nwith len({len(index)})."
+    )
+
+
+def load_index(index_file):
+    """loads the index from the defined Index file"""
+    if not index_file.exists():
+        logger.error(
+            f"Error in load_index: {index_file} does not exist, starting reload index ... "
+        )
+        return
+
+    try:
+        index = load_dataset_from_file(index_file)
+
+        logger.info(
+            f"Successfully imported Raman Index file from {index_file}, with len({len(index)})"
+        )
+        if len(index) != len(index):
+            logger.error(
+                f"""Error in load_index from {index_file},
+                \nlength of loaded index not same as number of raman files
+                \n starting reload index ... """
+            )
+        return index
+
+    except Exception as e:
+        logger.error(
+            f"Error in load_index from {index_file},\n{e}\n starting reload index ... 
" + ) + + +def index_selection(index, **kwargs): + """ + Special selector on the index DataFrame + + Parameters + ------- + + index + pd.DataFrame containing the index of files + should contains columns that are given in index_file_sample_cols and index_file_stat_cols + default_selection str + all or '' for empty default + kwargs + checks for keywords suchs as samplegroups, sampleIDs, extra + meant for cli commands + + Returns + ------- + index_selection + pd.DataFrame with a selection from the given input parameter index + default returns empty DataFrame + + """ + if index is None: + return + + if not kwargs: + return index + + default_selection = kwargs.get("default_selection", "all") + if "normal" not in kwargs.get("run_mode", default_selection): + default_selection = "all" + index_selection = None + logger.info( + f"starting index selection from index({len(index)}) with:\n default selection: {default_selection}\n and {kwargs}" + ) + + if not index: + logger.warning("index selection index arg empty") + return + + if default_selection == "all": + index_selection = index.copy() + + if "samplegroups" in kwargs: + index = list( + filter(lambda x: x.sample.group in kwargs.get("samplegroups", []), index) + ) + if "sampleIDs" in kwargs: + index = list( + filter(lambda x: x.sample.id in kwargs.get("sampleIDs", []), index) + ) + + if "extra" in kwargs: + runq = kwargs.get("run") + if "recent" in runq: + grp = index.sort_values( + "FileCreationDate", ascending=False + ).FileCreationDate.unique()[0] + + index_selection = index.loc[index.FileCreationDate == grp] + index_selection = index_selection.assign( + **{ + "DestDir": [ + Path(i).joinpath(grp.strftime("%Y-%m-%d")) + for i in index_selection.DestDir.values + ] + } + ) + + logger.debug( + f"finished index selection from index({len(index)}) with:\n {default_selection}\n and {kwargs}\n selection len({len(index_selection )})" + ) + + if not index_selection: + logger.warning("index selection empty. exiting") + sys.exit() + + return index_selection + + +def test_positions(sample_group_files): + if not sample_group_files: + return + + _files = [i.file for i in sample_group_files] + _positions = [i.sample.position for i in sample_group_files] + if len(set(_files)) != len(set(_positions)): + logger.warning( + f"{sample_group_files[0].sample} Unique files and positions not matching for {sample_group_files}" + ) + return sample_group_files diff --git a/src/raman_fitting/imports/files/index_helpers.py b/src/raman_fitting/imports/files/index_helpers.py new file mode 100644 index 0000000..7ffa5dd --- /dev/null +++ b/src/raman_fitting/imports/files/index_helpers.py @@ -0,0 +1,24 @@ +import hashlib +from pathlib import Path + + +def get_filename_id_from_path(path: Path) -> str: + """ + Makes the ID from a filepath + + Parameters + ---------- + path : Path + DESCRIPTION. 
+ + Returns + ------- + str: which contains hash(parent+suffix)_stem of path + + """ + + _parent_suffix_hash = hashlib.sha512( + (str(path.parent) + path.suffix).encode("utf-8") + ).hexdigest() + filename_id = f"{_parent_suffix_hash}_{path.stem}" + return filename_id diff --git a/src/raman_fitting/imports/files/metadata.py b/src/raman_fitting/imports/files/metadata.py new file mode 100644 index 0000000..30ea44c --- /dev/null +++ b/src/raman_fitting/imports/files/metadata.py @@ -0,0 +1,48 @@ +from pathlib import Path +from typing import Dict +from datetime import date +import datetime +from typing import Any + + +from pydantic import ( + BaseModel, + FilePath, + PastDatetime, +) + + +class FileMetaData(BaseModel): + file: FilePath + creation_date: date + creation_datetime: PastDatetime + modification_date: date + modification_datetime: PastDatetime + size: int + + +def get_file_metadata(filepath: Path) -> Dict[str, Any]: + """converting creation time and last mod time to datetime object""" + fstat = filepath.stat() + c_t = fstat.st_ctime + m_t = fstat.st_mtime + c_tdate, m_tdate = c_t, m_t + + try: + c_t = datetime.datetime.fromtimestamp(fstat.st_ctime) + m_t = datetime.datetime.fromtimestamp(fstat.st_mtime) + c_tdate = c_t.date() + m_tdate = m_t.date() + except OverflowError: + pass + except OSError: + pass + ret = { + "file": filepath, + "creation_date": c_tdate, + "creation_datetime": c_t, + "modification_date": m_tdate, + "modification_datetime": m_t, + "size": fstat.st_size, + } + return ret diff --git a/src/raman_fitting/imports/files/utils.py b/src/raman_fitting/imports/files/utils.py new file mode 100644 index 0000000..cb0be14 --- /dev/null +++ b/src/raman_fitting/imports/files/utils.py @@ -0,0 +1,28 @@ +from pathlib import Path + +import tablib.exceptions +from tablib import Dataset + +from loguru import logger + + +def write_dataset_to_file(file: Path, dataset: Dataset) -> None: + if file.suffix == ".csv": + with open(file, "w", newline="") as f: + f.write(dataset.export("csv")) + else: + with open(file, "wb", encoding="utf-8") as f: + f.write(dataset.export(file.suffix)) + logger.debug(f"Wrote dataset {len(dataset)} to {file}") + + +def load_dataset_from_file(file) -> Dataset: + with open(file, "r", encoding="utf-8") as fh: + try: + imported_data = Dataset().load(fh) + except tablib.exceptions.UnsupportedFormat as e: + logger.warning(f"Read dataset {e} from {file}") + imported_data = Dataset() + + logger.debug(f"Read dataset {len(imported_data)} from {file}") + return imported_data diff --git a/src/raman_fitting/imports/files/validators.py b/src/raman_fitting/imports/files/validators.py new file mode 100644 index 0000000..cba9b0b --- /dev/null +++ b/src/raman_fitting/imports/files/validators.py @@ -0,0 +1,21 @@ +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def validate_filepath(filepath: Path, max_bytesize=10**6) -> Path | None: + if not isinstance(filepath, (Path, str)): + raise TypeError("Argument given is not Path nor str") + + filepath = Path(filepath) + + if not filepath.exists(): + logger.warning("File does not exist") + return + + filesize = filepath.stat().st_size + if filesize > max_bytesize: + logger.warning(f"File too large ({filesize})=> skipped") + return + return filepath diff --git a/src/raman_fitting/imports/models.py b/src/raman_fitting/imports/models.py new file mode 100644 index 0000000..76ba638 --- /dev/null +++ b/src/raman_fitting/imports/models.py @@ -0,0 +1,59 @@ +import json +from pydantic import ( + BaseModel, + 
FilePath,
+    model_validator,
+    Field,
+    ConfigDict,
+)
+
+from .samples.sample_id_helpers import extract_sample_metadata_from_filepath
+
+from .files.metadata import FileMetaData, get_file_metadata
+from .files.index_helpers import get_filename_id_from_path
+from .samples.models import SampleMetaData
+
+
+class RamanFileInfo(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    file: FilePath
+    filename_id: str = Field(None, init_var=False, validate_default=False)
+    sample: SampleMetaData | str = Field(None, init_var=False, validate_default=False)
+    file_metadata: FileMetaData | str = Field(
+        None, init_var=False, validate_default=False
+    )
+
+    @model_validator(mode="after")
+    def set_filename_id(self) -> "RamanFileInfo":
+        filename_id = get_filename_id_from_path(self.file)
+        self.filename_id = filename_id
+        return self
+
+    @model_validator(mode="after")
+    def parse_and_set_sample_from_file(self) -> "RamanFileInfo":
+        sample = extract_sample_metadata_from_filepath(self.file)
+        self.sample = sample
+        return self
+
+    @model_validator(mode="after")
+    def parse_and_set_metadata_from_filepath(self) -> "RamanFileInfo":
+        file_metadata = get_file_metadata(self.file)
+        self.file_metadata = FileMetaData(**file_metadata)
+        return self
+
+    @model_validator(mode="after")
+    def initialize_sample_and_file_from_dict(self) -> "RamanFileInfo":
+        if isinstance(self.sample, dict):
+            self.sample = SampleMetaData(**self.sample)
+        elif isinstance(self.sample, str):
+            _sample = json.loads(self.sample.replace("'", '"'))
+            self.sample = SampleMetaData(**_sample)
+
+        if isinstance(self.file_metadata, dict):
+            self.file_metadata = FileMetaData(**self.file_metadata)
+        elif isinstance(self.file_metadata, str):
+            _file_metadata = json.loads(self.file_metadata.replace("'", '"'))
+            self.file_metadata = FileMetaData(**_file_metadata)
+
+        return self
diff --git a/src/raman_fitting/imports/samples/models.py b/src/raman_fitting/imports/samples/models.py
new file mode 100644
index 0000000..dca47fe
--- /dev/null
+++ b/src/raman_fitting/imports/samples/models.py
@@ -0,0 +1,7 @@
+from pydantic import BaseModel
+
+
+class SampleMetaData(BaseModel):
+    id: str
+    group: str
+    position: int = 0
diff --git a/src/raman_fitting/imports/samples/sample_id_helpers.py b/src/raman_fitting/imports/samples/sample_id_helpers.py
new file mode 100644
index 0000000..01826d8
--- /dev/null
+++ b/src/raman_fitting/imports/samples/sample_id_helpers.py
@@ -0,0 +1,107 @@
+from typing import List, Tuple, Optional, Dict
+from pathlib import Path
+
+from .models import SampleMetaData
+
+
+def parse_string_to_sample_id_and_position(
+    string: str, seps=("_", " ", "-")
+) -> Tuple[str, int]:
+    """
+    Parser for the filenames -> finds SampleID and sample position
+
+    Parameters
+    ----------
+    string : str
+        The string (filename stem) which is parsed
+    seps : tuple of str
+        ordered collection of separators tried for the split
+        default : ('_', ' ', '-')
+
+    Returns
+    -------
+    tuple of (str, int)
+        The parsed sample ID and position.
+ """ + + split = None + first_sep_match_index = min( + [n for n, i in enumerate(seps) if i in string], default=None + ) + first_sep_match = ( + seps[first_sep_match_index] if first_sep_match_index is not None else None + ) + split = string.split(first_sep_match) + _lensplit = len(split) + + if _lensplit == 0: + sample_id, position = split[0], 0 + elif len(split) == 1: + sample_id, position = split[0], 0 + elif len(split) == 2: + sample_id = split[0] + _pos_strnum = "".join(i for i in split[1] if i.isnumeric()) + if _pos_strnum: + position = int(_pos_strnum) + else: + position = split[1] + elif len(split) >= 3: + sample_id = "_".join(split[0:-1]) + position = int("".join(filter(str.isdigit, split[-1]))) + position = position or 0 + return (sample_id, position) + + +def extract_sample_group_from_sample_id(sample_id: str, max_len=4) -> str: + """adding the extra sample Group key from sample ID""" + + _len = len(sample_id) + _maxalphakey = min( + [n for n, i in enumerate(sample_id) if not str(i).isalpha()], default=_len + ) + _maxkey = min((_len, _maxalphakey, max_len)) + sample_group_id = "".join([i for i in sample_id[0:_maxkey] if i.isalpha()]) + return sample_group_id + + +def overwrite_sample_id_from_mapper(sample_id: str, mapper: dict) -> str: + """Takes an sample_id and potentially overwrites from a mapper dict""" + sample_id_map = mapper.get(sample_id) + if sample_id_map is not None: + return sample_id_map + return sample_id + + +def overwrite_sample_group_id_from_parts( + parts: List[str], sample_group_id: str, mapper: dict +) -> str: + for k, val in mapper.items(): + if k in parts: + sample_group_id = val + return sample_group_id + + +def extract_sample_metadata_from_filepath( + filepath: Path, sample_name_mapper: Optional[Dict[str, Dict[str, str]]] = None +) -> SampleMetaData: + """parse the sample_id, position and sgrpID from stem""" + stem = filepath.stem + parts = filepath.parts + + sample_id, position = parse_string_to_sample_id_and_position(stem) + + if sample_name_mapper is not None: + sample_id_mapper = sample_name_mapper.get("sample_id", {}) + sample_id = overwrite_sample_id_from_mapper(sample_id, sample_id_mapper) + sample_group_id = extract_sample_group_from_sample_id(sample_id) + + if sample_name_mapper is not None: + sample_grp_mapper = sample_name_mapper.get("sample_group_id", {}) + sample_group_id = overwrite_sample_group_id_from_parts( + parts, sample_group_id, sample_grp_mapper + ) + + sample = SampleMetaData( + **{"id": sample_id, "group": sample_group_id, "position": position} + ) + return sample diff --git a/src/raman_fitting/imports/spectrum/__init__.py b/src/raman_fitting/imports/spectrum/__init__.py new file mode 100644 index 0000000..e150217 --- /dev/null +++ b/src/raman_fitting/imports/spectrum/__init__.py @@ -0,0 +1,17 @@ +from .datafile_parsers import read_file_with_tablib + +SPECTRUM_FILETYPE_PARSERS = { + ".txt": { + "method": read_file_with_tablib, # load_spectrum_from_txt, + }, + ".xlsx": { + "method": read_file_with_tablib, # pd.read_excel, + }, + ".csv": { + "method": read_file_with_tablib, # pd.read_csv, + "kwargs": {}, + }, + ".json": { + "method": read_file_with_tablib, + }, +} diff --git a/src/raman_fitting/imports/spectrum/datafile_parsers.py b/src/raman_fitting/imports/spectrum/datafile_parsers.py new file mode 100644 index 0000000..2218149 --- /dev/null +++ b/src/raman_fitting/imports/spectrum/datafile_parsers.py @@ -0,0 +1,85 @@ +from typing import Sequence +from pathlib import Path + +import numpy as np +from tablib import Dataset + +from 
loguru import logger + + +def filter_data_for_numeric(data: Dataset): + filtered_data = Dataset() + filtered_data.headers = data.headers + + for row in data: + try: + digits_row = tuple(map(float, row)) + except ValueError: + continue + except TypeError: + continue + + if not any(i is None for i in digits_row): + filtered_data.append(digits_row) + return filtered_data + + +def load_dataset_from_file(filepath, **kwargs) -> Dataset: + with open(filepath, "r") as fh: + imported_data = Dataset(**kwargs).load(fh) + return imported_data + + +def check_header_keys(dataset: Dataset, header_keys: Sequence[str]): + if not set(header_keys).issubset(set(dataset.headers or [])): + first_row = list(dataset.headers) + dataset.insert(0, first_row) + dataset.headers = header_keys + return dataset + + +def read_file_with_tablib( + filepath: Path, header_keys: Sequence[str], sort_by=None +) -> Dataset: + data = load_dataset_from_file(filepath) + data = check_header_keys(data, header_keys) + numeric_data = filter_data_for_numeric(data) + sort_by = header_keys[0] if sort_by is None else sort_by + sorted_data = numeric_data.sort(sort_by) + return sorted_data + + +def read_text(filepath, max_bytes=10**6, encoding="utf-8", errors=None): + """additional read text method for raw text data inspection""" + _text = "read_text_method" + filesize = filepath.stat().st_size + if filesize < max_bytes: + try: + _text = filepath.read_text(encoding=encoding, errors=errors) + # _text.splitlines() + except Exception as exc: + # IDEA specify which Exceptions are expected + _text += "\nread_error" + logger.warning(f"file read text error => skipped.\n{exc}") + else: + _text += "\nfile_too_large" + logger.warning(f" file too large ({filesize})=> skipped") + + return _text + + +def use_np_loadtxt(filepath, usecols=(0, 1), **kwargs) -> np.ndarray: + array = np.array([]) + try: + array = np.loadtxt(filepath, usecols=usecols, **kwargs) + except IndexError: + logger.debug(f"IndexError called np genfromtxt for {filepath}") + array = np.genfromtxt(filepath, invalid_raise=False) + except ValueError: + logger.debug(f"ValueError called np genfromtxt for {filepath}") + array = np.genfromtxt(filepath, invalid_raise=False) + except Exception as exc: + _msg = f"Can not load data from txt file: {filepath}\n{exc}" + logger.error(_msg) + raise ValueError(_msg) from exc + return array diff --git a/src/raman_fitting/imports/spectrum/spectra_collection.py b/src/raman_fitting/imports/spectrum/spectra_collection.py new file mode 100644 index 0000000..da840ca --- /dev/null +++ b/src/raman_fitting/imports/spectrum/spectra_collection.py @@ -0,0 +1,63 @@ +from typing import List + +import numpy as np + +from pydantic import BaseModel, model_validator + +from raman_fitting.models.deconvolution.spectrum_regions import RegionNames +from raman_fitting.models.spectrum import SpectrumData + + +class SpectraDataCollection(BaseModel): + spectra: List[SpectrumData] + region_name: RegionNames + mean_spectrum: SpectrumData | None = None + + @model_validator(mode="after") + def check_spectra_have_same_label(self) -> "SpectraDataCollection": + """checks that all spectra share the same label""" + labels = set(i.label for i in self.spectra) + if len(labels) > 1: + raise ValueError(f"Spectra have different labels {labels}") + return self + + @model_validator(mode="after") + def check_spectra_have_same_region(self) -> "SpectraDataCollection": + """checks that all spectra share the same region_name""" + region_names = set(i.region_name for i in self.spectra) + if len(region_names) > 1: + raise
ValueError(f"Spectra have different region_names {region_names}") + return self + + @model_validator(mode="after") + def check_spectra_lengths(self) -> "SpectraDataCollection": + unique_lengths_rs = set(len(i.ramanshift) for i in self.spectra) + unique_lengths_int = set(len(i.intensity) for i in self.spectra) + if len(unique_lengths_rs) > 1: + raise ValueError( + f"The spectra have different ramanshift lengths where they should be the same.\n\t{unique_lengths_rs}" + ) + if len(unique_lengths_int) > 1: + raise ValueError( + f"The spectra have different intensity lengths where they should be the same.\n\t{unique_lengths_int}" + ) + + return self + + @model_validator(mode="after") + def set_mean_spectrum(self) -> "SpectraDataCollection": + # wrap this in a ProcessedSpectraCollection model + mean_int = np.mean(np.vstack([i.intensity for i in self.spectra]), axis=0) + mean_ramanshift = np.mean( + np.vstack([i.ramanshift for i in self.spectra]), axis=0 + ) + source_files = list(set(i.source for i in self.spectra)) + _label = "".join(map(str, set(i.label for i in self.spectra))) + mean_spec = SpectrumData( + ramanshift=mean_ramanshift, + intensity=mean_int, + label=f"clean_{self.region_name}_mean", + region_name=self.region_name, + source=source_files, + ) + self.mean_spectrum = mean_spec + return self diff --git a/src/raman_fitting/imports/spectrum/validators.py b/src/raman_fitting/imports/spectrum/validators.py new file mode 100644 index 0000000..68621ef --- /dev/null +++ b/src/raman_fitting/imports/spectrum/validators.py @@ -0,0 +1,53 @@ +from dataclasses import dataclass +import logging + +import pandas as pd +import numpy as np +from tablib import Dataset + +logger = logging.getLogger(__name__) + + +@dataclass +class ValidateSpectrumValues: + spectrum_key: str + min: float + max: float + len: int + + def validate_min(self, spectrum_data: pd.DataFrame): + data_min = min(spectrum_data[self.spectrum_key]) + return np.isclose(data_min, self.min, rtol=0.2) + + def validate_max(self, spectrum_data: pd.DataFrame): + data_max = max(spectrum_data[self.spectrum_key]) + return data_max <= self.max + + def validate_len(self, spectrum_data: pd.DataFrame): + data_len = len(spectrum_data) + return np.isclose(data_len, self.len, rtol=0.1) + + def validate(self, spectrum_data: pd.DataFrame): + ret = [] + for _func in [self.validate_min, self.validate_max, self.validate_len]: + ret.append(_func(spectrum_data)) + return all(ret) + + +def validate_spectrum_keys_expected_values( + spectrum_data: Dataset, expected_values: ValidateSpectrumValues +): + if expected_values.spectrum_key not in spectrum_data.columns: + logger.error( + f"The expected value type {expected_values.spectrum_key} is not in the columns {spectrum_data.columns}" + ) + if spectrum_data.empty: + logger.error("Spectrum data is empty") + return + + validation = expected_values.validate(spectrum_data) + + if not validation: + logger.warning( + f"The {expected_values.spectrum_key} of this spectrum does not match the expected values {expected_values}" + ) diff --git a/src/raman_fitting/imports/spectrumdata_parser.py b/src/raman_fitting/imports/spectrumdata_parser.py new file mode 100644 index 0000000..6a89e6a --- /dev/null +++ b/src/raman_fitting/imports/spectrumdata_parser.py @@ -0,0 +1,118 @@ +""" +Created on Mon Jul 5 21:09:06 2021 + +@author: DW +""" + +from dataclasses import dataclass, field +import hashlib + +from pathlib import Path +from functools import partial + +from typing import Callable + +from tablib import Dataset + +from
.spectrum.validators import ValidateSpectrumValues +from .files.validators import validate_filepath +from .spectrum import SPECTRUM_FILETYPE_PARSERS + +from raman_fitting.models.spectrum import SpectrumData + +from loguru import logger + + +spectrum_data_keys = ("ramanshift", "intensity") + +ramanshift_expected_values = ValidateSpectrumValues( + spectrum_key="ramanshift", min=-95, max=3650, len=1600 +) +intensity_expected_values = ValidateSpectrumValues( + spectrum_key="intensity", min=0, max=1e4, len=1600 +) + +spectrum_keys_expected_values = { + "ramanshift": ramanshift_expected_values, + "intensity": intensity_expected_values, +} + + +def get_file_parser(filepath: Path) -> Callable[[Path], Dataset]: + "Get callable file parser function." + suffix = filepath.suffix + parser = SPECTRUM_FILETYPE_PARSERS[suffix]["method"] + kwargs = SPECTRUM_FILETYPE_PARSERS[suffix].get("kwargs", {}) + return partial(parser, **kwargs) + + +@dataclass +class SpectrumReader: + """ + Reads a spectrum from a 'raw' data file Path or str + + with spectrum_data_keys "ramanshift" and "intensity". + Double checks the values + Sets a hash attribute afterwards + """ + + filepath: Path | str + spectrum_data_keys: tuple = field(default=spectrum_data_keys, repr=False) + + spectrum: SpectrumData = field(default=None) + label: str = "raw" + region_name: str = "full" + spectrum_hash: str = field(default=None, repr=False) + spectrum_length: int = field(default=0, init=False) + + def __post_init__(self): + super().__init__() + + self.filepath = validate_filepath(self.filepath) + self.spectrum_length = 0 + + if self.filepath is None: + raise ValueError(f"File is not valid. {self.filepath}") + parser = get_file_parser(self.filepath) + parsed_spectrum = parser(self.filepath, self.spectrum_data_keys) + if parsed_spectrum is None: + return + for spectrum_key in parsed_spectrum.headers: + if spectrum_key not in spectrum_keys_expected_values: + continue + validator = spectrum_keys_expected_values[spectrum_key] + valid = validator.validate(parsed_spectrum) + if not valid: + logger.warning( + f"The values of {spectrum_key} of this spectrum are invalid. 
{validator}" + ) + spec_init = { + "label": self.label, + "region_name": self.region_name, + "source": self.filepath, + } + _parsed_spec_dict = { + k: parsed_spectrum[k] for k in spectrum_keys_expected_values.keys() + } + spec_init.update(_parsed_spec_dict) + self.spectrum = SpectrumData(**spec_init) + + self.spectrum_hash = self.get_hash_text(self.spectrum) + self.spectrum_length = len(self.spectrum) + + @staticmethod + def get_hash_text(data, hash_text_encoding="utf-8"): + text = str(data) + text_hash = hashlib.sha256(text.encode(hash_text_encoding)).hexdigest() + return text_hash + + def __repr__(self): + _txt = f"Spectrum({self.filepath.name}, len={self.spectrum_length})" + return _txt + + def quickplot(self): + """Plot for quickly checking the spectrum""" + try: + self.spectrum.plot(x="ramanshift", y="intensity") + except TypeError: + logger.warning("No numeric data to plot") diff --git a/src/raman_fitting/indexing/__init__.py b/src/raman_fitting/indexing/__init__.py deleted file mode 100644 index 792d600..0000000 --- a/src/raman_fitting/indexing/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# diff --git a/src/raman_fitting/indexing/filedata_parser.py b/src/raman_fitting/indexing/filedata_parser.py deleted file mode 100644 index 2a02bd9..0000000 --- a/src/raman_fitting/indexing/filedata_parser.py +++ /dev/null @@ -1,247 +0,0 @@ -""" -Created on Mon Jul 5 21:09:06 2021 - -@author: DW -""" -import hashlib -import logging -from pathlib import Path -from warnings import warn - -import numpy as np -import pandas as pd - - -logger = logging.getLogger(__name__) - - -# %% -class SpectrumReader: - """ - Reads a clean spectrum from a file Path or str - - with columns "ramanshift" and "intensity". - Double checks the values - Sets a hash attribute afterwards - """ - - supported_filetypes = [".txt"] - - spectrum_data_keys = ("ramanshift", "intensity") - spectrum_keys_expected_values = { - "ramanshift": {"expected_values": {"min": -95, "max": 3600, "len": 1600}}, - "intensity": {"expected_values": {"min": 0, "max": 1e4, "len": 1600}}, - } - # expected_ranges = {"ramanshift": (-95, 3600)} - # expected_length = 1600 - # using slots since there will be many instances of this class - - __slots__ = ( - *("filepath", "max_bytesize", "spectrum", "spectrum_hash", "spectrum_length"), - *spectrum_keys_expected_values.keys(), - ) - - def __init__(self, filepath: Path, max_bytesize=10**6): - if not isinstance(filepath, Path): - if isinstance(filepath, str): - filepath = Path(filepath) - else: - raise TypeError("Argument given is not Path nor str") - - self.filepath = filepath - self.max_bytesize = max_bytesize - - self.spectrum = pd.DataFrame(data=[], columns=self.spectrum_data_keys) - if filepath.exists(): - filesize = filepath.stat().st_size - if filesize < max_bytesize: - self.spectrum = self.spectrum_parser(self.filepath) - self.double_check_spectrum_values( - self.spectrum, expected_values=self.spectrum_keys_expected_values - ) - else: - logger.warning("File does not exist") - - self.spectrum_hash = self.get_hash_text(self.spectrum) - self.spectrum_length = len(self.spectrum) - - # sort spectrum data by ramanshift - self.spectrum = self.sort_spectrum( - self.spectrum, sort_by="ramanshift", ignore_index=True - ) - - for key in self.spectrum_data_keys: - setattr(self, key, self.spectrum[key].to_numpy()) - - def spectrum_parser(self, filepath: Path): - """ - Reads data from a file and converts into pd.DataFrame object - - Parameters - -------- - filepath : Path, str - file which contains the data of a spectrum - 
- Returns - -------- - pd.DataFrame - Contains the data of the spectrum in a DataFrame with the selected spectrum keys as columns - """ - - spectrum_data = pd.DataFrame() - - suffix = "" - suffix = filepath.suffix - if suffix in self.supported_filetypes: - if suffix == ".txt": - try: - spectrum_data = self.use_np_loadtxt(filepath) - - except Exception as exc: - logger.warning( - f"Can not complete use_np_loadtxt for:\n{filepath}\n{exc}" - ) - # data = self.read_text(self.filepath) - elif suffix == ".xlsx": - # read excel file input - # IDEA not implemented yet, select columns etc or autodetect - spectrum_data = pd.read_excel(filepath) - - elif suffix == ".csv": - # read csv file input - # IDEA not implemented yet, select columns etc or autodetect - spectrum_data = pd.read_excel(filepath) - - else: - logger.warning(f"Filetype {suffix} not supported") - - return spectrum_data - - def use_np_loadtxt(self, filepath, usecols=(0, 1), **kwargs): - try: - loaded_array = np.loadtxt(filepath, usecols=usecols, **kwargs) - except IndexError: - logger.debug(f"IndexError called np genfromtxt for {filepath}") - loaded_array = np.genfromtxt(filepath, invalid_raise=False) - except ValueError: - logger.debug(f"ValueError called np genfromtxt for {filepath}") - loaded_array = np.genfromtxt(filepath, invalid_raise=False) - except Exception as exc: - logger.warning(f"Can not load data from txt file: {filepath}\n{exc}") - loaded_array = np.array([]) - - spectrum_data = pd.DataFrame() - if loaded_array.ndim == len(self.spectrum_data_keys): - try: - spectrum_data = pd.DataFrame( - loaded_array, columns=self.spectrum_data_keys - ) - except Exception as exc: - logger.warning( - f"Can not create DataFrame from array object: {loaded_array}\n{exc}" - ) - return spectrum_data - - def double_check_spectrum_values( - self, spectrum_data: pd.DataFrame, expected_values: dict = {} - ): - if all([i in spectrum_data.columns for i in expected_values.keys()]): - _len = len(spectrum_data) - for _key, expectations in expected_values.items(): - if expectations: - for exp_method, exp_value in expectations.items(): - _check = False - if "len" in exp_method: - _spectrum_value = _len - _check = np.isclose(_spectrum_value, exp_value, rtol=0.1) - - elif "min" in exp_method: - _spectrum_value = spectrum_data[_key].min() - _check = np.isclose(_spectrum_value, exp_value, rtol=0.2) - if not _check: - _check = exp_value >= _spectrum_value - elif "max" in exp_method: - _spectrum_value = spectrum_data[_key].max() - _check = exp_value <= _spectrum_value - else: - # not implemented - _check = True - if not _check: - logger.warning( - f"The {exp_method} of this spectrum ({_spectrum_value }) does not match the expected values {exp_value}" - ) - else: - logger.error( - f"The dataframe does not have all the keys from {self.spectrum_data_keys}" - ) - - @staticmethod - def read_text(filepath, max_bytes=10**6, encoding="utf-8", errors=None): - """additional read text method for raw text data inspection""" - _text = "read_text_method" - filesize = filepath.stat().st_size - if filesize < max_bytes: - try: - _text = filepath.read_text(encoding=encoding, errors=errors) - # _text.splitlines() - except Exception as exc: - # IDEA specify which Exceptions are expected - _text += "\nread_error" - logger.warning(f"file read text error => skipped.\n{exc}") - else: - _text += "\nfile_too_large" - logger.warning(f" file too large ({filesize})=> skipped") - - return _text - - def sort_spectrum( - self, spectrum: pd.DataFrame, sort_by="ramanshift", ignore_index=True 
- ): - """sort the spectrum by the given column""" - if sort_by in spectrum.columns: - spectrum = spectrum.sort_values(by=sort_by, ignore_index=ignore_index) - else: - logger.warning(f"sort_by column {sort_by} not in spectrum") - return spectrum - - def load_data(self, filepath): - """old method taken out from SpectrumConstructor""" - # on_lbl="raw" - # assert self.file.exists(), f'File: "{self.file}" does not exist.' - # IDEA import file reader class here - ramanshift, intensity = np.array([]), np.array([]) - i = 0 - while not ramanshift.any() and i < 2000: - try: - ramanshift, intensity = np.loadtxt( - filepath, usecols=(0, 1), delimiter="\t", unpack=True, skiprows=i - ) - # Alternative parsing method with pandas.read_csv - # _rawdf = pd.read_csv(self.file, usecols=(0, 1), delimiter='\t', - # skiprows=i, header =None, names=['ramanshift','intensity']) - logger.info( - f"{self.file} with len rs({len(ramanshift)}) and len int({len(intensity)})" - ) - # self._read_succes = True - # self.spectrum_length = len(ramanshift) - # self.info.update( - # {"spectrum_length": self.spectrum_length, "skipped_rows": i}) - except ValueError: - i += 1 - - @staticmethod - def get_hash_text(dataframe, hash_text_encoding="utf-8"): - text = dataframe.to_string() - text_hash = hashlib.sha256(text.encode(hash_text_encoding)).hexdigest() - return text_hash - - def __repr__(self): - _txt = f"Spectrum({self.filepath.name}, len={self.spectrum_length})" - return _txt - - def quickplot(self): - """Plot for quickly checking the spectrum""" - try: - self.spectrum.plot(x="ramanshift", y="intensity") - except TypeError: - logger.warning("No numeric data to plot") diff --git a/src/raman_fitting/indexing/filename_parser.py b/src/raman_fitting/indexing/filename_parser.py deleted file mode 100644 index 1da2eab..0000000 --- a/src/raman_fitting/indexing/filename_parser.py +++ /dev/null @@ -1,190 +0,0 @@ -# import datetime -import hashlib -import logging -from pathlib import Path - -from typing import Dict, List - -from .filedata_parser import SpectrumReader -from .filename_parser_helpers import filestem_to_sid_and_pos, sID_to_sgrpID, get_fstats - -logger = logging.getLogger(__name__) - -index_primary_key = "rfID" -index_file_primary_keys = {f"{index_primary_key}": "string"} -index_file_path_keys = {"FileStem": "string", "FilePath": "Path"} -index_file_sample_keys = { - "SampleID": "string", - "SamplePos": "int64", - "SampleGroup": "string", -} -index_file_stat_keys = { - "FileCreationDate": "datetime64", - "FileCreation": "float", - "FileModDate": "datetime64", - "FileMod": "float", - "FileSize": "int64", -} -index_file_read_text_keys = {"FileHash": "string", "FileText": "string"} - -index_dtypes_collection = { - **index_file_path_keys, - **index_file_sample_keys, - **index_file_stat_keys, - **index_file_read_text_keys, -} - -# Extra name to sID mapper, if keys is in filename -_extra_sID_name_mapper = { - "David": "DW", - "stephen": "SP", - "Alish": "AS", - "Aish": "AS", -} - -# Extra name to sID mapper, if key is in filepath parts -_extra_sgrpID_name_mapper = {"Raman Data for fitting David": "SH"} - - -def _extra_overwrite_sID_from_mapper( - sID: str, mapper: dict = _extra_sID_name_mapper -) -> str: - """Takes an sID and potentially overwrites from a mapper dict""" - _sID_map = mapper.get(sID, None) - if _sID_map: - sID = _sID_map - return sID - - -def _extra_overwrite_sgrpID_from_parts( - parts: List[str], sgrpID: str, mapper: dict = _extra_sgrpID_name_mapper -) -> str: - for k, val in _extra_sgrpID_name_mapper.items(): - if 
k in parts: - sgrpID = val - return sgrpID - - -class PathParser(Path): - """ - This class parses the filepath of a file to a parse_result (dict), - from which the main raman file index will be built up. - - """ - - _flavour = type(Path())._flavour - - def __init__(self, *args, **kwargs): - super().__init__() - self._qcnm = self.__class__.__qualname__ - self.stats_ = None - self.data = None - self.parse_result = self.collect_parse_results(**kwargs) - - @staticmethod - def get_rfID_from_path(path: Path) -> str: - """ - Makes the ID from a filepath - - Parameters - ---------- - path : Path - DESCRIPTION. - - Returns - ------- - str: which contains hash(parent+suffix)_stem of path - - """ - - _parent_suffix_hash = hashlib.sha512( - (str(path.parent) + path.suffix).encode("utf-8") - ).hexdigest() - _filestem = path.stem - fnID = _parent_suffix_hash + "_" + _filestem - return fnID - - def collect_parse_results( - self, read_data=False, store_data=False, **kwargs - ) -> Dict: - """performs all the steps for parsing the filepath""" - parse_res_collect = {} - - if self.exists(): - if self.is_file(): - self.stats_ = self.stat() - - _fnID = self.make_dict_from_keys( - index_file_primary_keys, (self.get_rfID_from_path(self),) - ) - _filepath = self.make_dict_from_keys( - index_file_path_keys, (self.stem, self) - ) - _sample = self.parse_sample_with_checks() - _filestats = self.parse_filestats(self.stats_) - if read_data == True: - try: - self.data = SpectrumReader(self) - except Exception as exc: - logger.warning( - f"{self._qcnm} {self} SpectrumReader failed.\n{exc}" - ) - - parse_res_collect = {**_fnID, **_filepath, **_sample, **_filestats} - else: - logger.warning(f"{self._qcnm} {self} is not a file => skipped") - else: - logger.warning(f"{self._qcnm} {self} does not exist => skipped") - return parse_res_collect - - def parse_sample_with_checks(self): - """parse the sID, position and sgrpID from stem""" - - _parse_res = filestem_to_sid_and_pos(self.stem) - - if len(_parse_res) == 2: - sID, position = _parse_res - - try: - sID = _extra_overwrite_sID_from_mapper(sID) - except Exception as exc: - logger.info( - f"{self._qcnm} {self} _extra_overwrite_sID_from_mapper failed => skipped.\n{exc}" - ) - - sgrpID = sID_to_sgrpID(sID) - - try: - sgrpID = _extra_overwrite_sgrpID_from_parts(self.parts, sgrpID) - except Exception as exc: - logger.info( - f"{self._qcnm} {self} _extra_overwrite_sgrpID_from_parts failed => skipped.\n{exc}" - ) - - _parse_res = sID, position, sgrpID - else: - logger.warning( - f"{self._qcnm} {self} failed to parse filename to sID and position." - ) - return self.make_dict_from_keys(index_file_sample_keys, _parse_res) - - def parse_filestats(self, fstat) -> Dict: - """get status metadata from a file""" - - filestats = get_fstats(fstat) - return self.make_dict_from_keys(index_file_stat_keys, filestats) - - def make_dict_from_keys(self, _keys_attr: Dict, _result: tuple) -> Dict: - """returns dict from tuples of keys and results""" - if not isinstance(_result, tuple): - logger.warning( - f"{self._qcnm} input value is not a tuple, {_result}. 
Try to cast into tuple" - ) - _result = (_result,) - - _keys = _keys_attr.keys() - - if not len(_result) == len(_keys) and not isinstance(_keys, str): - # if len not matches make stand in numbered keys - _keys = [f"{_keys_attr}_{n}" for n, i in enumerate(_result)] - return dict(zip(_keys, _result)) diff --git a/src/raman_fitting/indexing/filename_parser_collector.py b/src/raman_fitting/indexing/filename_parser_collector.py deleted file mode 100644 index 04ba581..0000000 --- a/src/raman_fitting/indexing/filename_parser_collector.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Created on Sun Aug 8 18:20:20 2021 - -@author: DW -""" -from typing import List, Collection -import logging - -logger = logging.getLogger(__name__) - -from .filename_parser import PathParser - - -def make_collection(raman_files: Collection, **kwargs) -> List[PathParser]: - pp_collection = [] - for file in raman_files: - try: - pp_res = PathParser(file, **kwargs) - pp_collection.append(pp_res) - except Exception as e: - logger.warning( - f"{__name__} make_collection unexpected error for calling PathParser on\n{file}.\n{e}" - ) - pp_collection = sorted(pp_collection) - return pp_collection diff --git a/src/raman_fitting/indexing/filename_parser_helpers.py b/src/raman_fitting/indexing/filename_parser_helpers.py deleted file mode 100644 index 19f4757..0000000 --- a/src/raman_fitting/indexing/filename_parser_helpers.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Collection of method for parsing a filename""" -# -*- coding: utf-8 -*- - -import datetime -from typing import Tuple - - -__all__ = ["filestem_to_sid_and_pos", "sID_to_sgrpID", "get_fstats"] - - -def filestem_to_sid_and_pos(stem: str, seps=("_", " ", "-")) -> Tuple[str, str]: - """ - Parser for the filenames -> finds SampleID and sample position - - Parameters - ---------- - # ramanfile_stem : str - # The filepath which the is parsed - seps : tuple of str default - ordered collection of seperators tried for split - default : ('_', ' ', '-') - - Returns - ------- - tuple of strings - Collection of strings which contains the parsed elements. 
- """ - - split = None - first_sep_match_index = min( - [n for n, i in enumerate(seps) if i in stem], default=None - ) - first_sep_match = ( - seps[first_sep_match_index] if first_sep_match_index is not None else None - ) - split = stem.split(first_sep_match) - _lensplit = len(split) - - if _lensplit == 0: - sID, position = split[0], 0 - elif len(split) == 1: - sID, position = split[0], 0 - elif len(split) == 2: - sID = split[0] - _pos_strnum = "".join(i for i in split[1] if i.isnumeric()) - if _pos_strnum: - position = int(_pos_strnum) - else: - position = split[1] - elif len(split) >= 3: - sID = "_".join(split[0:-1]) - position = int("".join(filter(str.isdigit, split[-1]))) - return (sID, position) - - -def sID_to_sgrpID(sID: str, max_len=4) -> str: - """adding the extra sample Group key from sample ID""" - - _len = len(sID) - _maxalphakey = min( - [n for n, i in enumerate(sID) if not str(i).isalpha()], default=_len - ) - _maxkey = min((_len, _maxalphakey, max_len)) - sgrpID = "".join([i for i in sID[0:_maxkey] if i.isalpha()]) - return sgrpID - - -def get_fstats(fstat) -> Tuple: - """converting creation time and last mod time to datetime object""" - c_t = fstat.st_ctime - m_t = fstat.st_mtime - c_tdate, m_tdate = c_t, m_t - - try: - c_t = datetime.datetime.fromtimestamp(fstat.st_ctime) - m_t = datetime.datetime.fromtimestamp(fstat.st_mtime) - c_tdate = c_t.date() - m_tdate = m_t.date() - except OverflowError as e: - pass - except OSError as e: - pass - - return c_tdate, c_t, m_tdate, m_t, fstat.st_size diff --git a/src/raman_fitting/indexing/indexer.py b/src/raman_fitting/indexing/indexer.py deleted file mode 100644 index 062a3ce..0000000 --- a/src/raman_fitting/indexing/indexer.py +++ /dev/null @@ -1,334 +0,0 @@ -""" Indexer for raman data files """ -import logging -from pathlib import Path -import sys -from typing import List - -import pandas as pd - -from raman_fitting.config.filepath_helper import get_directory_paths_for_run_mode -from raman_fitting.indexing.filename_parser import index_dtypes_collection -from raman_fitting.indexing.filename_parser_collector import make_collection - -logger = logging.getLogger(__name__) -logger.propagate = False - -__all__ = ["MakeRamanFilesIndex"] - - -class MakeRamanFilesIndex: - """ - - Finds the RAMAN files in the data folder from config and creates an overview, on the attribute .index - finds a list of files, - - """ - - debug = False - - table_name = "ramanfiles" - - def __init__( - self, force_reload=True, run_mode="normal", dataset_dirs=None, **kwargs - ): - self._cqnm = self.__class__.__qualname__ - - self._kwargs = kwargs - self.force_reload = force_reload - self.run_mode = run_mode - - if not dataset_dirs: - dataset_dirs = get_directory_paths_for_run_mode(run_mode=self.run_mode) - - self.dataset_dirs = dataset_dirs - for k, val in self.dataset_dirs.items(): - if isinstance(val, Path): - setattr(self, k, val) - - self.raman_files = self.find_files(data_dir=self.DATASET_DIR) - self.index = pd.DataFrame() - self._error_parse_filenames = [] - if "normal" in run_mode and not self.debug and not self.force_reload: - self.index = self.load_index() - - else: - self.index = self.reload_index() - - self.index_selection = self.index_selection(self.index, **self._kwargs) - - @staticmethod - def find_files(data_dir: Path = Path()) -> List: - """ - Creates a list of all raman type files found in the DATASET_DIR which are used in the creation of the index. 
- """ - - if not isinstance(data_dir, Path): - logger.warning("find_files warning: arg is not Path.") - return [] - - raman_files_raw = [] - if data_dir.exists(): - RFs = data_dir.rglob("*txt") - if RFs: - raman_files_raw = [ - i - for i in RFs - if not "fail" in i.stem and not "Labjournal" in str(i) - ] - logger.info( - f"find_files {len(raman_files_raw)} files were found in the chosen data dir:\n\t{data_dir}" - ) - else: - logger.warning( - f"find_files warning: the chose data file dir was empty.\n{data_dir}\mPlease choose another directory which contains your data files." - ) - else: - logger.warning( - f"find_files warning: the chosen data file dir does not exists.\n{data_dir}\nPlease choose an existing directory which contains your data files." - ) - - return raman_files_raw - - def make_index(self): - """loops over the files and scrapes the index data from each file""" - raman_files = self.raman_files - pp_collection = make_collection(raman_files, **self._kwargs) - - index = pd.DataFrame([i.parse_result for i in pp_collection]) - index = self._extra_assign_destdir_and_set_paths(index) - logger.info( - f"{self._cqnm} successfully made index {len(index)} from {len(raman_files)} files" - ) - if self._error_parse_filenames: - logger.info( - f"{self._cqnm} errors for filename parser {len(self._error_parse_filenames)} from {len(raman_files)} files" - ) - return index - - def _extra_assign_destdir_and_set_paths(self, index: pd.DataFrame): - """assign the DestDir column to index and sets column values as object type""" - - if hasattr(index, "SampleGroup"): - index = index.assign( - **{ - "DestDir": [ - self.RESULTS_DIR.joinpath(sGrp) - for sGrp in index.SampleGroup.to_numpy() - ] - } - ) - _path_dtypes_map = { - k: val for k, val in index_dtypes_collection.items() if "Path" in val - } - for k, val in _path_dtypes_map.items(): - if hasattr(index, k): - if "Path" in val: - index[k] = [Path(i) for i in index[k].to_numpy()] - return index - - def export_index(self, index): - """saves the index to a defined Index file""" - if not index.empty: - if not self.INDEX_FILE.parent.exists(): - logger.info( - f"{self._cqnm} created parent dir: {self.INDEX_FILE.parent}" - ) - self.INDEX_FILE.parent.mkdir(exist_ok=True, parents=True) - - index.to_csv(self.INDEX_FILE) - - _dtypes = index.dtypes.to_frame("dtypes") - _dtypes.to_csv(self._dtypes_filepath()) - - logger.info( - f"{self._cqnm} Succesfully Exported Raman Index file to:\n\t{self.INDEX_FILE}\nwith len({len(index)})." 
- ) - else: - logger.info(f"{self._cqnm} Empty index not exported") - - def load_index(self): - """loads the index from from defined Index file""" - if self.INDEX_FILE.exists(): - try: - _dtypes = pd.read_csv(self._dtypes_filepath(), index_col=[0]).to_dict()[ - "dtypes" - ] - - _dtypes_datetime = { - k: val - for k, val in _dtypes.items() - if "datetime" in val or k.endswith("Date") - } - - _dtypes_no_datetime = { - k: val - for k, val in _dtypes.items() - if k not in _dtypes_datetime.keys() - } - - index = pd.read_csv( - self.INDEX_FILE, - index_col=[0], - dtype=_dtypes_no_datetime, - parse_dates=list(_dtypes_datetime.keys()), - ) - index = self._extra_assign_destdir_and_set_paths(index) - - logger.info( - f"Succesfully imported Raman Index file from {self.INDEX_FILE}, with len({len(index)})" - ) - if not len(self.index) == ( - len(self.raman_files) + len(self._error_parse_filenames) - ): - logger.error( - f"""'Error in load_index from {self.INDEX_FILE}, - \nlength of loaded index not same as number of raman files - \n starting reload index ... """ - ) - self.index = self.reload_index() - - except Exception as e: - logger.error( - f"Error in load_index from {self.INDEX_FILE},\n{e}\n starting reload index ... " - ) - index = self.reload_index() - else: - logger.error( - f"Error in load_index: {self.INDEX_FILE} does not exists, starting reload index ... " - ) - index = self.reload_index() - return index - - def reload_index(self): - """restarts the index creation from scratch and export.""" - logger.info(f"{self._cqnm} starting reload index.") - index = pd.DataFrame() - - try: - logger.info(f"{self._cqnm} making index.") - - try: - index = self.make_index() - except Exception as e: - logger.error(f"{self._cqnm} make index error:\n\t{e}") - - try: - self.export_index(index) - except Exception as e: - logger.error(f"{self._cqnm} export after make index error:\n\t{e}") - - except Exception as e: - logger.error(f"{self._cqnm} reload index error:\n\t{e}") - - return index - - def index_selection( - self, index=pd.DataFrame(), default_selection: str = "", **kwargs - ): - """ - Special selector on the index DataFrame - - Parameters - ------- - - index - pd.DataFrame containing the index of files - should contains columns that are given in index_file_sample_cols and index_file_stat_cols - default_selection str - all or '' for empty default - kwargs - checks for keywords suchs as samplegroups, sampleIDs, extra - meant for cli commands - - Returns - ------- - index_selection - pd.DataFrame with a selection from the given input parameter index - default returns empty DataFrame - - """ - - _kws = kwargs - _keys = _kws.keys() - - default_selection = _kws.get("default_selection", default_selection) - if not "normal" in _kws.get("run_mode", default_selection): - default_selection = "all" - index_selection = pd.DataFrame() - logger.info( - f"{self._cqnm} starting index selection from index({len(index)}) with:\n default selection: {default_selection}\n and {kwargs}" - ) - - if not index.empty: - if default_selection: - if default_selection == "all": - index_selection = index.copy() - - if "samplegroups" in _keys: - if _kws["samplegroups"]: - index_selection = index.loc[ - index.SampleGroup.str.contains("|".join(_kws["samplegroups"])) - ] - if "sampleIDs" in _keys: - index_selection = index.loc[ - index.SampleID.str.contains("|".join(_kws["sampleIDs"])) - ] - - if "extra" in _keys: - runq = _kws.get("run") - if "recent" in runq: - grp = index.sort_values( - "FileCreationDate", ascending=False - 
).FileCreationDate.unique()[0] - - index_selection = index.loc[index.FileCreationDate == grp] - index_selection = index_selection.assign( - **{ - "DestDir": [ - Path(i).joinpath(grp.strftime("%Y-%m-%d")) - for i in index_selection.DestDir.values - ] - } - ) - - if "make_examples" in self.run_mode: - index_selection = index.loc[~index.SampleID.str.startswith("Si")] - - logger.debug( - f"{self._cqnm} finished index selection from index({len(index)}) with:\n {default_selection}\n and {kwargs}\n selection len({len(index_selection )})" - ) - else: - logger.warning(f"{self._cqnm} index selection index arg empty") - - if index_selection.empty: - logger.warning(f"{self._cqnm} index selection empty. exiting") - sys.exit() - - return index_selection - - def _dtypes_filepath(self): - _dtypes_filepath = self.INDEX_FILE.with_name( - self.INDEX_FILE.stem + "_dtypes" + self.INDEX_FILE.suffix - ) - return _dtypes_filepath - - def __repr__(self): - return f"{self._cqnm} with index ({len(self.index)})" - - def __len__(self): - return len(self.index) - - -def main(): - """test run for indexer""" - RamanIndex = None - try: - RamanIndex = MakeRamanFilesIndex(read_data=True, run_mode="make_examples") - - except Exception as e: - logger.error(f"Raman Index error: {e}") - return RamanIndex - - -if __name__ == "__main__": - RamanIndex = main() diff --git a/src/raman_fitting/interfaces/__init__.py b/src/raman_fitting/interfaces/__init__.py index 0e5bc4c..e69de29 100644 --- a/src/raman_fitting/interfaces/__init__.py +++ b/src/raman_fitting/interfaces/__init__.py @@ -1,4 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -from .cli import main diff --git a/src/raman_fitting/interfaces/cli.py b/src/raman_fitting/interfaces/argparse_cli.py similarity index 53% rename from src/raman_fitting/interfaces/cli.py rename to src/raman_fitting/interfaces/argparse_cli.py index 19e95b3..9b545c9 100644 --- a/src/raman_fitting/interfaces/cli.py +++ b/src/raman_fitting/interfaces/argparse_cli.py @@ -3,17 +3,9 @@ import argparse - -RUN_MODES = ["normal", "testing", "debug", "make_index", "make_examples"] - -try: - import importlib.metadata - - _version = importlib.metadata.version("raman_fitting") -except ImportError: - _version = "version.not.found" - -_version_text = f"\n=== CLI raman_fitting version: {_version} ===\n" +from raman_fitting.config.path_settings import RunModes +from loguru import logger +from .utils import get_package_version def main(): @@ -27,16 +19,17 @@ def main(): parser.add_argument( "-M", + "-m", "--run-mode", - type=str, - choices=RUN_MODES, + type=RunModes, + # choices=, help="running mode of package, for testing", default="normal", ) parser.add_argument( "-sIDs", - "--sampleIDs", + "--sample_ids", nargs="+", default=[], help="Selection of names of SampleIDs from index to run over.", @@ -44,18 +37,23 @@ def main(): parser.add_argument( "-sGrps", - "--samplegroups", + "--sample_groups", nargs="+", default=[], help="Selection of names of sample groups from index to run over.", ) + parser.add_argument( + "--fit_model_specific_names", + nargs="+", + default=[], + help="Selection of names of the composite LMfit models to use for fitting.", + ) + parser.add_argument( "--version", - # action=print(_version_text), action="version", - version="%(prog)s {}".format(_version), - # const=_version_text, + version="%(prog)s {}".format(get_package_version()), help="Prints out the current version of the raman_fitting distribution, via importlib.metadata.version", ) @@ -65,17 +63,11 @@ def main(): # import the 
raman_fitting package import raman_fitting as rf - print(f"CLI args: {args}") - if args.run_mode == "normal": - pass - # _org_index = OrganizeRamanFiles() - # RL = RamanLoop(_org_index, run_mode ='normal') - elif args.run_mode.upper() == "DEBUG": - args.run_mode = args.run_mode.upper() - # IDEA Add a FAST TRACK for DEBUG - elif args.run_mode == "testing": - pass - - _main_run = rf.MainDelegator(**vars(args)) - - # return parser + extra_kwargs = {} + if args.run_mode == RunModes.EXAMPLES: + extra_kwargs.update( + {"fit_model_specific_names": ["2peaks", "3peaks", "4peaks"]} + ) + logger.info(f"Starting raman_fitting with CLI args:\n{args}") + kwargs = {**vars(args), **extra_kwargs} + _main_run = rf.MainDelegator(**kwargs) diff --git a/src/raman_fitting/interfaces/typer_cli.py b/src/raman_fitting/interfaces/typer_cli.py new file mode 100644 index 0000000..2fc568b --- /dev/null +++ b/src/raman_fitting/interfaces/typer_cli.py @@ -0,0 +1,125 @@ +from typing import List, Optional +from typing_extensions import Annotated + +from pathlib import Path +from enum import StrEnum, auto +from loguru import logger +from raman_fitting.config.path_settings import RunModes +from raman_fitting.delegating.main_delegator import MainDelegator +from raman_fitting.imports.files.file_indexer import initialize_index_from_source_files +from .utils import get_package_version + +import typer + + +class MakeTypes(StrEnum): + INDEX = auto() + CONFIG = auto() + EXAMPLE = auto() + + +__version__ = "0.1.0" + + +def version_callback(value: bool): + if value: + package_version = get_package_version() + typer_cli_version = f"Awesome Typer CLI Version: {__version__}" + print(f"{package_version}\n{typer_cli_version}") + raise typer.Exit() + + +app = typer.Typer() +state = {"verbose": False} + + +@app.command() +def run( + models: Annotated[ + List[str], + typer.Option( + default_factory=list, help="Selection of models to use for deconvolution." 
+ ), + ], + sample_ids: Annotated[ + List[str], + typer.Option( + default_factory=list, + help="Selection of names of SampleIDs from index to run over.", + ), + ], + group_ids: Annotated[ + List[str], + typer.Option( + default_factory=list, + help="Selection of names of sample groups from index to run over.", + ), + ], + fit_models: Annotated[ + List[str], + typer.Option( + default_factory=list, + help="Selection of names of the composite LMfit models to use for fitting.", + ), + ], + run_mode: Annotated[RunModes, typer.Argument()] = RunModes.NORMAL, + multiprocessing: Annotated[bool, typer.Option("--multiprocessing")] = False, +): + if run_mode is None: + print("No make run mode passed") + raise typer.Exit() + kwargs = {"run_mode": run_mode, "use_multiprocessing": multiprocessing} + if run_mode == RunModes.EXAMPLES: + kwargs.update( + { + "fit_model_specific_names": [ + "2peaks", + "3peaks", + "4peaks", + "2nd_4peaks", + ], + "sample_groups": ["test"], + } + ) + logger.info(f"Starting raman_fitting with CLI args:\n{run_mode}") + _main_run = MainDelegator(**kwargs) + + +@app.command() +def make( + make_type: Annotated[MakeTypes, typer.Argument()], + source_files: Annotated[List[Path], typer.Option()], + index_file: Annotated[Path, typer.Option()] = None, + force_reindex: Annotated[bool, typer.Option("--force-reindex")] = False, +): + if make_type is None: + print("No make type args passed") + raise typer.Exit() + if index_file: + index_file = index_file.resolve() + if make_type == MakeTypes.INDEX: + initialize_index_from_source_files( + files=source_files, index_file=index_file, force_reindex=force_reindex + ) + + elif make_type == MakeTypes.CONFIG: + pass # make config + + +@app.callback() +def main( + verbose: bool = False, + version: Annotated[ + Optional[bool], typer.Option("--version", callback=version_callback) + ] = None, +): + """ + Manage raman_fitting in the awesome CLI app. 
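+ + A usage sketch (assuming the package exposes this Typer app as the + ``raman_fitting`` console entry point; option names follow Typer's default + underscore-to-dash conversion): + + raman_fitting run examples + raman_fitting make index --source-files ./data/sample1_1.txt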
+ """ + if verbose: + print("Will write verbose output") + state["verbose"] = True + + +if __name__ == "__main__": + app() diff --git a/src/raman_fitting/interfaces/utils.py b/src/raman_fitting/interfaces/utils.py new file mode 100644 index 0000000..22715bc --- /dev/null +++ b/src/raman_fitting/interfaces/utils.py @@ -0,0 +1,10 @@ +def get_package_version() -> str: + try: + import importlib.metadata + + _version = importlib.metadata.version("raman_fitting") + except ImportError: + _version = "version.not.found" + + _version_text = f"raman_fitting version: {_version}" + return _version_text diff --git a/src/raman_fitting/models/__init__.py b/src/raman_fitting/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/raman_fitting/models/deconvolution/__init__.py b/src/raman_fitting/models/deconvolution/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/raman_fitting/models/deconvolution/base_model.py b/src/raman_fitting/models/deconvolution/base_model.py new file mode 100644 index 0000000..62f7809 --- /dev/null +++ b/src/raman_fitting/models/deconvolution/base_model.py @@ -0,0 +1,163 @@ +"""The members of the validated collection of BasePeaks are assembled here into fitting Models""" + +import logging +from typing import Optional, Dict +from warnings import warn + +from lmfit.models import Model as LMFitModel +from pydantic import ( + BaseModel, + Field, + ConfigDict, + model_validator, +) + + +from raman_fitting.models.deconvolution.base_peak import ( + BasePeak, + get_peaks_from_peak_definitions, +) +from raman_fitting.models.deconvolution.lmfit_parameter import ( + construct_lmfit_model_from_components, +) +from raman_fitting.models.splitter import RegionNames + +logger = logging.getLogger(__name__) + +SUBSTRATE_PEAK = "Si1_peak" +SEP = "+" +SUFFIX = "_" + + +class BaseLMFitModelWarning(UserWarning): + pass + + +class BaseLMFitModel(BaseModel): + """ + This Model class combines the collection of valid peaks from BasePeak into a regression model + of type lmfit.model.CompositeModel + that is compatible with the lmfit Model and fit functions. + The model_name, include_substrate and lmfit_model attributes are kept + consistent w.r.t. their meaning when they are set. 
+ + Parameters + -------- + verbose_name: string ==> is converted to lmfit Model object + include_substrate: bool ==> toggle between True and False to include a substrate peak + + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + name: str + peaks: str + peak_collection: Dict[str, BasePeak] = Field( + default_factory=get_peaks_from_peak_definitions, + validate_default=True, + repr=False, + ) + lmfit_model: LMFitModel = Field(None, init_var=False, repr=False) + region_name: RegionNames + + @property + def has_substrate(self): + if not self.lmfit_model.components: + return False + comps = set(map(lambda x: x.prefix, self.lmfit_model.components)) + substrate_comps = set( + [i.lmfit_model.prefix for i in self.substrate_peaks.values()] + ) + return substrate_comps.issubset(comps) + + def add_substrate(self): + if self.has_substrate: + warn( + f"{self.__class__.__name__} already has substrate.", + BaseLMFitModelWarning, + ) + return + + for name in self.substrate_peaks.keys(): + self.peaks += SEP + name + self.check_lmfit_model() + + def remove_substrate(self): + if not self.has_substrate: + warn( + f"{self.__class__.__name__} has no substrate to remove.", + BaseLMFitModelWarning, + ) + return + _peaks = self.peaks.split(SEP) + for name in self.substrate_peaks.keys(): + _peaks.remove(name) + self.peaks = SEP.join(_peaks) + self.check_lmfit_model() + + @property + def substrate_peaks(self): + return {k: val for k, val in self.peak_collection.items() if val.is_substrate} + + @model_validator(mode="after") + def check_peaks_in_peak_collection(self) -> "BaseLMFitModel": + peak_names_split = self.peaks.split(SEP) + default_peak_names = self.peak_collection.keys() + valid_peaks = set(peak_names_split).union(set(default_peak_names)) + assert valid_peaks + new_peak_names = SEP.join([i for i in peak_names_split if i in valid_peaks]) + self.peaks = new_peak_names + return self + + @model_validator(mode="after") + def check_lmfit_model(self) -> "BaseLMFitModel": + lmfit_model = construct_lmfit_model(self.peaks, self.peak_collection) + self.lmfit_model = lmfit_model + return self + + +def construct_lmfit_model( + peaks: str, peak_collection: Dict[str, BasePeak] +) -> LMFitModel: + peak_names = peaks.split(SEP) + base_peaks = [peak_collection[i] for i in peak_names if i in peak_collection] + if not base_peaks: + raise ValueError(f"Could not find matching peaks for {peaks}") + base_peaks_lmfit = [i.lmfit_model for i in base_peaks] + lmfit_model = construct_lmfit_model_from_components(base_peaks_lmfit) + return lmfit_model + + +def get_models_and_peaks_from_definitions( + models_and_peaks_definitions: Optional[Dict] = None, +) -> Dict[str, Dict[str, BaseLMFitModel]]: + peak_collection = get_peaks_from_peak_definitions( + peak_definitions=models_and_peaks_definitions + ) + models_settings = { + k: val.get("models") + for k, val in models_and_peaks_definitions.items() + if "models" in val + } + all_models = {} + for region_name, region_model_settings in models_settings.items(): + if region_model_settings is None: + continue + all_models[region_name] = {} + for model_name, model_peaks in region_model_settings.items(): + all_models[region_name][model_name] = BaseLMFitModel( + name=model_name, + peaks=model_peaks, + peak_collection=peak_collection, + region_name=region_name, + ) + return all_models + + +def main(): + models = get_models_and_peaks_from_definitions() + print("Models: ", len(models)) + + +if __name__ == "__main__": + main() diff --git 
a/src/raman_fitting/models/deconvolution/base_peak.py b/src/raman_fitting/models/deconvolution/base_peak.py new file mode 100644 index 0000000..4649b34 --- /dev/null +++ b/src/raman_fitting/models/deconvolution/base_peak.py @@ -0,0 +1,219 @@ +from enum import StrEnum +from typing import List, Optional, Dict + +from pydantic import ( + BaseModel, + ConfigDict, + InstanceOf, + Field, + field_validator, + model_validator, +) +from lmfit import Parameters +from lmfit.models import Model + +from raman_fitting.models.deconvolution.lmfit_parameter import ( + LMFIT_MODEL_MAPPER, + LMFitParameterHints, + parmeter_to_dict, +) +from raman_fitting.config.default_models import load_config_from_toml_files +from raman_fitting.utils.string_operations import prepare_text_from_param + +ParamHintDict = Dict[str, Dict[str, Optional[float | bool | str]]] + + +class BasePeakWarning(UserWarning): # pragma: no cover + pass + + +PEAK_TYPE_OPTIONS = StrEnum("PEAK_TYPE_OPTIONS", ["Lorentzian", "Gaussian", "Voigt"]) + + +def get_lmfit_model_from_peak_type(peak_type: str, prefix: str = "") -> Optional[Model]: + """returns the lmfit model instance according to the chosen peak type and sets the prefix from peak_name""" + model = None + + capitalized = peak_type.capitalize() + try: + lmfit_model_class = LMFIT_MODEL_MAPPER[capitalized] + model = lmfit_model_class(prefix=prefix) + except KeyError: + raise NotImplementedError( + f'This peak type or model "{peak_type}" has not been implemented.' + ) + return model + + +class BasePeak(BaseModel): + """ + Base class for easier definition of typical intensity peaks found in + Raman spectra. + + The goal of this class is to be able to more easily write + peak definitions (for possible user input). It tries to find three + fields in the definition, which are required for an LMfit model creation, + namely: peak_name, peak_type and the param hints. + + peak_name: + arbitrary name as prefix for the peak + peak_type: + defines the lineshape of the peak, the following options are implemented: + "Lorentzian", "Gaussian", "Voigt" + params_hints: + initial values for the parameters of the peak, at least + a value for the center position of the peak should be given. + + It tries to find these fields in different sources such as: the class definition + with only class attributes, init attributes or even in the keyword arguments. + The FieldsTracker class instance (fco) keeps track of the definition in different + sources and can check when all are ready. If there are multiple sources with definitions + for the same field then the source with the highest priority will be chosen (based on tuple order). + Each field is a property which validates the assignments. + + Sort of a wrapper for the lmfit.model definition. + Several of these peaks combined are used to make the lmfit CompositeModel + (composed in the fit_models module), which will be used for the fit.
+ + -------- + Example usage + -------- + + "Example definition with a dict of param hints" + New_peak = BasePeak( + peak_name='R2D2', + peak_type='Voigt', + param_hints={'center': {'value': 2435, 'min': 2400, 'max': 2550}}, + ) + New_peak.lmfit_model + + "Example definition with keyword arguments" + + New_peak = BasePeak( + peak_name='D1', + peak_type='Lorentzian', + param_hints={'center': {'value': 1500}}, + ) + New_peak.lmfit_model + """ + + model_config = ConfigDict(arbitrary_types_allowed=True, from_attributes=True) + + peak_name: str + param_hints: Optional[Parameters | List[LMFitParameterHints] | ParamHintDict] = None + peak_type: Optional[str] = None + is_substrate: Optional[bool] = False + is_for_normalization: Optional[bool] = False + docstring: Optional[str] = Field(None, repr=False) + lmfit_model: Optional[InstanceOf[Model]] = None + + @field_validator("peak_type") + @classmethod + def check_peak_type(cls, v: Optional[str]) -> Optional[str]: + if v is None: + return v + if isinstance(v, str): + try: + v = PEAK_TYPE_OPTIONS[v].name + return v + except KeyError: + raise KeyError( + f"peak_type is not in {[i.name for i in PEAK_TYPE_OPTIONS]}, but {v}" + ) + elif isinstance(v, PEAK_TYPE_OPTIONS): + v = v.name + return v + else: + raise TypeError(f"peak_type is not a string or enum, but {type(v)}") + + @field_validator("param_hints") + @classmethod + def check_param_hints( + cls, v: Optional[Parameters | List[LMFitParameterHints] | ParamHintDict] + ) -> Optional[Parameters]: + if v is None: + return v + if isinstance(v, Parameters): + return v + + if isinstance(v, dict): + valid_p_hints = [LMFitParameterHints(name=k, **val) for k, val in v.items()] + + if isinstance(v, list): + assert all(isinstance(i, LMFitParameterHints) for i in v) + valid_p_hints = v + + pars_hints = [i.parameter for i in valid_p_hints] + params = Parameters() + params.add_many(*pars_hints) + return params + + @model_validator(mode="after") + def check_lmfit_model(self) -> "BasePeak": + if self.lmfit_model is not None: + if isinstance(self.lmfit_model, Model): + return self + else: + raise ValueError( + f"lmfit_model is not a Model instance, but {type(self.lmfit_model)}" + ) + peak_type = self.peak_type + if peak_type is None: + raise ValueError("peak_type is None") + + lmfit_model = get_lmfit_model_from_peak_type( + peak_type, prefix=self.peak_name_prefix + ) + if lmfit_model is None: + raise ValueError("lmfit_model is None") + + if self.param_hints is not None: + for k, v in self.param_hints.items(): + par_dict = parmeter_to_dict(v) + lmfit_model.set_param_hint(k, **par_dict) + self.lmfit_model = lmfit_model + return self + + @property + def peak_name_prefix(self): + if not self.peak_name: + return "" + if self.peak_name.endswith("_"): + return self.peak_name + return self.peak_name + "_" + + def __str__(self): + _repr = f"{self.__class__.__name__}('{self.peak_name}'" + if self.lmfit_model is None: + _repr += ": no Model set" + _repr += f", {self.lmfit_model}" + param_text = make_string_from_param_hints(self.param_hints) + _repr += f"{param_text})" + return _repr + + +def make_string_from_param_hints(param_hints: Parameters) -> str: + param_center = param_hints.get("center", {}) + text = prepare_text_from_param(param_center) + return text + + +def get_peaks_from_peak_definitions( + peak_definitions: Optional[Dict] = None, +) -> Dict[str, BasePeak]: + if peak_definitions is None: + peak_definitions = load_config_from_toml_files() +
peak_settings = { + k: val.get("peaks") for k, val in peak_definitions.items() if "peaks" in val + } + peak_models = {} + for peak_type, peak_type_defs in peak_settings.items(): + if peak_type_defs is None: + continue + for peak_name, peak_def in peak_type_defs.items(): + peak_models[peak_name] = BasePeak(**peak_def) + return peak_models diff --git a/src/raman_fitting/models/deconvolution/init_models.py b/src/raman_fitting/models/deconvolution/init_models.py new file mode 100644 index 0000000..6a0e7e6 --- /dev/null +++ b/src/raman_fitting/models/deconvolution/init_models.py @@ -0,0 +1,57 @@ +from dataclasses import dataclass, field +import logging +from typing import Dict + +from raman_fitting.config.default_models import load_config_from_toml_files +from raman_fitting.models.deconvolution.base_model import ( + get_models_and_peaks_from_definitions, +) +from .base_model import BaseLMFitModel + +logger = logging.getLogger(__name__) + + +@dataclass +class InitializeModels: + """ + This class will initialize and validate the different fitting models. + The models are of type lmfit.model.CompositeModel and stored in a dict with names + for the models as keys. + """ + + model_definitions: dict = field(default_factory=dict) + peaks: dict = field(default_factory=dict) + lmfit_models: Dict[str, Dict[str, BaseLMFitModel]] | None = None + + def __post_init__(self): + self.model_definitions = self.model_definitions or {} + self.peaks = self.peaks or {} + self.lmfit_models = self.lmfit_models or {} + if not self.model_definitions: + self.model_definitions = load_config_from_toml_files() + if not self.lmfit_models and self.model_definitions: + self.lmfit_models = get_models_and_peaks_from_definitions( + self.model_definitions + ) + + def __repr__(self): + _t = ", ".join(map(str, self.lmfit_models.keys())) + _t += "\n" + _t += "\n".join(map(str, self.lmfit_models.values())) + return _t + + +def main(): + from raman_fitting.config.default_models import ( + load_config_from_toml_files, + ) + + model_definitions = load_config_from_toml_files() + print("model_definitions: ", model_definitions) + models = InitializeModels() + print(models) + # breakpoint() + + +if __name__ == "__main__": + main() diff --git a/src/raman_fitting/models/deconvolution/lmfit_parameter.py b/src/raman_fitting/models/deconvolution/lmfit_parameter.py new file mode 100644 index 0000000..a7636c0 --- /dev/null +++ b/src/raman_fitting/models/deconvolution/lmfit_parameter.py @@ -0,0 +1,171 @@ +import math +from enum import StrEnum +from typing import List, Optional, Dict +from warnings import warn + +from lmfit import Parameter +from lmfit.models import GaussianModel, LorentzianModel, Model, VoigtModel + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + model_validator, +) + + +param_hint_dict = Dict[str, Dict[str, Optional[float | bool | str]]] + + +class BasePeakWarning(UserWarning): # pragma: no cover + pass + + +PEAK_TYPE_OPTIONS = StrEnum("PEAK_TYPE_OPTIONS", ["Lorentzian", "Gaussian", "Voigt"]) + +LMFIT_PARAM_KWARGS = ("value", "vary", "min", "max", "expr") + + +LMFIT_MODEL_MAPPER = { + "Lorentzian": LorentzianModel, + "Gaussian": GaussianModel, + "Voigt": VoigtModel, +} + + +class LMFitParameterHints(BaseModel): + """ + https://github.com/lmfit/lmfit-py/blob/master/lmfit/model.py#L566 + + The given hint can include optional bounds and constraints + ``(value, vary, min, max, expr)``, which will be used by + `Model.make_params()` when building default parameters. 
+ + While this can be used to set initial values, `Model.make_params` or + the function `create_params` should be preferred for creating + parameters with initial values. + + The intended use here is to control how a Model should create + parameters, such as setting bounds that are required by the mathematics + of the model (for example, that a peak width cannot be negative), or to + define common constrained parameters. + + Parameters + ---------- + name : str + Parameter name, can include the models `prefix` or not. + **kwargs : optional + Arbitrary keyword arguments, needs to be a Parameter attribute. + Can be any of the following: + + - value : float, optional + Numerical Parameter value. + - vary : bool, optional + Whether the Parameter is varied during a fit (default is + True). + - min : float, optional + Lower bound for value (default is ``-numpy.inf``, no lower + bound). + - max : float, optional + Upper bound for value (default is ``numpy.inf``, no upper + bound). + - expr : str, optional + Mathematical expression used to constrain the value during + the fit. + + Example + -------- + >>> model = GaussianModel() + >>> model.set_param_hint('sigma', min=0) + + """ + + model_config = ConfigDict(arbitrary_types_allowed=True, from_attributes=True) + + name: str + value: Optional[float] + vary: Optional[bool] = True + min: Optional[float] = Field(-math.inf, allow_inf_nan=True) + max: Optional[float] = Field(math.inf, allow_inf_nan=True) + expr: Optional[str] = None + parameter: Optional[Parameter] = Field(None, exclude=True) + + @model_validator(mode="after") + def check_min_max(self) -> "LMFitParameterHints": + min_, max_ = self.min, self.max + if min_ is not None and max_ is not None and min_ > max_: + raise ValueError("Min must be less than max") + return self + + @model_validator(mode="after") + def check_value_min_max(self) -> "LMFitParameterHints": + value, min_, max_ = self.value, self.min, self.max + if value is None: + raise ValueError("Value must not be None") + if min_ is not None: + assert value >= min_ + if max_ is not None: + assert value <= max_ + if max_ and min_: + assert min_ <= value <= max_ + assert min_ < max_ + return self + + @model_validator(mode="after") + def check_construct_parameter(self) -> "LMFitParameterHints": + if self.parameter is None: + self.parameter = Parameter( + name=self.name, + value=self.value, + vary=self.vary, + min=self.min, + max=self.max, + expr=self.expr, + ) + return self + + +def construct_lmfit_model_from_components( + models: List[Model], sort_on_center=True +) -> "Model": + """ + Construct the lmfit model from a collection of (known) peaks + """ + if not models: + raise ValueError("No peaks given to construct lmfit model from.") + if sort_on_center: + models = sort_lmfit_models(models) + lmfit_composite_model = sum(models, models.pop()) + return lmfit_composite_model + + +def sort_lmfit_models( + models: List[Model], key: str = "center", reverse: bool = False +) -> List[Model]: + try: + sorted_models = sorted( + models, key=lambda x: x.param_hints[key]["value"], reverse=reverse + ) + except KeyError: + warn(f"Sorting on model on key {key} failed") + return sorted_models + + +def parmeter_to_dict(parameter: Parameter) -> dict: + ret = {k: getattr(parameter, k) for k in LMFIT_PARAM_KWARGS} + ret = {k: v for k, v in ret.items() if v is not None} + return ret + + +DEFAULT_GAMMA_PARAM_HINT = LMFitParameterHints( + name="gamma", value=1, min=1e-05, max=70, vary=False +) + + +def main(): + pass + # breakpoint() + + +if __name__ == 
"__main__": + main() diff --git a/src/raman_fitting/models/deconvolution/spectrum_regions.py b/src/raman_fitting/models/deconvolution/spectrum_regions.py new file mode 100644 index 0000000..a64a8bf --- /dev/null +++ b/src/raman_fitting/models/deconvolution/spectrum_regions.py @@ -0,0 +1,24 @@ +from enum import StrEnum +from typing import Dict + +from pydantic import BaseModel +from raman_fitting.config.default_models import load_config_from_toml_files + + +def get_default_regions_from_toml_files() -> Dict[str, Dict[str, float]]: + default_regions = ( + load_config_from_toml_files().get("spectrum", {}).get("regions", {}) + ) + return default_regions + + +RegionNames = StrEnum( + "RegionNames", " ".join(get_default_regions_from_toml_files()), module=__name__ +) + + +class SpectrumRegionLimits(BaseModel): + name: RegionNames + min: int + max: int + extra_margin: int = 20 diff --git a/src/raman_fitting/models/fit_models.py b/src/raman_fitting/models/fit_models.py new file mode 100644 index 0000000..d8a5456 --- /dev/null +++ b/src/raman_fitting/models/fit_models.py @@ -0,0 +1,67 @@ +from typing import Dict +import time + +from pydantic import BaseModel, model_validator, Field, ConfigDict +from lmfit import Model as LMFitModel +from lmfit.model import ModelResult + +from raman_fitting.models.deconvolution.base_model import BaseLMFitModel +from raman_fitting.models.deconvolution.spectrum_regions import RegionNames +from raman_fitting.models.post_deconvolution.calculate_params import ( + calculate_ratio_of_unique_vars_in_results, +) + +from raman_fitting.models.spectrum import SpectrumData + + +class SpectrumFitModel(BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + + spectrum: SpectrumData + model: BaseLMFitModel + region: RegionNames + fit_kwargs: Dict = Field(default_factory=dict, repr=False) + fit_result: ModelResult = Field(None, init_var=False) + param_results: Dict = Field(default_factory=dict) + elapsed_time: float = Field(0, init_var=False, repr=False) + + @model_validator(mode="after") + def match_region_names(self) -> "SpectrumFitModel": + model_region = self.model.region_name + spec_region = self.spectrum.region_name + if model_region != spec_region: + raise ValueError( + f"Region names do not match {model_region} and {spec_region}" + ) + return self + + def run_fit(self) -> None: + if "method" not in self.fit_kwargs: + self.fit_kwargs["method"] = "leastsq" + lmfit_model = self.model.lmfit_model + start_time = time.time() + fit_result = call_fit_on_model(lmfit_model, self.spectrum, **self.fit_kwargs) + end_time = time.time() + elapsed_seconds = abs(start_time - end_time) + self.elapsed_time = elapsed_seconds + self.fit_result = fit_result + self.post_process() + + def post_process(self): + if not self.fit_result: + return + param_results = self.fit_result.params.valuesdict() + params_ratio_vars = calculate_ratio_of_unique_vars_in_results( + param_results, raise_exception=False + ) + self.param_results["ratios"] = params_ratio_vars + + +def call_fit_on_model( + model: LMFitModel, spectrum: SpectrumData, method="leastsq", **kwargs +) -> ModelResult: + # ideas: improve fitting loop so that starting parameters from modelX and modelX+Si are shared, faster... 
+ init_params = model.make_params() + x, y = spectrum.ramanshift, spectrum.intensity + out = model.fit(y, init_params, x=x, method=method, **kwargs) # 'leastsq' + return out diff --git a/src/raman_fitting/models/post_deconvolution/__init__.py b/src/raman_fitting/models/post_deconvolution/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/raman_fitting/models/post_deconvolution/calculate_params.py b/src/raman_fitting/models/post_deconvolution/calculate_params.py new file mode 100644 index 0000000..2222402 --- /dev/null +++ b/src/raman_fitting/models/post_deconvolution/calculate_params.py @@ -0,0 +1,55 @@ +from inspect import getmembers, isfunction +from typing import Dict, Any + +from raman_fitting.models.post_deconvolution import parameter_ratio_funcs + +RATIO_FUNC_PREFIX = "ratio_" +functions = [ + fn + for _, fn in getmembers(parameter_ratio_funcs, isfunction) + if fn.__module__ == parameter_ratio_funcs.__name__ +] +ratio_funcs = list( + filter(lambda x: x.__name__.startswith(RATIO_FUNC_PREFIX), functions) +) + + +def calculate_params_from_results( + combined_results: Dict, + var_name: str, + prefix: str | None = None, + raise_exception=True, +) -> dict[str, dict[str, Any]]: + results = {} + for ratio_func in ratio_funcs: + try: + label, ratio = ratio_func(combined_results, var_name, prefix=prefix) + func = ratio_func.__name__ + results[func] = {"label": label, "ratio": ratio} + except (ValueError, KeyError) as e: + if raise_exception: + raise e from e + continue + return results + + +def calculate_ratio_of_unique_vars_in_results( + results: Dict, raise_exception: bool = True +) -> dict[Any, dict[str, dict[str, Any]]]: + uniq_vars = set(i.split("_")[-1] for i in results.keys()) + var_ratios = {} + for var_name in uniq_vars: + ratios = calculate_params_from_results( + results, var_name, raise_exception=raise_exception + ) + var_ratios[var_name] = ratios + return var_ratios + + +def main(): + print(functions) + print(list(map(str, ratio_funcs))) + + +if __name__ == "__main__": + main() diff --git a/src/raman_fitting/models/post_deconvolution/parameter_ratio_funcs.py b/src/raman_fitting/models/post_deconvolution/parameter_ratio_funcs.py new file mode 100644 index 0000000..5c8e5ae --- /dev/null +++ b/src/raman_fitting/models/post_deconvolution/parameter_ratio_funcs.py @@ -0,0 +1,129 @@ +from typing import Tuple, List, Dict +from functools import wraps + +from raman_fitting.utils.decorators import decorator_with_kwargs +from raman_fitting.utils.string_operations import join_prefix_suffix + + +def validate_result(result, var_name: str, requires: List[str] | None = None): + req_vars = {join_prefix_suffix(i, var_name) for i in requires} + provided_vars = {join_prefix_suffix(i, var_name) for i in result.keys()} + if provided_vars < req_vars: + raise ValueError( + f"Missing required vars {req_vars} in result: {', '.join(result.keys())}" + ) + + +@decorator_with_kwargs +def calculate_ratio(function, requires: List[str] | None = None): + @wraps(function) + def wrapper(result, var_name: str, prefix: str | None = None, **kwargs): + validate_result(result, var_name, requires=requires) + prefix = prefix or "" + return function(result, var_name, prefix=prefix) + + return wrapper + + +def get_var(peak: str, result: Dict, var_name: str): + return result[join_prefix_suffix(peak.upper(), var_name)] + + +@calculate_ratio(requires=["D", "G"]) +def ratio_d_to_g(result, var_name: str, prefix: str | None = None) -> Tuple[str, float]: + d_ = get_var("D", result, var_name) + g_ = get_var("G", 
result, var_name) + ratio = d_ / g_ + label = f"{prefix}D/{prefix}G" + return label, ratio + + +@calculate_ratio(requires=["D", "G"]) +def ratio_la_d_to_g( + result, var_name: str, prefix: str | None = None +) -> Tuple[str, float]: + ratio = 4.4 * (ratio_d_to_g(result, var_name, prefix=prefix)[-1]) ** -1 + label = f"La_{prefix}G" + return label, ratio + + +@calculate_ratio(requires=["D", "G", "D2"]) +def ratio_d_to_gplusd2( + result, var_name: str, prefix: str | None = None +) -> Tuple[str, float] | None: + d = get_var("D", result, var_name) + g = get_var("G", result, var_name) + d2 = get_var("D2", result, var_name) + ratio = d / (g + d2) + label = f"{prefix}D/({prefix}G+{prefix}D2)" + return label, ratio + + +@calculate_ratio(requires=["D", "G", "D2"]) +def ratio_la_d_to_gplusd2( + result, var_name: str, prefix: str | None = None +) -> Tuple[str, float]: + ratio = 4.4 * (ratio_d_to_gplusd2(result, var_name, prefix=prefix)[-1]) ** -1 + label = (f"La_{prefix}G+D2",) + return label, ratio + + +@calculate_ratio(requires=["D2", "G", "D3"]) +def ratio_d3_to_gplusd2( + result, var_name: str, prefix: str | None = None +) -> Tuple[str, float] | None: + d2 = get_var("D2", result, var_name) + d3 = get_var("D3", result, var_name) + g = get_var("G", result, var_name) + ratio = d3 / (g + d2) + label = f"{prefix}D3/({prefix}G+{prefix}D2" + return label, ratio + + +@calculate_ratio(requires=["D3", "G"]) +def ratio_d3_to_g( + result, var_name: str, prefix: str | None = None +) -> Tuple[str, float] | None: + d3 = get_var("D3", result, var_name) + g = get_var("G", result, var_name) + ratio = d3 / g + label = f"{prefix}D3/{prefix}G" + return label, ratio + + +@calculate_ratio(requires=["D4", "G"]) +def ratio_d4_to_g( + result, var_name: str, prefix: str | None = None +) -> Tuple[str, float] | None: + d4 = get_var("D4", result, var_name) + g = get_var("G", result, var_name) + ratio = d4 / g + label = f"{prefix}D4/{prefix}G" + return label, ratio + + +@calculate_ratio(requires=["D1D1", "D"]) +def ratio_d1d1_to_d(result, var_name: str, prefix: str | None = None): + d1d1 = get_var("D1D1", result, var_name) + d = get_var("D", result, var_name) + ratio = 8.8 * d1d1 / d + label = f"Leq_{prefix}" + return label, ratio + + +@calculate_ratio(requires=["D1D1", "GD1"]) +def ratio_d1d1_to_gd1( + result, var_name: str, prefix: str | None = None +) -> Tuple[str, float]: + d1d1 = get_var("D1D1", result, var_name) + gd1 = get_var("GD1", result, var_name) + ratio = d1d1 / gd1 + label = f"{prefix}D1D1/{prefix}GD1" + + return label, ratio + + +if __name__ == "__main__": + result = {"D_peak": 1, "G_peak": 2, "D1D1_peak": 3} + var_name = "peak" + print(ratio_d_to_g(result, var_name)) diff --git a/src/raman_fitting/models/spectrum.py b/src/raman_fitting/models/spectrum.py new file mode 100644 index 0000000..0c2d804 --- /dev/null +++ b/src/raman_fitting/models/spectrum.py @@ -0,0 +1,46 @@ +from typing import Sequence +import numpy as np + +from pydantic import ( + BaseModel, + FilePath, + AwareDatetime, + model_validator, + Field, +) +import pydantic_numpy.typing as pnd + + +class SpectrumData(BaseModel): + ramanshift: pnd.Np1DArrayFp32 = Field(repr=False) + intensity: pnd.Np1DArrayFp32 = Field(repr=False) + label: str + region_name: str | None = None + source: FilePath | Sequence[FilePath] | str | Sequence[str] | None = None + + @model_validator(mode="after") + def validate_equal_length(self): + if len(self.ramanshift) != len(self.intensity): + raise ValueError("Spectrum arrays are not of equal length.") + return self + + 
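+    # The next validator guards against NaN values in either array before the
+    # spectrum is used downstream.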
@model_validator(mode="after") + def check_if_contains_nan(self): + if np.isnan(self.ramanshift).any(): + raise ValueError("Ramanshift contains NaN") + + if np.isnan(self.intensity).any(): + raise ValueError("Intensity contains NaN") + return self + + # length is derived property + def __len__(self): + return len(self.ramanshift) + + +class SpectrumMetaData(BaseModel): + sample_id: str + sample_group: str + sample_position: str + creation_date: AwareDatetime + source_file: FilePath # FileStem is derived diff --git a/src/raman_fitting/models/splitter.py b/src/raman_fitting/models/splitter.py new file mode 100644 index 0000000..a2a080e --- /dev/null +++ b/src/raman_fitting/models/splitter.py @@ -0,0 +1,89 @@ +from typing import Dict, Any +import numpy as np + +from pydantic import BaseModel, model_validator, Field +from .spectrum import SpectrumData +from .deconvolution.spectrum_regions import ( + SpectrumRegionLimits, + RegionNames, + get_default_regions_from_toml_files, +) + + +class SplitSpectrum(BaseModel): + spectrum: SpectrumData + region_limits: Dict[str, SpectrumRegionLimits] = Field(None, init_var=None) + spec_regions: Dict[str, SpectrumData] = Field(None, init_var=None) + info: Dict[str, Any] = Field(default_factory=dict) + + @model_validator(mode="after") + def process_spectrum(self) -> "SplitSpectrum": + if self.region_limits is None: + region_limits = get_default_spectrum_region_limits() + self.region_limits = region_limits + + if self.spec_regions is not None: + return self + spec_regions = split_spectrum_data_in_regions( + self.spectrum.ramanshift, + self.spectrum.intensity, + spec_region_limits=self.region_limits, + label=self.spectrum.label, + source=self.spectrum.source, + ) + self.spec_regions = spec_regions + return self + + def get_region(self, region_name: RegionNames): + region_name = RegionNames(region_name) + spec_region_keys = [ + i for i in self.spec_regions.keys() if region_name.name in i + ] + if len(spec_region_keys) != 1: + raise ValueError(f"Key {region_name} not in {spec_region_keys}") + spec_region_key = spec_region_keys[0] + return self.spec_regions[spec_region_key] + + +def get_default_spectrum_region_limits( + regions_mapping: Dict = None, +) -> Dict[str, SpectrumRegionLimits]: + if regions_mapping is None: + regions_mapping = get_default_regions_from_toml_files() + regions = {} + for region_name, region_config in regions_mapping.items(): + regions[region_name] = SpectrumRegionLimits(name=region_name, **region_config) + return regions + + +def split_spectrum_data_in_regions( + ramanshift: np.array, + intensity: np.array, + spec_region_limits=None, + label=None, + source=None, +) -> Dict[str, SpectrumData]: + """ + For splitting of spectra into the several SpectrumRegionLimits, + the names of the regions are taken from SpectrumRegionLimits + and set as attributes to the instance. 
+ """ + + if spec_region_limits is None: + spec_region_limits = get_default_spectrum_region_limits() + spec_regions = {} + for region_name, region in spec_region_limits.items(): + # find indices of region in ramanshift array + ind = (ramanshift >= np.min(region.min)) & (ramanshift <= np.max(region.max)) + region_lbl = f"region_{region_name}" + if label is not None and label not in region_lbl: + region_lbl = f"{label}_{region_lbl}" + _data = { + "ramanshift": ramanshift[ind], + "intensity": intensity[ind], + "label": region_lbl, + "region_name": region_name, + "source": source, + } + spec_regions[region_lbl] = SpectrumData(**_data) + return spec_regions diff --git a/src/raman_fitting/processing/baseline_subtraction.py b/src/raman_fitting/processing/baseline_subtraction.py new file mode 100644 index 0000000..258bf4f --- /dev/null +++ b/src/raman_fitting/processing/baseline_subtraction.py @@ -0,0 +1,65 @@ +import logging + +import numpy as np +from scipy.stats import linregress + +from ..models.splitter import SplitSpectrum +from ..models.spectrum import SpectrumData + +logger = logging.getLogger(__name__) + + +def subtract_baseline_per_region(spec: SpectrumData, split_spectrum: SplitSpectrum): + ramanshift = spec.ramanshift + intensity = spec.intensity + region_name = spec.region_name + label = spec.label + regions_data = split_spectrum.spec_regions + region_limits = split_spectrum.region_limits + selected_intensity = intensity + region_config = region_limits[region_name] + region_name_first_order = list( + filter(lambda x: "first_order" in x, regions_data.keys()) + ) + if ( + any((i in region_name or i in label) for i in ("full", "norm")) + and region_name_first_order + ): + selected_intensity = regions_data[region_name_first_order[0]].intensity + region_config = region_limits["first_order"] + + bl_linear = linregress( + ramanshift[[0, -1]], + [ + np.mean(selected_intensity[0 : region_config.extra_margin]), + np.mean(selected_intensity[-region_config.extra_margin : :]), + ], + ) + i_blcor = intensity - (bl_linear[0] * ramanshift + bl_linear[1]) + return i_blcor, bl_linear + + +def subtract_baseline_from_split_spectrum( + split_spectrum: SplitSpectrum = None, label=None +) -> SplitSpectrum: + _bl_spec_regions = {} + _info = {} + label = "blcorr" if label is None else label + for region_name, spec in split_spectrum.spec_regions.items(): + blcorr_int, blcorr_lin = subtract_baseline_per_region(spec, split_spectrum) + new_label = f"{label}_{spec.label}" if label not in spec.label else spec.label + spec = SpectrumData( + **{ + "ramanshift": spec.ramanshift, + "intensity": blcorr_int, + "label": new_label, + "region_name": region_name, + "source": spec.source, + } + ) + _bl_spec_regions.update(**{region_name: spec}) + _info.update(**{region_name: blcorr_lin}) + bl_corrected_spectra = split_spectrum.model_copy( + update={"spec_regions": _bl_spec_regions, "info": _info} + ) + return bl_corrected_spectra diff --git a/src/raman_fitting/processing/cleaner.py b/src/raman_fitting/processing/cleaner.py deleted file mode 100644 index 261cb3e..0000000 --- a/src/raman_fitting/processing/cleaner.py +++ /dev/null @@ -1,318 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import copy -import re -from venv import logger - -import numpy as np -import pandas as pd -from scipy import signal -from scipy.stats import linregress - -from raman_fitting.processing.spectrum_template import ( - SpecTemplate, - SpectrumWindowLimits, - SpectrumWindows, -) - - -class SpectrumMethodException(ValueError): - pass - - 
-class SpectrumMethods: - """ - Parent class to hold several Spetrum Methods as children - """ - - data = SpecTemplate - - def __init__(self, ramanshift, intensity, label="", **kwargs): - """ - Parameters - ---------- - ramanshift : array or list - collection of the ramanshift values - intensity : array or list - collection of the intensity values - label : TYPE, optional - DESCRIPTION. The default is "". - **kwargs : TYPE - DESCRIPTION. - - Returns - ------- - None. - - """ - self.ramanshift = ramanshift - self.intensity = intensity - self.label = label - self.kwargs = kwargs - - @staticmethod - def filtered_int(intensity=None): - try: - int_savgol_fltr = signal.savgol_filter(intensity, 13, 3, mode="nearest") - except Exception as e: - raise SpectrumMethodException(f"no intensity given to filter, {e}") - int_savgol_fltr = [] - return int_savgol_fltr - - -class SpectrumSplitter(SpectrumMethods): - """ - For splitting of spectra into the several SpectrumWindows, - the names of the windows are taken from SpectrumWindows - and set as attributes to the instance. - """ - - def __init__(self, *args, **kws): - super().__init__(*args, **kws) - - def split_data(self, spec_windows=SpectrumWindows()): - _r, _int = self.ramanshift, self.intensity - # self.register.get(on_lbl) - _d = {} - for windowname, limits in spec_windows.items(): - ind = (_r >= np.min(limits)) & (_r <= np.max(limits)) - # _intslice = _int[ind] - label = f"window_{windowname}" - if self.label: - label = f"{self.label}_{label}" - _data = self.data(_r[ind], _int[ind], label) - setattr(self, label, _data) - _d.update(**{windowname: _data}) - self.windows_data = _d - - -class BaselineSubtractorNormalizer(SpectrumSplitter): - """ - For baseline subtraction as well as normalization of a spectrum - """ - - def __init__(self, *args, **kws): - super().__init__(*args, **kws) - self.split_data() - self.windowlimits = SpectrumWindowLimits() - blcorr_data, blcorr_info = self.subtract_loop() - self.blcorr_data = blcorr_data - self.blcorr_info = blcorr_info - normalization_intensity = self.get_normalization_factor() - self.norm_factor = 1 / normalization_intensity - self.norm_data = self.normalize_data(self.blcorr_data, self.norm_factor) - - def subtract_loop(self): - _blcorr = {} - _info = {} - for windowname, spec in self.windows_data.items(): - blcorr_int, blcorr_lin = self.subtract_baseline_per_window(windowname, spec) - label = f"blcorr_{windowname}" - if self.label: - label = f"{self.label}_{label}" - _data = self.data(spec.ramanshift, blcorr_int, label) - _blcorr.update(**{windowname: _data}) - _info.update(**{windowname: blcorr_lin}) - return _blcorr, _info - - def subtract_baseline_per_window(self, windowname, spec): - rs = spec.ramanshift - if not rs.any(): - return spec.intensity, (0, 0) - - if windowname[0:4] in ("full", "norm"): - i_fltrd_dspkd_fit = self.windows_data.get("1st_order").intensity - else: - i_fltrd_dspkd_fit = spec.intensity - _limits = self.windowlimits.get(windowname) - - bl_linear = linregress( - rs[[0, -1]], - [ - np.mean(i_fltrd_dspkd_fit[0 : _limits[0]]), - np.mean(i_fltrd_dspkd_fit[_limits[1] : :]), - ], - ) - i_blcor = spec.intensity - (bl_linear[0] * rs + bl_linear[1]) - return i_blcor, bl_linear - - def get_normalization_factor(self, norm_method="simple") -> float: - try: - if norm_method == "simple": - normalization_intensity = np.nanmax( - self.blcorr_data["normalization"].intensity - ) - elif norm_method == "fit": - # IDEA not implemented - normalization = NormalizeFit( - self.blcorr_data["1st_order"], 
plotprint=False - ) # IDEA still implement this NormalizeFit - normalization_intensity = normalization["IG"] - else: - logger.warning(f"unknown normalization method {norm_method}") - normalization_intensity = 1 - except Exception as exc: - logger.error(f"normalization error {exc}") - normalization_intensity = 1 - - return normalization_intensity - - - def normalize_data(self, data, norm_factor) -> dict: - ret = {} - for windowname, spec in data.items(): - label = f"norm_blcorr_{windowname}" - if self.label: - label = f"{self.label}_{label}" - - _data = self.data(spec.ramanshift, spec.intensity * self.norm_factor, label) - ret.update(**{windowname: _data}) - return ret - -def NormalizeFit(spec, plotprint=False): - pass # IDEA placeholder - - -def array_nan_checker(array): - _nans = [n for n, i in enumerate(array) if np.isnan(i)] - return _nans - - -class Despiker(SpectrumMethods): - """ - A Despiking algorithm from reference literature: https://doi.org/10.1016/j.chemolab.2018.06.009 - - Parameters - ---------- - input_intensity : np.ndarray - The intensity array of which the desipked intensity will be calculated. - info : dict, optional - Extra information for despiking settings are added to this dict. - Attributes - --------- - despiked_intensity : np.ndarray - The resulting array of the despiked intensity of same length as input_intensity. - Notes - -------- - Let Y1;...;Yn represent the values of a single Raman spectrum recorded at equally spaced wavenumbers. - From this series, form the detrended differenced seriesr Yt ...:This simple - ata processing step has the effect of annihilating linear and slow movingcurve linear trends, however, - sharp localised spikes will be preserved.Denote the median and the median absolute deviation of - D.A. Whitaker, K. Hayes. 
Chemometrics and Intelligent Laboratory Systems 179 (2018) 82–84 - """ - - keys = ["input_intensity", "Zt", "Z_t_filtered", "despiked_intensity"] - - def __init__( - self, intensity: np.array([]), Z_threshold=4, moving_window_size=1, info={} - ): - self.info = info - self.Z_threshold = Z_threshold - self.moving_window_size = moving_window_size - - self.info.update( - {"Z_threshold": Z_threshold, "Z_filter_ma": moving_window_size} - ) - - # these get populated by the run_despike call in the setter - self.result = {} - self.df = pd.DataFrame() - - # setter calls to run_despike - self._int = intensity - - # _new_int = copy.deepcopy(intensity) - self.input_intensity = intensity - - @property - def input_intensity(self): - return self._input_intensity - - @input_intensity.setter - def input_intensity(self, value): - """sanitizes the input argument value for an array""" - - type_test = str(type(value)) - if "__main__" in type_test: - if "intensity" in value._fields: - val_intensity = value.intensity - elif "numpy.ndarray" in type_test: - val_intensity = value - elif "dict" in type_test: - val_intensity = value.get([i for i in value.keys() if "intensity" in i][0]) - else: - raise ValueError(f"Despike input error {type_test} for {value}") - - self.info.update({"input_type": type_test}) - self._input_intensity = val_intensity - self.despiked_intensity = val_intensity - - @property - def despiked_intensity(self): - return self._despiked_intensity - - @despiked_intensity.setter - def despiked_intensity(self, value): - result = self.run_despike_steps(value, self.Z_threshold) - - self._despiked_intensity = result["despiked_intensity"] - - self.result = result - self.df = pd.DataFrame(result) - - def run_despike_steps(self, intensity, Z_threshold): - Z_t = self.calc_Z(intensity) - Z_t_filtered = self.calc_Z_filtered(Z_t, Z_threshold) - i_despiked = self.despike_filter( - intensity, Z_t_filtered, self.moving_window_size - ) - - result_values = [intensity, Z_t, Z_t_filtered, i_despiked] - result = dict(zip(self.keys, result_values)) - return result - - @staticmethod - def calc_Z(intensity): - dYt = np.append(np.diff(intensity), 0) - # dYt = intensity.diff() - dYt_Median = np.median(dYt) - # M = dYt.median() - # dYt_M = dYt-M - dYt_MAD = np.median(abs(dYt - dYt_Median)) - # MAD = np.mad(dYt) - Z_t = (0.6745 * (dYt - dYt_Median)) / dYt_MAD - # intensity = blcor.assign(**{'abs_Z_t': Z_t.abs()}) - return Z_t - - @staticmethod - def calc_Z_filtered(Z_t, Z_threshold): - Z_t_filtered = copy.deepcopy(Z_t) - Z_t_filtered[np.abs(Z_t) > Z_threshold] = np.nan - Z_t_filtered[0] = Z_t_filtered[-1] = 0 - return Z_t_filtered - @staticmethod - def despike_filter( - intensity, Z_t_filtered, moving_window_size, ignore_lims=(20, 46) - ): - n = len(intensity) - i_despiked = copy.deepcopy(intensity) - spikes = np.where(np.isnan(Z_t_filtered)) - for i in list(spikes[0]): - if i < ignore_lims[0] or i > ignore_lims[1]: - w = np.arange( - max(0, i - moving_window_size), min(n, i + moving_window_size) - ) - w = w[~np.isnan(Z_t_filtered[w])] - if intensity[w].any(): - i_despiked[i] = np.mean(intensity[w]) - else: - i_despiked[i] = intensity[i] - return i_despiked - - def plot_Z(self): - # fig,ax = plt.subplots(2) - self.df.plot(y=["Zt", "Z_t_filtered"], alpha=0.5) - self.df.plot(y=["input_intensity", "despiked_intensity"], alpha=0.8) - # plt.show() - # plt.close() diff --git a/src/raman_fitting/processing/despike.py b/src/raman_fitting/processing/despike.py new file mode 100644 index 0000000..e563bf1 --- /dev/null +++ 
b/src/raman_fitting/processing/despike.py @@ -0,0 +1,134 @@ +""" +Created on Mon May 3 11:10:59 2021 + +@author: dw +""" + +from typing import Dict, Tuple, Any, Optional +import copy +import logging + +import numpy as np + +from pydantic import BaseModel, Field, model_validator + +from raman_fitting.models.spectrum import SpectrumData + +logger = logging.getLogger(__name__) + + +class SpectrumDespiker(BaseModel): + spectrum: Optional[SpectrumData] = None + threshold_z_value: int = 4 + moving_region_size: int = 1 + ignore_lims: Tuple[int, int] = (20, 46) + info: Dict = Field(default_factory=dict) + processed_spectrum: SpectrumData = Field(None) + + @model_validator(mode="after") + def process_spectrum(self) -> "SpectrumDespiker": + if self.spectrum is None: + raise ValueError("SpectrumDespiker, spectrum is None") + despiked_intensity, result_info = self.call_despike_spectrum( + self.spectrum.intensity + ) + despiked_spec = self.spectrum.model_copy( + update={"intensity": despiked_intensity}, deep=True + ) + SpectrumData.model_validate(despiked_spec, from_attributes=True) + self.processed_spectrum = despiked_spec + self.info.update(**result_info) + return self + + def process_intensity(self, intensity: np.ndarray) -> np.ndarray: + despiked_intensity, _ = self.call_despike_spectrum(intensity) + return despiked_intensity + + def call_despike_spectrum(self, intensity: np.ndarray) -> Tuple[np.ndarray, Dict]: + despiked_intensity, result_info = despike_spectrum( + intensity, + self.threshold_z_value, + self.moving_region_size, + ignore_lims=self.ignore_lims, + ) + return despiked_intensity, result_info + + +def despike_spectrum( + intensity: np.ndarray, + threshold_z_value: int, + moving_region_size: int, + ignore_lims=(20, 46), +) -> Tuple[np.ndarray, Dict[str, Any]]: + """ + A Despiking algorithm from reference literature: + https://doi.org/10.1016/j.chemolab.2018.06.009 + + Parameters + ---------- + input_intensity : np.ndarray + The intensity array of which the desipked intensity will be calculated. + info : dict, optional + Extra information for despiking settings are added to this dict. + Attributes + --------- + despiked_intensity : np.ndarray + The resulting array of the despiked intensity of same length as input_intensity. + Notes + -------- + Let Y1;...;Yn represent the values of a single Raman spectrum recorded at + equally spaced wavenumbers. + From this series, form the detrended differenced seriesr Yt ...:This simple + data processing step has the effect of annihilating linear and slow movingcurve + linear trends, however, + sharp localised spikes will be preserved.Denote the median and the median absolute + deviation of + D.A. Whitaker, K. Hayes. 
Chemometrics and Intelligent Laboratory Systems 179 (2018) 82–84 + """ + + z_intensity = calc_z_value_intensity(intensity) + filtered_z_intensity = filter_z_intensity_values(z_intensity, threshold_z_value) + i_despiked = despike_filter( + intensity, filtered_z_intensity, moving_region_size, ignore_lims=ignore_lims + ) + result = {"z_intensity": z_intensity, "filtered_z_intensity": filtered_z_intensity} + return i_despiked, result + + +def calc_z_value_intensity(intensity: np.ndarray) -> np.ndarray: + diff_intensity = np.append(np.diff(intensity), 0) # dYt + median_diff_intensity = np.median(diff_intensity) # dYt_Median + median_abs_deviation = np.median(abs(diff_intensity - median_diff_intensity)) + intensity_values_z = ( + 0.6745 * (diff_intensity - median_diff_intensity) + ) / median_abs_deviation + return intensity_values_z + + +def filter_z_intensity_values(z_intensity, z_intensityhreshold): + filtered_z_intensity = copy.deepcopy(z_intensity) + filtered_z_intensity[np.abs(z_intensity) > z_intensityhreshold] = np.nan + filtered_z_intensity[0] = filtered_z_intensity[-1] = 0 + return filtered_z_intensity + + +def despike_filter( + intensity: np.ndarray, + filtered_z_intensity: np.ndarray, + moving_region_size: int, + ignore_lims=(20, 46), +): + n = len(intensity) + i_despiked = copy.deepcopy(intensity) + spikes = np.nonzero(np.isnan(filtered_z_intensity)) + for i in list(spikes[0]): + if i < ignore_lims[0] or i > ignore_lims[1]: + w = np.arange( + max(0, i - moving_region_size), min(n, i + moving_region_size) + ) + w = w[~np.isnan(filtered_z_intensity[w])] + if intensity[w].any(): + i_despiked[i] = np.mean(intensity[w]) + else: + i_despiked[i] = intensity[i] + return i_despiked diff --git a/src/raman_fitting/processing/filter.py b/src/raman_fitting/processing/filter.py new file mode 100644 index 0000000..041f3b7 --- /dev/null +++ b/src/raman_fitting/processing/filter.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from dataclasses import dataclass +from typing import Callable, Protocol, Tuple, Dict +import numpy as np +from scipy import signal + +from raman_fitting.models.spectrum import SpectrumData + + +class IntensityProcessor(Protocol): + def process_intensity(self, intensity: np.ndarray) -> np.ndarray: ... 
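As a rough sketch of how the despiking helpers above behave, the snippet below runs `despike_spectrum` on a synthetic trace. The array values and the window size of 3 are illustrative assumptions, not package defaults (only the z-threshold of 4 matches the `SpectrumDespiker` default).

``` python
import numpy as np

from raman_fitting.processing.despike import despike_spectrum

# Synthetic trace: a smooth, slowly varying baseline with one sharp spike.
# Index 60 lies outside the 20-46 range that despike_filter leaves untouched.
intensity = 100 + 10 * np.sin(np.linspace(0, 10, 100))
intensity[60] += 250

despiked, info = despike_spectrum(
    intensity,
    threshold_z_value=4,   # same default as SpectrumDespiker
    moving_region_size=3,  # wider window so the flagged point has clean neighbours
)

# The spike is replaced by the mean of its non-flagged neighbours; the
# intermediate z-score arrays are returned in `info` for inspection.
print(round(intensity[60]), round(despiked[60]))
print(sorted(info))  # ['filtered_z_intensity', 'z_intensity']
```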
+ + +@dataclass +class IntensityFilter: + name: str + filter_func: Callable + filter_args: Tuple + filter_kwargs: Dict + + def process_intensity(self, intensity: np.ndarray) -> np.ndarray: + if intensity is None: + raise ValueError("no intensity given to filter") + filtered_intensity = self.filter_func( + intensity, *self.filter_args, **self.filter_kwargs + ) + return filtered_intensity + + +available_filters = { + "savgol_filter": IntensityFilter( + "savgol_filter", + signal.savgol_filter, + filter_args=(13, 3), + filter_kwargs=dict(mode="nearest"), + ) +} + + +def filter_spectrum( + spectrum: SpectrumData = None, filter_name="savgol_filter" +) -> SpectrumData: + if filter_name not in available_filters: + raise ValueError(f"Chosen filter {filter_name} not available.") + + filter_class = available_filters[filter_name] + filtered_intensity = filter_class.process_intensity(spectrum.intensity) + label = f"{filter_name}_{spectrum.label}" + filtered_spectrum = spectrum.model_copy( + update={"intensity": filtered_intensity, "label": label} + ) + return filtered_spectrum + + +""" +Parameters +---------- +ramanshift : array or list + collection of the ramanshift values +intensity : array or list + collection of the intensity values +label : TYPE, optional + DESCRIPTION. The default is "". +**kwargs : TYPE + DESCRIPTION. + +Returns +------- +None. +""" diff --git a/src/raman_fitting/processing/normalization.py b/src/raman_fitting/processing/normalization.py new file mode 100644 index 0000000..9e2ab17 --- /dev/null +++ b/src/raman_fitting/processing/normalization.py @@ -0,0 +1,85 @@ +from typing import Optional + +import numpy as np + +from ..models.splitter import SplitSpectrum +from ..models.spectrum import SpectrumData +from ..models.fit_models import SpectrumFitModel, LMFitModel + +from loguru import logger + + +def get_simple_normalization_intensity(split_spectrum: SplitSpectrum) -> float: + norm_spec = split_spectrum.get_region("normalization") + normalization_intensity = np.nanmax(norm_spec.intensity) + return normalization_intensity + + +def get_normalization_factor( + split_spectrum: SplitSpectrum, + norm_method="simple", + normalization_model: LMFitModel = None, +) -> float: + simple_norm = get_simple_normalization_intensity(split_spectrum) + normalization_intensity = simple_norm + + if "fit" in norm_method and normalization_model is not None: + fit_norm = normalizer_fit_model( + split_spectrum, normalization_model=normalization_model + ) + if fit_norm is not None: + normalization_intensity = fit_norm + norm_factor = 1 / normalization_intensity + + return norm_factor + + +def normalize_regions_in_split_spectrum( + split_spectrum: SplitSpectrum, norm_factor: float, label: Optional[str] = None +) -> SplitSpectrum: + norm_spec_regions = {} + norm_infos = {} + label = split_spectrum.spectrum.label if label is None else label + for region_name, spec in split_spectrum.spec_regions.items(): + norm_label = f"{region_name}_{label}" if region_name not in label else label + norm_label = f"norm_{norm_label}" if "norm" not in norm_label else norm_label + # label looks like "norm_regionname_label" + _data = SpectrumData( + **{ + "ramanshift": spec.ramanshift, + "intensity": spec.intensity * norm_factor, + "label": norm_label, + "region_name": region_name, + "source": spec.source, + } + ) + norm_spec_regions.update(**{region_name: _data}) + norm_infos.update(**{region_name: {"normalization_factor": norm_factor}}) + norm_spectra = split_spectrum.model_copy( + update={"spec_regions": norm_spec_regions, 
"info": norm_infos} + ) + return norm_spectra + + +def normalize_split_spectrum( + split_spectrum: SplitSpectrum = None, +) -> SplitSpectrum: + "Normalize the spectrum intensity according to normalization method." + normalization_factor = get_normalization_factor(split_spectrum) + norm_data = normalize_regions_in_split_spectrum( + split_spectrum, normalization_factor + ) + return norm_data + + +def normalizer_fit_model( + specrum: SpectrumData, normalization_model: LMFitModel +) -> float | None: + spec_fit = SpectrumFitModel(spectrum=specrum, model=normalization_model) + spec_fit.run_fit() + if not spec_fit.fit_result: + return + try: + return spec_fit.fit_result.params["G_height"].value + except KeyError as e: + logger.error(e) diff --git a/src/raman_fitting/processing/post_processing.py b/src/raman_fitting/processing/post_processing.py new file mode 100644 index 0000000..c0570a5 --- /dev/null +++ b/src/raman_fitting/processing/post_processing.py @@ -0,0 +1,52 @@ +from dataclasses import dataclass +from typing import Protocol + +from raman_fitting.models.spectrum import SpectrumData + +from .baseline_subtraction import subtract_baseline_from_split_spectrum +from .filter import filter_spectrum +from .despike import SpectrumDespiker +from ..models.splitter import SplitSpectrum +from .normalization import normalize_split_spectrum + + +class PreProcessor(Protocol): + def process_spectrum(self, spectrum: SpectrumData = None): ... + + +class PostProcessor(Protocol): + def process_spectrum(self, split_spectrum: SplitSpectrum = None): ... + + +@dataclass +class SpectrumProcessor: + spectrum: SpectrumData + processed: bool = False + clean_spectrum: SplitSpectrum | None = None + + def __post_init__(self): + processed_spectrum = self.process_spectrum() + self.clean_spectrum = processed_spectrum + self.processed = True + + def process_spectrum(self) -> SplitSpectrum: + pre_processed_spectrum = self.pre_process_intensity(spectrum=self.spectrum) + post_processed_spectra = self.post_process_spectrum( + spectrum=pre_processed_spectrum + ) + return post_processed_spectra + + def pre_process_intensity(self, spectrum: SpectrumData = None) -> SpectrumData: + filtered_spectrum = filter_spectrum(spectrum=spectrum) + despiker = SpectrumDespiker(spectrum=filtered_spectrum) + return despiker.processed_spectrum + + def post_process_spectrum(self, spectrum: SpectrumData = None) -> SplitSpectrum: + split_spectrum = SplitSpectrum(spectrum=spectrum) + baseline_subtracted = subtract_baseline_from_split_spectrum( + split_spectrum=split_spectrum + ) + normalized_spectra = normalize_split_spectrum( + split_spectrum=baseline_subtracted + ) + return normalized_spectra diff --git a/src/raman_fitting/processing/spectrum_constructor.py b/src/raman_fitting/processing/spectrum_constructor.py deleted file mode 100644 index 66c349a..0000000 --- a/src/raman_fitting/processing/spectrum_constructor.py +++ /dev/null @@ -1,325 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Mon May 3 11:10:59 2021 - -@author: dw -""" - -import copy -import logging -from collections import namedtuple -from dataclasses import dataclass, field -from operator import itemgetter -from pathlib import Path -from typing import Dict, List - -import numpy as np -import pandas as pd - - -from raman_fitting.indexing.filedata_parser import SpectrumReader -from raman_fitting.processing.cleaner import ( - BaselineSubtractorNormalizer, - Despiker, - SpectrumMethods, -) -from raman_fitting.processing.spectrum_template import SpecTemplate, 
SpectrumWindows - -logger = logging.getLogger(__name__) - - -@dataclass(order=True, frozen=False) -class SpectrumDataLoader: - """ - Raman Spectrum Loader Dataclass, reads in the file and constructs a clean spectrum from the data. - A sequence of steps is performed on the raw data from SpectrumReader. - The steps/methods are: smoothening filter, despiking and baseline correction. - """ - - _fields = ("ramanshift", "intensity") - file: Path = field(default=Path(Path.cwd().joinpath("empty.txt"))) - spectrum_length: int = field(default=0, init=False) - info: Dict = field(default_factory=dict, repr=False) - ovv: pd.DataFrame = field(default_factory=pd.DataFrame, repr=False) - run_kwargs: Dict = field(default_factory=dict, repr=False) - post_process_methods = [ - ("filter_data", "raw", "filtered"), - ("despike", "filtered", "despiked"), - ("baseline_correction", "despiked", "clean_data"), - ] - - def __post_init__(self): - self._qcnm = self.__class__.__qualname__ - self.register = {} # this stores the data of each method as they are performed - self.filtered_intensity = None - self._despike = None - self._baseline_corrected = None - self.clean_data = None - self.clean_df = {} # dict of DataFrames - self.register_df = pd.DataFrame() - self._read_succes = False - - self.load_data_delegator() - - def register_spectrum(self, ramanshift, intensity, label): - _spec = SpecTemplate(ramanshift, copy.deepcopy(intensity), label) - self.register.update({label: _spec}) - - def __getattr__(self, attr): - """checks if attr is in instance dicts before raising error""" - if attr in self.run_kwargs.keys(): - return self.run_kwargs.get(attr, None) - elif attr in self.info.keys(): - return self.info.get(attr, None) - else: - raise AttributeError( - f'Attribute "{attr}" is not in {self.run_kwargs.keys()} or {self.info.keys()}.' 
- ) - - def load_data_delegator(self): - """calls the SpectrumReader class""" - - if self.info: - FP_from_info = self.info.get("FilePath", None) - if FP_from_info: - if Path(FP_from_info) != self.file: - raise ValueError( - f"Mismatch in value for FilePath: {self.file} != {FP_from_info}" - ) - else: - self.info = {"FilePath": self.file} - raw_spectrum = SpectrumReader(self.file) - self._raw_spectrum = raw_spectrum - self.register_spectrum(raw_spectrum.ramanshift, raw_spectrum.intensity, "raw") - if raw_spectrum.spectrum_length > 0: - self.spectrum_length = raw_spectrum.spectrum_length - self.spectrum_methods_delegator() - else: - logger.error(f"{self._qcnm} load data fail for:\n\t {self.file}") - self.info = {**self.info, **self.run_kwargs} - - def spectrum_methods_delegator(self): - for method, on_lbl, out_lbl in self.post_process_methods: - try: - getattr(self, method)(on_lbl=on_lbl, out_lbl=out_lbl) - except Exception as exc: - logger.error(f"{self._qcnm} {method} failed for {self.file} with {exc}") - - self.set_clean_data_df() - self.set_df_from_register() - - def filter_data(self, on_lbl="raw", out_lbl="filtered"): - _r, _int, _lbl = self.register.get(on_lbl) - logger.debug(f"{self.file} try to filter len int({len(_int)}),({type(_int)})") - filtered_intensity = SpectrumMethods.filtered_int(intensity=_int) - self.filtered_intensity = filtered_intensity - self.register_spectrum(_r, filtered_intensity, out_lbl) - - def despike(self, on_lbl="filtered", out_lbl="despiked"): - _r, _int, _lbl = self.register.get(on_lbl) - _despike = Despiker(_int) # IDEA check for nan in array - self._despike = _despike - self.register_spectrum(_r, _despike.despiked_intensity, out_lbl) - - def baseline_correction(self, on_lbl="despiked", out_lbl="clean_data"): - _r, _int, _lbl = self.register.get(on_lbl) - _baseline_corrected = BaselineSubtractorNormalizer(_r, _int, label="despiked") - self._baseline_corrected = _baseline_corrected - - _fullspec = _baseline_corrected.norm_data["full"] - self.register_spectrum(_fullspec.ramanshift, _fullspec.intensity, out_lbl) - self.clean_data = _baseline_corrected.norm_data - - def set_clean_data_df(self): - self.clean_df = { - k: pd.DataFrame( - {"ramanshift": val.ramanshift, f"int_{self.SamplePos}": val.intensity} - ) - for k, val in self.clean_data.items() - } - - def set_df_from_register(self): - _regdf = pd.DataFrame() - for k, val in self.register.items(): - _spec = pd.DataFrame( - { - "ramanshift": val.ramanshift, - f"{k}_int_{self.SampleID}_{self.SamplePos}": val.intensity, - } - ) - if _regdf.empty: - _regdf = _spec - else: - _regdf = pd.merge_asof(_regdf, _spec, on="ramanshift") - self.register_df = _regdf - logger.debug( - f"{self._qcnm} set_df_from_register len int({len(_regdf)}),({type(_regdf)})" - ) - - def plot_raw(self): - _raw_lbls = [ - i - for i in self.register_df.columns - if not any(a in i for a in ["ramanshift", "clean_data"]) - ] - self.register_df.plot(x="ramanshift", y=_raw_lbls) - - def split_data(self, on_lbl="filtered"): - _r, _int, _lbl = self.register.get(on_lbl) # unpacking data from register - for windowname, limits in SpectrumWindows().items(): - ind = (_r >= np.min(limits)) & (_r <= np.max(limits)) - _intslice = _int[ind] - label = f"{_lbl}_window_{windowname}" - self.register_spectrum(_r, _intslice, label) - - -class SpectrumDataCollection: - """ - This class takes in a collection of SpectrumDataLoader instances. - It checks each member of the list and this enables the option - to take the mean of several spectra from the same SampleID. 
- """ - - MeanSpecTemplate = namedtuple( - "MeanSpectras", "windowname sID_rawcols sIDmean_col mean_info mean_spec" - ) - - def __init__(self, spectra: List = [SpectrumDataLoader]): - self._qcnm = self.__class__.__qualname__ - self._spectra = spectra - Validators.check_members( - self._spectra - ) # only raises warning when errors are found - self.spectra = Validators.check_spectra_lengths(self._spectra) - - self.info = self.get_mean_spectra_info(self.spectra) - self.info_df = pd.DataFrame(self.info, index=[0]) - self.prep_clean_data = self.get_mean_spectra_prep_data(self.spectra) - - self.calc_mean() - - @staticmethod - def get_mean_spectra_info(spectra: List[SpectrumDataLoader]) -> Dict: - """retrieves the info dict from spec instances and merges dict in keys that have 1 common value""" - - try: - _all_spec_info = [spec.info for spec in spectra if hasattr(spec, "info")] - - _all_spec_info_merged = { - k: val for i in _all_spec_info for k, val in i.items() - } - - _all_spec_info_sets = [ - (k, set([i.get(k, None) for i in _all_spec_info])) - for k in _all_spec_info_merged - ] - - mean_spec_info = { - k: list(val)[0] for k, val in _all_spec_info_sets if len(val) == 1 - } - except Exception as exc: - logger.warning(f"get_mean_spectra_info failed for spectra {spectra}") - mean_spec_info = {} - - mean_spec_info.update({"mean_spectrum": True}) - - return mean_spec_info - - @staticmethod - def get_mean_spectra_prep_data(spectra: List[SpectrumDataLoader]) -> Dict: - """retrieves the clean data from spec instances and makes lists of tuples""" - # and merges dict in keys that have 1 common value''' - try: - _all_spec = [ - spec - for spec in spectra - if hasattr(spec, "clean_data") and hasattr(spec, "SamplePos") - ] - - _all_spec_clean_data_keys = { - k for i in _all_spec for k in i.clean_data.keys() - } - - clean_prep_data = { - k: [(i.SamplePos, i.clean_data.get(k, None)) for i in _all_spec] - for k in _all_spec_clean_data_keys - } - except Exception as exc: - logger.warning(f"get_mean_spectra_prep_data failed for spectra {spectra}") - clean_prep_data = {} - - return clean_prep_data - - def calc_mean(self): - """Core function of the merging of spectra of different sample positions""" - _merged_window_specs = {} - _speclst = [] - _posmean_lbl_base = f'int_{self.info.get("SampleID")}_mean' - for wndwnm, data in self.prep_clean_data.items(): - _merge_df = pd.DataFrame() - _pos_lbl_lst = [] - - for _pos, _sp in data: - _pos_lbl = f"int_{_pos}" - - _dfspec = pd.DataFrame( - {"ramanshift": _sp.ramanshift, _pos_lbl: _sp.intensity} - ) - - if _merge_df.empty: - _merge_df = _dfspec - else: - _merge_df = pd.merge_asof(_merge_df, _dfspec, on="ramanshift") - _pos_lbl_lst.append(_pos_lbl) - - _posmean_lbl = f"{_posmean_lbl_base}_{len(_pos_lbl_lst)}" - _merge_df = _merge_df.assign( - **{_posmean_lbl: _merge_df[_pos_lbl_lst].mean(axis=1)} - ) - _merged_window_specs.update({wndwnm: _merge_df}) - - _old_spec = self.MeanSpecTemplate( - wndwnm, _pos_lbl_lst, _posmean_lbl, self.info_df, _merge_df - ) - _speclst.append(_old_spec) - - self.fitting_spectra = _speclst - self.mean_data = _merged_window_specs - - def __repr__(self): - return f"{self.info}" - - -class Validators: - """collection of validator for spectrum object""" - - @staticmethod - def check_members(spectra: List[SpectrumDataLoader]): - """checks member of lists""" - _false_spectra = [ - spec - for spec in spectra - if type(spec) != SpectrumDataLoader or not hasattr(spec, "clean_data") - ] - if _false_spectra: - logger.warning( - f'_check_members not 
all spectra members are "SpectrumDataLoader" or missing clean_data attribute' - ) - - @staticmethod - def check_spectra_lengths(spectra: List[SpectrumDataLoader]) -> List: - lengths = [i.spectrum_length for i in spectra] - set_lengths = set(lengths) - if len(set_lengths) == 1: - # print(f'Spectra all same length {set_lengths}') - pass - else: - length_counts = [(i, lengths.count(i)) for i in set_lengths] - best_guess_length = max(length_counts, key=itemgetter(1))[0] - print(f"Spectra not same length {length_counts} took {best_guess_length}") - # self._raw_spectra = self._spectra - spectra = [ - spec for spec in spectra if spec.spectrum_length == best_guess_length - ] - return spectra diff --git a/src/raman_fitting/processing/spectrum_template.py b/src/raman_fitting/processing/spectrum_template.py deleted file mode 100644 index 95b2ce0..0000000 --- a/src/raman_fitting/processing/spectrum_template.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -from collections import namedtuple - - -def SpectrumWindows(): - windows = { - "full": (200, 3600), - "full_1st_2nd": (800, 3500), - "low": (150, 850), - "1st_order": (900, 2000), - "mid": (1850, 2150), - "2nd_order": (2150, 3380), - "normalization": (1500, 1675), - } - return windows - - -def SpectrumWindowLimits(): - windows = { - "full": (20, -20), - "full_1st_2nd": (20, -20), - "low": (10, -10), - "1st_order": (20, -20), - "mid": (10, -10), - "2nd_order": (20, -20), - "normalization": (10, -10), - } - return windows - - -SpecTemplate = namedtuple("Spectrum", "ramanshift intensity label") - - -class SpectrumTemplate: - def __init__(self, spec_name="spectrum_info"): - self.spec_name = spec_name - self.grp_names() - self.set_template() - - def grp_names(self): - sGrp_cols = ("SampleGroup", "SampleID", "FileCreationDate") - sPos_cols = ("FileStem", "SamplePos", "FilePath") - spectrum_cols = ("ramanshift", "intensity_raw", "intensity") - spectrum_info_cols = ("spectrum_length",) - export_info_cols = ( - "DestGrpDir", - "DestFittingPlots", - "DestFittingComps", - "DestRaw", - ) - info_cols = ( - sGrp_cols - + sPos_cols - + spectrum_cols - + spectrum_info_cols - + export_info_cols - ) - names = { - "sGrp_cols": sGrp_cols, - "sPos_cols": sPos_cols, - "spectrum_cols": spectrum_cols, - "spectrum_info_cols": spectrum_info_cols, - "export_info_cols": export_info_cols, - "all": info_cols, - } - Names = namedtuple("GrpNames", names.keys()) - self.grp_names = Names(**names) - - def set_template(self): - self.template = namedtuple(self.spec_name, self.grp_names.all) diff --git a/src/raman_fitting/types.py b/src/raman_fitting/types.py new file mode 100644 index 0000000..e077616 --- /dev/null +++ b/src/raman_fitting/types.py @@ -0,0 +1,7 @@ +from typing import TypeAlias, Dict + +from raman_fitting.models.deconvolution.base_model import BaseLMFitModel +from raman_fitting.models.fit_models import SpectrumFitModel + +LMFitModelCollection: TypeAlias = Dict[str, Dict[str, BaseLMFitModel]] +SpectrumFitModelCollection: TypeAlias = Dict[str, Dict[str, SpectrumFitModel]] diff --git a/src/raman_fitting/utils/coordinators.py b/src/raman_fitting/utils/coordinators.py deleted file mode 100644 index b1a076d..0000000 --- a/src/raman_fitting/utils/coordinators.py +++ /dev/null @@ -1,140 +0,0 @@ -import logging -from collections import OrderedDict -from warnings import warn - - -logger = logging.getLogger(__name__) - -logging.captureWarnings(True) # sends these warning to the logger - - -class FieldsTrackerWarning(UserWarning): - pass - - -class 
FieldsTracker: - """ - Keeps check of the fields from multiple sources, - allows to store values in dict - yields results a single results from several sources for each field - status is True when all fields in results have at least one value - """ - - def __init__(self, fields: list = [], sources: tuple = [], **kwargs): - self.fields = fields - self.sources = sources - self._register_template = self.make_register(sources, fields) - self.set_sources_attr() - self._results = {} - - def make_register(self, sources, fields): - _reg = {source: {field: None for field in fields} for source in sources} - return _reg - - def set_sources_attr(self): - for source in self.sources: - setattr(self, f"{source}", self._register_template[source]) - - @property - def register(self): - _reg = {source: getattr(self, source) for source in self.sources} - return _reg - - @property - def status(self): - _st = False - if set(self.results) == set(self.fields): - _st = True - return _st - - @property - def results(self): - return self._results - - def _set_results(self): - _results = self.get_values_from_all_fields() - self._results = _results - - @property - def missing(self): - results = self.results - _missing = set(self.fields) - set(results.keys()) - return _missing - - def get_values_from_all_fields(self): - _result_values = {} - for field in self.fields: - _fvaldict_sources = self.get_field_value_from_sources(field) - if _fvaldict_sources: - _src = {"source": i for i in _fvaldict_sources.keys()} - _value = {"value": i for i in _fvaldict_sources.values()} - _nice_result = {**_src, **_value} - _result_values.update({field: _nice_result}) - return _result_values - - def get_field_value_from_sources(self, field): - _fsvals = OrderedDict({}) - _result = {} - for source in self.sources: - _src = getattr(self, source) - _fval = _src.get(field, None) - if _fval: - _fsvals.update({source: _fval}) - _setvals = _fsvals.values() - _setsources = set(_fsvals.keys()) - _lstsources = list(_setsources) - if len(_setvals) == 1: - _fval = list(_setvals)[0] - if len(_setsources) == 1: - _src = _lstsources[0] - elif len(_setsources) > 1: - _src = list(_fsvals.keys())[0] - warn( - f"Field {field} has multiple sources {_setsources}, one value ", - FieldsTrackerWarning, - ) - _result = {_src: _fval} - elif len(_setvals) > 1: - _firstval = list(_fsvals.items())[0] - warn( - f"Field {field} has multiple sources {_setsources}, different values follow order of sources ", - FieldsTrackerWarning, - ) - _result = {_firstval[0]: _firstval[1]} - return _result - - def multi_store(self, source: str, **kwargs): - _fields_kwargs = {k: val for k, val in kwargs.items() if k in self.fields} - if _fields_kwargs: - for field, val in _fields_kwargs.items(): - self.store(source, field, val) - self._set_results() - - def store(self, source, field, val): - """store one value: source, field, val""" - if source in self.sources and field in self.fields and val: - _src = getattr(self, source) - _fval = _src.get(field, None) - if not _fval: - _src[field] = val - elif _fval == val: - warn( - f"Redefinition of {field} in {source} ignored", - FieldsTrackerWarning, - ) - elif _fval != val: - _src[field] = val - warn( - f"Overwriting of {field} in {source} with new value! 
{_fval} is not {val}", - FieldsTrackerWarning, - ) - else: - warn(f"Store {source} {val} unexpected", FieldsTrackerWarning) - - setattr(self, source, _src) - self._set_results() - else: - warn( - f"Store in {source} at {field} not in {self.sources} or not in {self.fields} or not {val}, ignored.", - FieldsTrackerWarning, - ) diff --git a/src/raman_fitting/utils/decorators.py b/src/raman_fitting/utils/decorators.py new file mode 100644 index 0000000..8ab7d8f --- /dev/null +++ b/src/raman_fitting/utils/decorators.py @@ -0,0 +1,62 @@ +from typing import Callable +from functools import wraps, partial +from inspect import signature + + +def decorator_with_kwargs(decorator: Callable) -> Callable: + """ + Source: https://gist.github.com/ramonrosa/402af55633e9b6c273882ac074760426 + Decorator factory to give decorated decorators the skill to receive + optional keyword arguments. + If a decorator "some_decorator" is decorated with this function: + @decorator_with_kwargs + def some_decorator(decorated_function, kwarg1=1, kwarg2=2): + def wrapper(*decorated_function_args, **decorated_function_kwargs): + '''Modifies the behavior of decorated_function according + to the value of kwarg1 and kwarg2''' + ... + return wrapper + It will be usable in the following ways: + @some_decorator + def func(x): + ... + @some_decorator() + def func(x): + ... + @some_decorator(kwarg1=3) # or other combinations of kwargs + def func(x, y): + ... + :param decorator: decorator to be given optional kwargs-handling skills + :type decorator: Callable + :raises TypeError: if the decorator does not receive a single Callable or + keyword arguments + :raises TypeError: if the signature of the decorated decorator does not + conform to: Callable, **keyword_arguments + :return: modified decorator + :rtype: Callable + """ + + @wraps(decorator) + def decorator_wrapper(*args, **kwargs): + if (len(kwargs) == 0) and (len(args) == 1) and callable(args[0]): + return decorator(args[0]) + if len(args) == 0: + return partial(decorator, **kwargs) + raise TypeError( + f"{decorator.__name__} expects either a single Callable " + "or keyword arguments" + ) + + signature_values = signature(decorator).parameters.values() + signature_args = [ + param.name for param in signature_values if param.default == param.empty + ] + + if len(signature_args) != 1: + raise TypeError( + f"{decorator.__name__} signature should be of the form:\n" + f"{decorator.__name__}(function: typing.Callable, " + "kwarg_1=default_1, kwarg_2=default_2, ...) 
-> Callable" + ) + + return decorator_wrapper diff --git a/src/raman_fitting/utils/exceptions.py b/src/raman_fitting/utils/exceptions.py deleted file mode 100644 index cc7d411..0000000 --- a/src/raman_fitting/utils/exceptions.py +++ /dev/null @@ -1,6 +0,0 @@ -class prdError(Exception): - """Base error raised by pyramdeconv.""" - - -class MainDelegatorError(prdError): - """Raised when a method in the main delegator fails.""" diff --git a/src/raman_fitting/utils/string_operations.py b/src/raman_fitting/utils/string_operations.py new file mode 100644 index 0000000..c4fcea1 --- /dev/null +++ b/src/raman_fitting/utils/string_operations.py @@ -0,0 +1,26 @@ +from lmfit.parameter import Parameter + + +def join_prefix_suffix(prefix: str, suffix: str) -> str: + prefix_ = prefix.rstrip("_") + suffix_ = suffix.lstrip("_") + if suffix_ in prefix: + return prefix_ + return f"{prefix_}_{suffix_}" + + +def prepare_text_from_param(param: Parameter) -> str: + text = "" + if not param: + return text + _ptext = "" + _val = param.value + _min = param.min + if _min != _val: + _ptext += f"{_min} < " + _ptext += f"{_val}" + _max = param.max + if _max != _val: + _ptext += f" > {_max}" + text += f", center : {_ptext}" + return text diff --git a/tests/conftest.py b/tests/conftest.py index 23ac8b3..9f95487 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,22 +1,48 @@ -# flake8: noqa -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- """ Configuration file for pytest and commonly used fixtures """ -import pprint -import sys -import pathlib -# Need this for local editable install pytest run to work -# This pythonpath = "src" should have fixed it. -if "src" not in sys.path: - sys.path.append("src") - print("added src to sys.path") - pprint.pprint(sys.path) - pprint.pprint(pathlib.Path.cwd()) +import pytest +from raman_fitting.config import settings +from raman_fitting.config.path_settings import InternalPathSettings +# Global fixtures -import raman_fitting -# Global fixtures +@pytest.fixture(autouse=True) +def tmp_raman_dir(tmp_path): + d = tmp_path / "raman-fitting" + d.mkdir() + yield d + d.rmdir() + + +@pytest.fixture(autouse=True) +def internal_paths(): + return InternalPathSettings() + + +@pytest.fixture(autouse=True) +def example_files(internal_paths): + example_files = list(internal_paths.example_fixtures.rglob("*txt")) + return example_files + + +@pytest.fixture(autouse=True) +def default_definitions(internal_paths): + return settings.default_definitions + + +@pytest.fixture(autouse=True) +def default_models(internal_paths): + return settings.default_models + + +@pytest.fixture(autouse=True) +def default_models_first_order(default_models): + return default_models.get("first_order") + + +@pytest.fixture(autouse=True) +def default_models_second_order(default_models): + return default_models.get("second_order") diff --git a/tests/deconvolution_models/__init__.py b/tests/deconvolution_models/__init__.py index 29dc168..e69de29 100644 --- a/tests/deconvolution_models/__init__.py +++ b/tests/deconvolution_models/__init__.py @@ -1,7 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri May 28 09:04:32 2021 - -@author: zmg -""" diff --git a/tests/deconvolution_models/test_base_model.py b/tests/deconvolution_models/test_base_model.py index 6db6c8f..1687fd2 100644 --- a/tests/deconvolution_models/test_base_model.py +++ b/tests/deconvolution_models/test_base_model.py @@ -4,49 +4,53 @@ @author: DW """ -import unittest +import pytest from functools import partial -import pytest -from lmfit 
import Model +from pydantic import ValidationError -from raman_fitting.deconvolution_models.base_model import _SUBSTRATE_PEAK, BaseModel +from raman_fitting.models.deconvolution.base_model import ( + SUBSTRATE_PEAK, + BaseLMFitModel, +) -_SUBSTRATE_PREFIX = _SUBSTRATE_PEAK.split("peak")[0] +SUBSTRATE_PREFIX = SUBSTRATE_PEAK.split("peak")[0] -def _get_list_components(bm): +def helper_get_list_components(bm): _listcompsprefix = partial(map, lambda x,: getattr(x, "prefix")) _bm_prefix = list(_listcompsprefix(bm.lmfit_model.components)) return _bm_prefix -class TestBaseModel(unittest.TestCase): - def test_empty_base_model(self): - bm = BaseModel() - self.assertEqual(bm.model_name, "") - self.assertFalse(bm.has_substrate) - bm.add_substrate() - self.assertIn(bm.model_name, _SUBSTRATE_PEAK) - self.assertEqual(type(bm.lmfit_model).__qualname__, "GaussianModel") - self.assertIn(bm.lmfit_model.prefix, _SUBSTRATE_PEAK) - self.assertTrue(issubclass(type(bm.lmfit_model), Model)) +def test_empty_base_model(): + with pytest.raises(ValidationError): + BaseLMFitModel() + with pytest.raises(ValidationError): + BaseLMFitModel(name="Test_empty") + + with pytest.raises(ValidationError): + BaseLMFitModel(peaks="A+B") - def test_base_model_2peaks(self): - bm = BaseModel(model_name="K2+D+G") + with pytest.raises(ValidationError): + BaseLMFitModel(name="Test_empty", peaks="A+B", region_name="full") - self.assertListEqual(_get_list_components(bm), ["D_", "G_"]) - bm.add_substrate() - self.assertListEqual(_get_list_components(bm), ["D_", "G_", _SUBSTRATE_PREFIX]) - bm.remove_substrate() - self.assertListEqual(_get_list_components(bm), ["D_", "G_"]) - def test_base_model_wrong_chars_model_name(self): - bm = BaseModel(model_name="K2+---////+ +7 +K1111+1D+D2") - self.assertListEqual(_get_list_components(bm), ["D2_"]) - bm.add_substrate() - self.assertListEqual(_get_list_components(bm), ["D2_", _SUBSTRATE_PREFIX]) +def test_base_model_2peaks(): + bm = BaseLMFitModel(name="Test_2peaks", peaks="K2+D+G", region_name="full") + assert set(helper_get_list_components(bm)) == set(["D_", "G_"]) + bm.add_substrate() + assert set(helper_get_list_components(bm)) == set(["D_", "G_", SUBSTRATE_PREFIX]) + bm.remove_substrate() + assert set(helper_get_list_components(bm)) == set(["D_", "G_"]) -if __name__ == "__main__": - unittest.main() +def test_base_model_wrong_chars_model_name(): + bm = BaseLMFitModel( + name="Test_wrong_chars", + peaks="K2+---////+ +7 +K1111+1D+D2", + region_name="full", + ) + assert set(helper_get_list_components(bm)) == set(["D2_"]) + bm.add_substrate() + assert set(helper_get_list_components(bm)) == set(["D2_", SUBSTRATE_PREFIX]) diff --git a/tests/deconvolution_models/test_base_peaks.py b/tests/deconvolution_models/test_base_peaks.py index 03baf08..55ac67a 100644 --- a/tests/deconvolution_models/test_base_peaks.py +++ b/tests/deconvolution_models/test_base_peaks.py @@ -1,28 +1,19 @@ -import copy import logging -import unittest - import pytest -from lmfit import Model -# from raman_fitting.deconvolution_models import first_order_peaks -# import raman_fitting -from raman_fitting.deconvolution_models.default_peaks.base_peak import ( +from pydantic import ValidationError +from raman_fitting.models.deconvolution.base_peak import ( BasePeak, - BasePeakWarning, - LMfitModelConstructorMethods, ) + logger = logging.getLogger(__name__) logging.captureWarnings(True) # sends these warning to the logger -# %% - - def _error_message_contains(excinfo, testmsg: str, verbose: bool = False): _fltr_str = [ - i if not i in ["(", 
")"] else " " + i if i not in ["(", ")"] else " " for i in str(excinfo.value) if i.isalnum() or i in (",", ".", " ", "_", "(", ")") ] @@ -39,328 +30,132 @@ def _error_message_contains(excinfo, testmsg: str, verbose: bool = False): return _test -# %% - - -class TestBasePeak(unittest.TestCase): - # %% TESTING - - def test_BasePeak_attributes(self): - self.assertTrue(BasePeak.__doc__) - self.assertTrue(BasePeak._fields) - self.assertTrue(BasePeak._sources) - self.assertTrue(BasePeak.PEAK_TYPE_OPTIONS) - - def test_empty_base_class_raises(self): - # %% - - class EmptyTestChild(metaclass=BasePeak): - pass - - eb = EmptyTestChild() - - with pytest.raises(ValueError) as excinfo: - eb.peak_type = "emptytest" - assert _error_message_contains(excinfo, "value emptytest for peak_type not in") - - with pytest.raises(ValueError) as excinfo: - eb.peak_name = 10 * "emptytest" - assert _error_message_contains(excinfo, "value for peak_name is too long 90") - - self.assertFalse(eb.peak_model) - - # %% - - def test_empty_base_class_with_kwargs_raises(self): - # %% - - class EmptyTestChild( - metaclass=BasePeak, - testkwarg2=2, - testkwarg3=3, - peak_type="Voigt", - verbose=True, - ): - pass - - eb = EmptyTestChild() - # %% - - self.assertEqual(eb.peak_type, "Voigt") - self.assertEqual(eb.testkwarg2, 2) - - with pytest.raises(ValueError) as excinfo: - eb.peak_type = "emptytest" - self.assertTrue( - _error_message_contains(excinfo, "value emptytest for peak_type not in") - ) - - with pytest.raises(ValueError) as excinfo: - eb.peak_name = 10 * "emptytest" - self.assertTrue( - _error_message_contains(excinfo, "value for peak_name is too long 90") - ) - - self.assertFalse(eb.peak_model) - # with pytest.raises(AttributeError) as excinfo: - # eb.peak_model - # self.assertTrue( - # _error_message_contains( - # excinfo, "type object empty no attribute _peak_model" - # ) - # ) - - with pytest.raises(ValueError) as excinfo: - eb.peak_type = "VoigtLorentzian" - self.assertTrue( - _error_message_contains( - excinfo, - ''''Multiple options ['Lorentzian', 'Voigt'] for misspelled value "VoigtLorentzian"''', - ) - ) - - def test_empty_base_class_with_false_input(self): - # %% - - class EmptyTestChild(metaclass=BasePeak, peak_type="FalsePeak"): - pass - - with pytest.raises(ValueError) as excinfo: - eb = EmptyTestChild() - self.assertTrue( - _error_message_contains(excinfo, "value emptytest for peak_type not in") - ) - - # %% - def test_base_class_good_with_init_extra_tests(self): - # %% - class TestD1peak(metaclass=BasePeak, debug=True): - """ - here is docstring of TestD1peak, - small spelling error on peak_type - """ - - def __init__(self, *args, **kwargs): - print(f"called __init__ {self} TestD1peak, with {args}, {kwargs}") - self._peak_type = "Voigt" - self.peak_name = "D1D1" - self.input_param_settings = { - "center": {"value": 2650, "min": 2600, "max": 2750}, - "sigma": {"value": 60, "min": 1, "max": 200}, - "amplitude": {"value": 14, "min": 1e-03, "max": 100}, - } - - td1 = TestD1peak() - peakmod = "" - self.assertEqual(str(td1.peak_model), peakmod) - _class_str = f"TestD1peak, {peakmod}, center : 2600 < 2650 > 2750" - self.assertIn(_class_str, str(td1)) - td1.peak_name = "R2D2" - self.assertEqual(td1.peak_model.prefix, "R2D2_") - - # _def_param = td1.param_hints_constructor({}) - # _def_key = list(BasePeak.default_settings.keys())[0] - # _def_param[_def_key].value == BasePeak.default_settings[_def_key]["value"] - - # _def_param = td1.param_hints_constructor( - # td1.fco.register["init"]["param_hints"] - # ) - # 
self.assertEqual(_def_param["amplitude"].value, 14) - - # with pytest.raises(TypeError) as excinfo: - # _def_param = td1.param_hints_constructor("fail") - # self.assertTrue( - # _error_message_contains( - # excinfo, - # "input_param_hints should be of type dictionary not ", - # ) - # ) - - # _err_hints = copy.copy(td1.fco.register["init"]["param_hints"]) - # _err_hints["center"] = (1, 2, 3, 4) - - # with pytest.raises(ValueError) as excinfo: - # _def_param = td1.param_hints_constructor(_err_hints) - # self.assertTrue( - # _error_message_contains( - # excinfo, " Unable to create a Parameter from center and (1, 2, 3, 4):" - # ) - # ) - - # %% - def test_base_class_good_with_init(self): - class TestD1peak(metaclass=BasePeak, debug=True): - """ - test_base_class_good_with_init - but with spelling error in peak_type - """ - - def __init__(self, *args, **kwargs): - print(f"called __init__ {self} TestD1peak, with {args}, {kwargs}") - self._peak_type = "cartVoigt" - self.peak_name = "D1D1" - self.input_param_settings = { - "center": {"value": 2650, "min": 2600, "max": 2750}, - "sigma": {"value": 60, "min": 1, "max": 200}, - "amplitude": {"value": 14, "min": 1e-03, "max": 100}, - } - - td1 = TestD1peak() - _class_str = "TestD1peak, , center : 2600 < 2650 > 2750" - self.assertIn(_class_str, str(td1)) - # print(td1) - - # %% - - def test_base_class_good_with_init_added_method(self): - # %% - class TestD1peakmeta(metaclass=BasePeak, a=2): - """ - here is docstring of TestD1peak - """ - - def __init__(self, *args, **kwargs): - # super().__init__(self) - # print(f'__subclass __init__ self "{self}" TestD1peakmeta, with {args}, {kwargs}') - self.peak_type = "Lorentzian" - self.peak_name = "D1D1" - self.input_param_settings = { - "center": {"value": 2650, "min": 2600, "max": 2750}, - "sigma": {"value": 60, "min": 1, "max": 200}, - "amplitude": {"value": 14, "min": 1e-03, "max": 100}, - } - self._meta_added_method(kwargs) - - def _meta_added_method(self, *args, **kwargs): - """added method""" - print(f"{self.__dict__}") - self._added_method_arg = kwargs - print(f"added method called {kwargs}") - # return arg - - td1m = TestD1peakmeta(add=33) - _teststr = "TestD1peakmeta, , center : 2600 < 2650 > 2750" - self.assertIn(_teststr, str(td1m)) - # assert str(td1m) == _teststr - - # %% - - def test_base_class_good_with_attributes_and_init(self): - # %% - class NewChildClassAttr(metaclass=BasePeak): - """New child class for easier definition""" - - _test = "testkwarg" - - param_hints = { - "center": {"value": 2435, "min": 2400, "max": 2550}, - "sigma": {"value": 30, "min": 1, "max": 200}, - "amplitude": {"value": 2, "min": 1e-03, "max": 100}, - } - peak_type = "Voigt" #'Voigt' - peak_name = "R2D2" - - def __init__(self, **kwargs): - # print(f'__init child empty pass {self}') - pass - # super().__init__() - - nca = NewChildClassAttr() - _center_value = nca.peak_model.param_hints["center"]["value"] - assert _center_value == 2435 - # print('Instance child:', nca) - # %% - - def test_base_class_good_with_attributes_no_init(self): - class NewChildClassAttrNoInit(metaclass=BasePeak): - """New child class for easier definition""" - - _test = "testkwarg" - - param_hints = { - "center": {"value": 2435, "min": 2400, "max": 2550}, - "sigma": {"value": 30, "min": 1, "max": 200}, - "amplitude": {"value": 2, "min": 1e-03, "max": 100}, - } - peak_type = "Voigt" #'Voigt' - peak_name = "R2D2" - # def __init__(self,**kwargs): - # print(f'__init child empty pass {self}') - # pass - # super().__init__() - - ncni = 
NewChildClassAttrNoInit() - _center_value = ncni.peak_model.param_hints["center"]["value"] - assert _center_value == 2435 - - # %% - - def test_base_class_good_with_attributes_init_collision_values(self): - # %% - class NewChildClassInit(metaclass=BasePeak, peak_type="Gaussian"): - """New child class for easier definition""" - - _test = "testkwarg" - peak_type = "Lorentzian" - param_hints = { - "center": {"value": 2435, "min": 2400, "max": 2550}, - "sigma": {"value": 30, "min": 1, "max": 200}, - "amplitude": {"value": 2, "min": 1e-03, "max": 100}, - } - - def __init__(self, **kwargs): - self.peak_type = "Voigt" - self.peak_name = "R2D2" - self.param_hints = {"center": {"value": 20}} - - # self.param_hints = { - # 'center': - # {'value': 2435,'min': 2400, 'max': 2550}, - # 'sigma': - # {'value': 30,'min' : 1, 'max': 200}, - # 'amplitude' : - # {'value': 2,'min' : 1E-03, 'max': 100} - # } - # super().__init__(self) - - nci = NewChildClassInit() - self.assertEqual(nci.peak_type, "Gaussian") - self.assertEqual(nci.peak_model.param_hints["center"]["value"], 2435) - # print(nci) - # print(self.__dict__) - - def test_base_with_only_keyword_args(self): - new = BasePeak("newPeak", **{"noname": 2, "debug": False, "peak_type": "Voigt"}) - new.param_hints = {"center": {"value": 200}} - newinst = new() - _newinst_str = "newPeak, , center : -inf < 200 > inf" - self.assertEqual(str(newinst), _newinst_str) - - -class TestLMfitModelConstructorMethods(unittest.TestCase): - LMfit = LMfitModelConstructorMethods - - def test_make_model_from_peak_type_and_name(self): - model = self.LMfit.make_model_from_peak_type_and_name( - peak_type="Voigt", peak_name="lmfitpeak" - ) - self.assertTrue(isinstance(model, Model)) - self.assertEqual(model.prefix, "lmfitpeak") - - with pytest.raises(NotImplementedError) as excinfo: - model = self.LMfit.make_model_from_peak_type_and_name(peak_type="FalsePeak") - self.assertTrue( - _error_message_contains( - excinfo, - " This peak type or model 'FalsePeak' has not been implemented.", - ) - ) - - -# self = TestLMfitModelConstructorMethods() - - -# %% -if __name__ == "__main__": - unittest.main() - self = TestBasePeak() +def test_basepeak_initialization(): + with pytest.raises(ValidationError): + BasePeak() + with pytest.raises(ValidationError): + BasePeak(peak_name="test") + with pytest.raises(ValidationError): + BasePeak(peak_type="Voigt") + test_peak = BasePeak(peak_name="test", peak_type="Voigt") + assert test_peak.peak_name == "test" + + +@pytest.mark.skip(reason="TODO: add field validations") +def test_empty_base_class_with_kwargs_raises(): + eb = BasePeak(peak_type="Voigt", peak_name="test") + + assert eb.peak_type == "Voigt" + + # add in field validation str_length + with pytest.raises(ValueError) as excinfo: + eb.peak_name = 10 * "emptytest" + assert _error_message_contains(excinfo, "value for peak_name is too long 90") + + # add built in field validation for peak_type + with pytest.raises(ValueError) as excinfo: + eb.peak_type = "VoigtLorentzian" + assert _error_message_contains( + excinfo, + ''''Multiple options ['Lorentzian', 'Voigt'] for misspelled value "VoigtLorentzian"''', + ) + + +def test_base_class_good_with_init_extra_tests(): + td1_kwargs = dict( + peak_type="Voigt", + peak_name="D1D1", + param_hints={ + "center": {"value": 2650, "min": 2600, "max": 2750}, + "sigma": {"value": 60, "min": 1, "max": 200}, + "amplitude": {"value": 14, "min": 1e-03, "max": 100}, + }, + ) + + td1 = BasePeak(**td1_kwargs) + assert td1.peak_type == "Voigt" + assert td1.peak_name == 
"D1D1" + peakmod = "" + assert str(td1.lmfit_model) == peakmod + # _class_str = f"center : 2600 < 2650 > 2750" + # assertIn(_class_str, str(td1)) + # dont test attr setters + # td1.peak_name = "R2D2" + # assert td1.lmfit_model.prefix == "R2D2_" + + +def test_base_class_good_with_init(): + d1_kwargs = dict( + peak_name="D1D1", + peak_type="Gaussian", + param_hints={ + "center": {"value": 2650, "min": 2600, "max": 2750}, + "sigma": {"value": 60, "min": 1, "max": 200}, + "amplitude": {"value": 14, "min": 1e-03, "max": 100}, + }, + ) + + td1 = BasePeak(**d1_kwargs) + assert td1.peak_name == d1_kwargs["peak_name"] + + +def test_base_class_good_with_init_added_method(): + tkwargs = dict( + peak_type="Lorentzian", + peak_name="D1D1", + param_hints={ + "center": {"value": 2650, "min": 2600, "max": 2750}, + "sigma": {"value": 60, "min": 1, "max": 200}, + "amplitude": {"value": 14, "min": 1e-03, "max": 100}, + }, + ) + + td1m = BasePeak(**tkwargs) + assert td1m.peak_type == tkwargs["peak_type"] + + +def test_base_class_good_with_attributes_and_init(): + tkwargs = dict( + param_hints={ + "center": {"value": 2435, "min": 2400, "max": 2550}, + "sigma": {"value": 30, "min": 1, "max": 200}, + "amplitude": {"value": 2, "min": 1e-03, "max": 100}, + }, + peak_type="Voigt", + peak_name="R2D2", + ) + + nca = BasePeak(**tkwargs) + _center_value = nca.lmfit_model.param_hints["center"]["value"] + assert _center_value == 2435 + + +def test_base_class_good_with_attributes_no_init(): + tkwargs = dict( + param_hints={ + "center": {"value": 2435, "min": 2400, "max": 2550}, + "sigma": {"value": 30, "min": 1, "max": 200}, + "amplitude": {"value": 2, "min": 1e-03, "max": 100}, + }, + peak_type="Voigt", + peak_name="R2D2", + ) + + ncni = BasePeak(**tkwargs) + assert ncni.param_hints["center"].value == 2435 + assert ncni.lmfit_model.param_hints["center"]["value"] == 2435 + + +def test_base_class_good_with_attributes_init_collision_values(): + tkwargs = dict( + param_hints={ + "center": {"value": 2435, "min": 2400, "max": 2550}, + "sigma": {"value": 30, "min": 1, "max": 200}, + "amplitude": {"value": 2, "min": 1e-03, "max": 100}, + }, + peak_type="Voigt", + peak_name="R2D2", + ) + nci = BasePeak(**tkwargs) + assert nci.peak_type == "Voigt" + assert nci.lmfit_model.param_hints["center"]["value"] == 2435 diff --git a/tests/deconvolution_models/test_fit_models.py b/tests/deconvolution_models/test_fit_models.py index 2a1a9fa..054ed5e 100644 --- a/tests/deconvolution_models/test_fit_models.py +++ b/tests/deconvolution_models/test_fit_models.py @@ -1,48 +1,39 @@ -# flake8: noqa -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri May 14 09:12:56 2021 +import math -@author: zmg -""" - -import unittest - -import pandas as pd import pytest -from lmfit import Model - -import raman_fitting -from raman_fitting.deconvolution_models.fit_models import Fitter, PrepareParams - -# try: -# import raman_fitting - - -# except Exception as e: -# print(f'pytest file {__file__}, {__name__} error {e}') - - -class TestFitter(unittest.TestCase): - def test_empty_Fitter(self): - ft = Fitter({}) - self.assertFalse(ft.start_fit) - self.assertEqual(ft.spectra, {}) - ft.fit_delegator() - - -class TestPrepareParams(unittest.TestCase): - def test_empty_PrepareParams(self): - # pp = - with self.assertRaises(AttributeError): - PrepareParams({}) - - -def _testing(): - self = ft - self = prep - -if __name__ == "__main__": - unittest.main() +from raman_fitting.models.fit_models import SpectrumFitModel +from 
raman_fitting.imports.spectrumdata_parser import SpectrumReader +from raman_fitting.processing.post_processing import SpectrumProcessor + + +@pytest.fixture +def clean_spec(example_files) -> None: + file = [i for i in example_files if "_pos4" in i.stem][0] + specread = SpectrumReader(file) + + spectrum_processor = SpectrumProcessor(specread.spectrum) + clean_spec_1st_order = spectrum_processor.clean_spectrum.spec_regions[ + "savgol_filter_raw_region_first_order" + ] + clean_spec_1st_order.region_name = "first_order" + return clean_spec_1st_order + + +def test_fit_first_order(clean_spec, default_models): + spectrum = clean_spec + test_component = "center" + + for model_name, test_model in default_models["first_order"].items(): + # with subTest(model_name=model_name, test_model=test_model): + spec_fit = SpectrumFitModel( + **{"spectrum": spectrum, "model": test_model, "region": "first_order"} + ) + spec_fit.run_fit() + for component in test_model.lmfit_model.components: + # with subTest(component=component): + peak_component = f"{component.prefix}{test_component}" + fit_value = spec_fit.fit_result.best_values[peak_component] + init_value = spec_fit.fit_result.init_values[peak_component] + assert math.isclose(fit_value, init_value, rel_tol=0.05) + assert spec_fit.fit_result.success diff --git a/tests/deconvolution_models/test_peak_validation.py b/tests/deconvolution_models/test_peak_validation.py index d8f4337..e69de29 100644 --- a/tests/deconvolution_models/test_peak_validation.py +++ b/tests/deconvolution_models/test_peak_validation.py @@ -1,80 +0,0 @@ -# flake8: noqa - -import unittest - -import pytest -from lmfit import Model - -import raman_fitting -from raman_fitting.deconvolution_models.peak_validation import ( - NotFoundAnyModelsWarning, - PeakModelValidator, -) - - -class TestPeakModelValidator(unittest.TestCase): - def setUp(self): - self.pmv = PeakModelValidator() - - def test_pmv_valid_models(self): - self.assertTrue(self.pmv.valid_models) - - def test_pmv_set_debug(self): - self.assertFalse(self.pmv.debug) - self.assertTrue(self.pmv._set_debug(**{"debug": True})) - - def test_get_subclasses_from_base(self): - with self.assertWarns(NotFoundAnyModelsWarning): - self.pmv.get_subclasses_from_base("") - - with self.assertWarns(NotFoundAnyModelsWarning): - self.pmv.get_subclasses_from_base(str) - - def test_validation_inspect_models(self): - _valid = self.pmv.validation_inspect_models([str]) - self.assertTrue(_valid) - self.assertFalse(_valid[1][0].valid) - self.assertIn("has no attr", _valid[1][0].message) - - _valid = self.pmv.validation_inspect_models([Model]) - self.assertTrue(_valid) - self.assertFalse(_valid[1][0].valid) - self.assertIn("Unable to initialize model", _valid[1][0].message) - - def test_get_cmap_list(self): - _cmap = self.pmv.get_cmap_list([], cmap_options=()) - self.assertEqual(_cmap, []) - _cmap = self.pmv.get_cmap_list( - [1] * 50, cmap_options=(), fallback_color=self.pmv.fallback_color - ) - self.assertEqual(_cmap, [self.pmv.fallback_color] * 50) - _cmap = self.pmv.get_cmap_list([1] * 5) - self.assertEqual(len(_cmap), 5) - - def test___getattr__(self): - with self.assertRaises(AttributeError): - self.pmv.fake_attr - - def test___iter__(self): - _iter = [i for i in self.pmv] - self.assertIsInstance(_iter, list) - - def test_if_lmfit_models(self): - if self.pmv.lmfit_models: - _getdict = self.pmv.get_dict() - self.assertIsInstance(_getdict, dict) - - _getdict = self.pmv.get_model_dict(self.pmv.lmfit_models) - self.assertIsInstance(_getdict, dict) - - -def 
_debugging(): - self = TestPeakModelValidator() - peaks = PeakModelValidator() - self.pmv = peaks - # all([isinstance(i.peak_model, Model) for i in peaks.lmfit_models]) - # all([isinstance(i.peak_model, Model) for i in peaks.get_dict().values()]) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/delegating/test_main_delegator.py b/tests/delegating/test_main_delegator.py index 0c857fe..5b4369b 100644 --- a/tests/delegating/test_main_delegator.py +++ b/tests/delegating/test_main_delegator.py @@ -1,47 +1,27 @@ -import datetime -import unittest - -# from raman_fitting.deconvolution_models import first_order_peaks -import pandas as pd import pytest -import raman_fitting -from raman_fitting.datafiles import example_files -from raman_fitting.deconvolution_models.base_model import InitializeModels -from raman_fitting.delegating.main_delegator import ( - MainDelegator, - add_make_sample_group_destdirs, -) - +from raman_fitting.config.path_settings import RunModes +from raman_fitting.delegating.main_delegator import MainDelegator -class TestMainDelegator(unittest.TestCase): - def setUp(self): - self.maindebug = MainDelegator(run_mode="DEBUG") - self.models = self.maindebug.initialize_default_models() - def test_initialize_models(self): - self.assertTrue(isinstance(self.models, InitializeModels)) +@pytest.fixture(scope="module") +def delegator(): + return MainDelegator(run_mode=RunModes.PYTEST) - def test_index(self): - self.assertTrue(hasattr(self.maindebug, "index")) - self.assertTrue(isinstance(getattr(self.maindebug, "index"), pd.DataFrame)) - _sample_group = self.maindebug.sample_group_gen() - _arg = next(_sample_group) - _destdirs = add_make_sample_group_destdirs(_arg[-1]) - _alltest = all([_arg[0] in a for a in [i.parts for i in _destdirs.values()]]) - self.assertTrue(_alltest) +def test_initialize_models(delegator): + assert "first_order" in delegator.lmfit_models + assert "first_order" in delegator.selected_models + with pytest.raises(KeyError): + delegator.select_fitting_model("no_name", "no model") - def test_generator(self): - _sample_group = self.maindebug.sample_group_gen() - _sample_group_arg = next(_sample_group) - self.assertTrue(_sample_group_arg) - _sID_gen = self.maindebug._sID_gen(*_sample_group_arg) - _sID_arg = next(_sID_gen) - self.assertTrue(_sID_arg) +def test_delegator_index(delegator): + assert delegator.index + assert len(delegator.index.raman_files) == 5 + selection = delegator.select_samples_from_index() + assert len(delegator.index.raman_files) == len(selection) -if __name__ == "__main__": - unittest.main() - self = TestMainDelegator() +def test_main_run(delegator): + assert delegator.results diff --git a/tests/exporting/test_plotting.py b/tests/exporting/test_plotting.py index aaf8bb1..a978b36 100644 --- a/tests/exporting/test_plotting.py +++ b/tests/exporting/test_plotting.py @@ -6,12 +6,38 @@ @author: zmg """ +# flake8: noqa + +import pytest + +from raman_fitting.models.deconvolution.init_models import InitializeModels +from raman_fitting.exports.plot_formatting import ( + get_cmap_list, + assign_colors_to_peaks, + DEFAULT_COLOR, + COLOR_BLACK, +) + + +# class PeakModelAnnotation(unittest.TestCase): +@pytest.fixture() +def initialized_models(): + return InitializeModels() + + +def test_get_cmap_list(): + assert get_cmap_list(0) == None + _cmap = get_cmap_list(50) + assert _cmap == [DEFAULT_COLOR] * 50 + _cmap = get_cmap_list(5) + assert len(_cmap) >= 5 + _cmap1 = get_cmap_list(5, default_color=COLOR_BLACK) + assert _cmap1 == [COLOR_BLACK] * 5 -def 
_testing(): - peak1, res1_peak_spec, res2_peak_spec = ( - modname_1, - fitres_1, - fitres_2, - ) - peak1, res1_peak_spec = "1st_6peaks+Si", self._1st["1st_6peaks+Si"] +def test_assign_colors_to_peaks(initialized_models): + for order_type, model_collection in initialized_models.lmfit_models.items(): + for model_name, model in model_collection.items(): + annotated_models = assign_colors_to_peaks(model.lmfit_model.components) + prefixes = set([i.prefix for i in model.lmfit_model.components]) + assert prefixes == set(annotated_models.keys()) diff --git a/tests/indexing/test_filename_parser.py b/tests/indexing/test_filename_parser.py index 9e1b134..cd6eb05 100644 --- a/tests/indexing/test_filename_parser.py +++ b/tests/indexing/test_filename_parser.py @@ -1,101 +1,63 @@ -import unittest - -# import importlib -from importlib import resources -from pathlib import Path - -# from raman_fitting.deconvolution_models import first_order_peaks -import raman_fitting -from raman_fitting.datafiles import example_files -from raman_fitting.indexing.filename_parser import PathParser -from raman_fitting.indexing.filename_parser import ( - _extra_sID_name_mapper, - _extra_overwrite_sID_from_mapper, +import pytest + +from raman_fitting.imports.models import RamanFileInfo +from raman_fitting.imports.samples.sample_id_helpers import ( + overwrite_sample_id_from_mapper, + overwrite_sample_group_id_from_parts, ) -from raman_fitting.indexing.filename_parser import ( - _extra_sgrpID_name_mapper, - _extra_overwrite_sgrpID_from_parts, + + +from raman_fitting.imports.samples.sample_id_helpers import ( + parse_string_to_sample_id_and_position, ) -from raman_fitting.indexing.filename_parser_helpers import filestem_to_sid_and_pos - -# import pytest - - -class TestFilenameParser(unittest.TestCase): - example_parse_expected = { - "errEMP2_1.txt": ("errEMP2", 1), - "errTS2_pos1.txt": ("errTS2", 1), - "Si_spectrum01.txt": ("Si", 1), - "testDW38C_pos1.txt": ("testDW38C", 1), - "testDW38C_pos2.txt": ("testDW38C", 2), - "testDW38C_pos3.txt": ("testDW38C", 3), - "testDW38C_pos4.txt": ("testDW38C", 4), - "DW_AB_CD-EF_GE_pos3": ("DW_AB_CD-EF_GE", 3), - "DW99-pos3": ("DW99", 3), - "Si": ("Si", 0), - } - - result_attr = "parse_result" - - def setUp(self): - _example_path = Path(example_files.__path__[0]) - _example_files_contents = list(Path(_example_path).rglob("*txt")) - - self.datafiles = _example_files_contents - # list(filter(lambda x: x.endswith('.txt'), _example_files_contents)) - _pathparsers = [] - for fn in self.datafiles: - _pathparsers.append(PathParser(_example_path.joinpath(fn))) - self.data_PPs = _pathparsers - self.empty_PP = PathParser() - - # Make expected results - # {i.name: (i.parse_result['SampleID'], i.parse_result['SamplePos']) for i in self.data_PPs} - - def test_PathParser(self): - self.assertTrue(all(isinstance(i, PathParser) for i in self.data_PPs)) - # Check if instance has results attribute - self.assertTrue(all(hasattr(i, self.result_attr) for i in self.data_PPs)) - - def test_PathParser_empty(self): - self.assertTrue(hasattr(self.empty_PP, "_flavour")) - self.assertTrue(hasattr(self.empty_PP, self.result_attr)) - - def test_PP_extra_from_map(self): - for k, val in _extra_sID_name_mapper.items(): - _mapval = _extra_overwrite_sID_from_mapper(k) - - self.assertEqual(_mapval, val) - - def test_PP_extra_from_parts(self): - self.assertEqual("TEST", _extra_overwrite_sgrpID_from_parts([], "TEST")) - - for k, val in _extra_sgrpID_name_mapper.items(): - emptymap_PP = PathParser(f"{k}/TEST.txt") - self.assertEqual( 
- val, - _extra_overwrite_sgrpID_from_parts(emptymap_PP.parts, "TEST"), - ) - - def test_PP_parse_filepath_to_sid_and_pos(self): - for file, _expected in self.example_parse_expected.items(): - self.assertEqual(filestem_to_sid_and_pos(file), _expected) - - # def test_PathParser(self): - # _dfpath = Path(__file__).parent.parent.parent / 'src' / 'raman_fitting' / 'datafiles' - # _fls = list(_dfpath.rglob('*.txt')) - # _res = [] - # for fn in _fls: - # _res.append(PathParser(fn)) - # sIDs = [i.parse_result['SampleID'] for i in _res] - # self.assertEqual(sIDs, self.sIDs_expected) - - # def test_empty(self): - # PathParser('') - - -if __name__ == "__main__": - unittest.main() - self = TestFilenameParser() - self.setUp() +example_parse_fixture = { + "errEMP2_1.txt": ("errEMP2", 1), + "errTS2_pos1.txt": ("errTS2", 1), + "Si_spectrum01.txt": ("Si", 1), + "testDW38C_pos1.txt": ("testDW38C", 1), + "testDW38C_pos2.txt": ("testDW38C", 2), + "testDW38C_pos3.txt": ("testDW38C", 3), + "testDW38C_pos4.txt": ("testDW38C", 4), + "DW_AB_CD-EF_GE_pos3": ("DW_AB_CD-EF_GE", 3), + "DW99-pos3": ("DW99", 3), + "Si": ("Si", 0), +} + + +# class TestFilenameParser(unittest.TestCase): +result_attr = "parse_result" +sample_id_name_mapper = {} +sGrp_name_mapper = {} + + +@pytest.fixture() +def path_parsers(example_files): + path_parsers_ = [] + for fn in example_files: + path_parsers_.append(RamanFileInfo(**{"file": fn})) + return path_parsers_ + + +def test_ramanfileinfo(path_parsers): + assert all(isinstance(i, RamanFileInfo) for i in path_parsers) + + +def test_sample_id_name_mapper(): + for k, val in sample_id_name_mapper.items(): + _mapval = overwrite_sample_id_from_mapper(k, sample_id_name_mapper) + assert _mapval == val + + +def test_overwrite_sample_id_from_mapper(): + assert "TEST" == overwrite_sample_group_id_from_parts([], "TEST", sGrp_name_mapper) + for k, val in sGrp_name_mapper.items(): + empty_path_parts = RamanFileInfo(file=f"{k}/TEST.txt") + assert val == overwrite_sample_group_id_from_parts( + empty_path_parts.parts, "TEST", sGrp_name_mapper + ) + + +def test_parse_string_to_sample_id_and_position(): + for file, _expected in example_parse_fixture.items(): + assert parse_string_to_sample_id_and_position(file) == _expected diff --git a/tests/indexing/test_filename_parser_collector.py b/tests/indexing/test_filename_parser_collector.py deleted file mode 100644 index 7defe65..0000000 --- a/tests/indexing/test_filename_parser_collector.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Created on Sun Aug 8 19:28:26 2021 - -@author: DW -""" diff --git a/tests/indexing/test_filename_parser_helpers.py b/tests/indexing/test_filename_parser_helpers.py deleted file mode 100644 index f050e8e..0000000 --- a/tests/indexing/test_filename_parser_helpers.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Created on Sun Aug 8 19:27:44 2021 - -@author: DW -""" diff --git a/tests/indexing/test_indexer.py b/tests/indexing/test_indexer.py index eedbbf6..3f2cb19 100644 --- a/tests/indexing/test_indexer.py +++ b/tests/indexing/test_indexer.py @@ -1,44 +1,39 @@ -import datetime -import unittest -from importlib import resources -from pathlib import Path - -# from raman_fitting.deconvolution_models import first_order_peaks -import pandas as pd import pytest -import raman_fitting -from raman_fitting.datafiles import example_files -from raman_fitting.indexing.indexer import MakeRamanFilesIndex - - -class TestIndexer(unittest.TestCase): - def setUp(self): - _example_path = Path(example_files.__path__[0]) - _example_files_contents = 
list(Path(_example_path).rglob("*txt")) - - self._example_files = [i for i in _example_files_contents] - - self.RamanIndex = MakeRamanFilesIndex(run_mode="make_examples") - - def test_MakeRamanFilesIndex_make_examples(self): - self.assertEqual(len(self.RamanIndex), len(self._example_files)) - - def test_load_index(self): - _loaded_index = self.RamanIndex.load_index() - self.assertTrue(isinstance(_loaded_index, pd.DataFrame)) - - for col in _loaded_index.columns: - _setload = set(_loaded_index[col].values) - - _setindex = set(self.RamanIndex.index[col].values) - if all(isinstance(i, datetime.date) for i in list(_setindex)): - # Convert pandas, np.datetime to normal dt - _setload = set([pd.to_datetime(i).date() for i in list(_setload)]) - - self.assertEqual(_setload, _setindex) - - -if __name__ == "__main__": - unittest.main() - self = TestIndexer() +from raman_fitting.config.path_settings import ( + get_run_mode_paths, + RunModes, +) +from raman_fitting.imports.files.file_indexer import ( + RamanFileIndex, + initialize_index_from_source_files, +) +from raman_fitting.imports.models import RamanFileInfo + +run_mode = RunModes.PYTEST +run_paths = get_run_mode_paths(run_mode) + + +@pytest.fixture +def index(example_files, internal_paths, tmp_raman_dir): + pytest_fixtures_files = list(internal_paths.pytest_fixtures.rglob("*txt")) + index_file = internal_paths.temp_index_file + all_test_files = example_files + pytest_fixtures_files + index = initialize_index_from_source_files( + index_file=index_file, files=all_test_files, force_reindex=True + ) + return index + + +def test_index_make_examples(index, example_files): + assert isinstance(index, RamanFileIndex) + assert isinstance(index.raman_files[0], RamanFileInfo) + assert len(index.dataset) > 1 + assert len(index.dataset) == len(example_files) + + +# @unittest.skip("export_index not yet implemented") +def test_load_index(index): + index.index_file.exists() + new_index = RamanFileIndex(index_file=index.index_file, force_reindex=False) + assert isinstance(new_index, RamanFileIndex) diff --git a/tests/models/__init__.py b/tests/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/models/test_base_peak.py b/tests/models/test_base_peak.py new file mode 100644 index 0000000..4be455c --- /dev/null +++ b/tests/models/test_base_peak.py @@ -0,0 +1,34 @@ +from raman_fitting.models.deconvolution.base_peak import BasePeak + + +def test_initialize_base_peaks( + default_definitions, default_models_first_order, default_models_second_order +): + peaks = {} + + peak_items = { + **default_definitions["first_order"]["peaks"], + **default_definitions["second_order"]["peaks"], + }.items() + for k, v in peak_items: + peaks.update({k: BasePeak(**v)}) + + peak_d = BasePeak(**default_definitions["first_order"]["peaks"]["D"]) + assert ( + peak_d.peak_name + == default_definitions["first_order"]["peaks"]["D"]["peak_name"] + ) + assert ( + peak_d.peak_type + == default_definitions["first_order"]["peaks"]["D"]["peak_type"] + ) + assert ( + peak_d.lmfit_model.components[0].prefix + == default_definitions["first_order"]["peaks"]["D"]["peak_name"] + "_" + ) + assert ( + peak_d.param_hints["center"].value + == default_definitions["first_order"]["peaks"]["D"]["param_hints"]["center"][ + "value" + ] + ) diff --git a/tests/models/test_calculate_params.py b/tests/models/test_calculate_params.py new file mode 100644 index 0000000..f7d413c --- /dev/null +++ b/tests/models/test_calculate_params.py @@ -0,0 +1,67 @@ +import pytest + +from 
raman_fitting.models.post_deconvolution.calculate_params import ratio_funcs + + +result_first = {"D_center": 1, "G_center": 2, "D1D1_center": 3} +first_peaks = "G+D+D2+D3+D4+D5" +result_second = ( + {"D4D4 +D1D1+GD1+D2D2"}, + {"D_center": 1, "G_center": 2, "D1D1_center": 3}, +) +var_name = "peak" + + +@pytest.fixture +def list_of_ratio_funcs(): + return list(ratio_funcs) + + +@pytest.fixture +def results_first(default_models_first_order): + return { + k: val.get("value") + for k, val in default_models_first_order[ + "5peaks" + ].lmfit_model.param_hints.items() + if "value" in val + } + + +@pytest.fixture +def results_second(default_models_second_order): + return { + k: val.get("value") + for k, val in default_models_second_order[ + "2nd_4peaks" + ].lmfit_model.param_hints.items() + if "value" in val + } + + +def test_calculate_params_keyerror(list_of_ratio_funcs, results_first): + var_name = "no_var" + with pytest.raises(KeyError): + list_of_ratio_funcs[0](results_first, var_name) + + +def test_calculate_params_from_results( + results_first, results_second, list_of_ratio_funcs +): + combined_results = {**results_first, **results_second} + + prefix = "" + var_name = "center" + + results = {} + for ratio_func in list_of_ratio_funcs: + label, ratio = ratio_func(combined_results, var_name, prefix=prefix) + + func = ratio_func.__name__ + results[func] = {"label": label, "ratio": ratio} + assert results + assert results["ratio_d_to_g"]["ratio"] < 1 + assert results["ratio_d_to_g"]["label"] == "D/G" + for k, val in results.items(): + assert val["label"] + assert val["ratio"] > 0 diff --git a/tests/models/test_fit_models.py b/tests/models/test_fit_models.py new file mode 100644 index 0000000..2f8a402 --- /dev/null +++ b/tests/models/test_fit_models.py @@ -0,0 +1,34 @@ +from raman_fitting.imports.spectrumdata_parser import SpectrumReader +from raman_fitting.models.fit_models import SpectrumFitModel +from raman_fitting.processing.post_processing import SpectrumProcessor + + +def test_fit_model(example_files, default_models_first_order): + file = [i for i in example_files if "_pos4" in i.stem][0] + + specread = SpectrumReader(file) + + spectrum_processor = SpectrumProcessor(specread.spectrum) + clean_spec_1st_order = spectrum_processor.clean_spectrum.spec_regions[ + "savgol_filter_raw_region_first_order" + ] + clean_spec_1st_order.region_name = "first_order" + + model_2peaks = default_models_first_order["2peaks"] + spec_fit = SpectrumFitModel( + spectrum=clean_spec_1st_order, + model=model_2peaks, + region=clean_spec_1st_order.region_name, + ) + spec_fit.run_fit() + assert spec_fit.fit_result.success + assert spec_fit.fit_result.best_values + assert spec_fit.param_results["ratios"]["center"]["ratio_d_to_g"]["ratio"] < 1 + assert spec_fit.param_results["ratios"]["center"]["ratio_la_d_to_g"]["ratio"] < 10 + d_amp_ = spec_fit.fit_result.best_values["D_amplitude"] + g_amp_ = spec_fit.fit_result.best_values["G_amplitude"] + dg_ratio = d_amp_ / g_amp_ + assert ( + spec_fit.param_results["ratios"]["amplitude"]["ratio_d_to_g"]["ratio"] + == dg_ratio + ) diff --git a/tests/processing/test_cleaner.py b/tests/processing/test_cleaner.py index 2c24491..c199833 100644 --- a/tests/processing/test_cleaner.py +++ b/tests/processing/test_cleaner.py @@ -1,20 +1,18 @@ -import unittest +import pytest import numpy as np +from raman_fitting.processing.despike import SpectrumDespiker -from raman_fitting.processing.cleaner import Despiker +int_arrays = ( + np.array([1, 2, 3, 4, 5]), + np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]), + np.array([2, 2, 2, 2, 2, 2, 30, 20, 2, 2, 2, 2, 2, 2]) +) +@pytest.mark.parametrize('array', int_arrays) +def test_despiker(array): + despiker = SpectrumDespiker.model_construct() -class TestDespiker(unittest.TestCase): - def test_Despiker(self): - desp = Despiker(np.array([1, 2, 3, 4, 5])) - self.assertEqual(len(desp.df), 5) - - desp = Despiker(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])) - self.assertEqual(len(desp.df), 10) - - desp = Despiker(np.array([2, 2, 2, 2, 2, 2, 30, 20, 2, 2, 2, 2, 2, 2])) - -if __name__ == "__main__": - unittest.main() + desp_int = despiker.process_intensity(array) + assert len(desp_int) == len(array) diff --git a/tests/processing/test_prepare_mean_spectrum.py b/tests/processing/test_prepare_mean_spectrum.py deleted file mode 100644 index 4a369b6..0000000 --- a/tests/processing/test_prepare_mean_spectrum.py +++ /dev/null @@ -1,10 +0,0 @@ -# flake8: noqa -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -from collections import namedtuple - -import numpy as np -import pandas as pd - -from raman_fitting.processing.spectrum_template import SpectrumTemplate \ No newline at end of file diff --git a/tests/processing/test_spectrum_constructor.py b/tests/processing/test_spectrum_constructor.py index ffb6ef1..ee33ce0 100644 --- a/tests/processing/test_spectrum_constructor.py +++ b/tests/processing/test_spectrum_constructor.py @@ -1,65 +1,18 @@ -# flake8: noqa -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Fri May 14 09:01:57 2021 - -@author: zmg -""" - -import unittest -from pathlib import Path - import pytest -from raman_fitting.datafiles import example_files -from raman_fitting.processing.spectrum_constructor import ( - SpectrumDataCollection, - SpectrumDataLoader, -) - - -class TestSpectrumDataLoader(unittest.TestCase): - def setUp(self): - _example_path = Path(example_files.__path__[0]) - _example_files_contents = list(Path(_example_path).rglob("*txt")) - - self.testfile = next( - filter(lambda x: "testDW38C_pos4" in x.name, _example_files_contents) - ) - self.errorfile = next( - filter(lambda x: "wrong" in x.name, _example_files_contents) - ) - - def test_SpectrumDataLoader_empty(self): - spd = SpectrumDataLoader() - self.assertEqual(spd.file.name, "empty.txt") - - def test_SpectrumDataLoader_file(self): - pass - spd = SpectrumDataLoader( - self.testfile, run_kwargs=dict(SampleID="testfile", SamplePos=1) - ) - self.assertEqual(len(spd.register_df), 1600) - self.assertEqual(len(spd.register_df.columns), 5) - - # self = spcoll +from raman_fitting.imports.spectrumdata_parser import SpectrumReader +from raman_fitting.models.deconvolution.spectrum_regions import RegionNames -# class SpectrumData(): -def _debugging(): - self = TestSpectrumDataLoader() - pass - # spectrum_data = SpectrumDataLoader( - # file=meannm[-1], run_kwargs=_spectrum_position_info_kwargs, ovv=meangrp - # ) - # self = spectrum_data - # self._despike.Z_t - # self._despike.input_intensity - # self = self._despike - # rr = RL.export_collect[0] - # spec = rr.fitter.spectra_arg._spectra[0] +def test_spectrum_data_loader_empty(): + with pytest.raises(ValueError): + SpectrumReader("empty.txt") -if __name__ == "__main__": - unittest.main() +def test_spectrum_data_loader_file(example_files): + for file in example_files: + sprdr = SpectrumReader(file) + assert len(sprdr.spectrum.intensity) == 1600 + assert len(sprdr.spectrum.ramanshift) == 1600 + assert sprdr.spectrum.source == file + assert sprdr.spectrum.region_name == RegionNames.full diff --git 
a/src/raman_fitting/datafiles/example_files/empty-lines_1.txt b/tests/test_fixtures/empty-lines_1.txt similarity index 100% rename from src/raman_fitting/datafiles/example_files/empty-lines_1.txt rename to tests/test_fixtures/empty-lines_1.txt diff --git a/src/raman_fitting/datafiles/example_files/wrong-values-in-lines_pos1.txt b/tests/test_fixtures/wrong-values-in-lines_pos1.txt similarity index 100% rename from src/raman_fitting/datafiles/example_files/wrong-values-in-lines_pos1.txt rename to tests/test_fixtures/wrong-values-in-lines_pos1.txt diff --git a/tests/test_sample.py b/tests/test_sample.py deleted file mode 100644 index c3afbfb..0000000 --- a/tests/test_sample.py +++ /dev/null @@ -1,7 +0,0 @@ -# content of test_sample.py -def inc(x): - return x + 1 - - -def test_answer(): - assert "PyTestsNonZero" == "PyTestsNonZero" diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py deleted file mode 100644 index 792d600..0000000 --- a/tests/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# diff --git a/tests/utils/test_coordinators.py b/tests/utils/test_coordinators.py deleted file mode 100644 index 7119582..0000000 --- a/tests/utils/test_coordinators.py +++ /dev/null @@ -1,69 +0,0 @@ -import logging -import unittest -import warnings - -import raman_fitting -from raman_fitting.utils.coordinators import FieldsTracker, FieldsTrackerWarning - -logger = logging.getLogger(__name__) -logging.captureWarnings(True) # sends these warning to the logger - - -def ignore_warnings(test_func): - def do_test(self, *args, **kwargs): - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - test_func(self, *args, **kwargs) - - return do_test - - -class TestFieldsTracker(unittest.TestCase): - @ignore_warnings - def testFCO(self): - # %% - fco = FieldsTracker( - fields=["peak_name", "peak_type", "param_hints"], - sources=("kwargs", "cls_dict", "init"), - ) - assert not fco.results - assert fco.status == False - - fco.store("kwargs", "peak_name", "R2D2") - - with self.assertWarns(UserWarning) as cm: - fco.store("cls_dict", "peak_name", "R4D4") - self.assertIn( - "Field peak_name has multiple sources", ", ".join(map(str, cm.warning.args)) - ) - - with self.assertWarns(UserWarning) as cm: - fco.store("notinsources", "peak_name", "R4D4") - self.assertIn( - "Store in notinsources at peak_name", ", ".join(map(str, cm.warning.args)) - ) - - fco.store("init", "peak_type", "Voirentzian") - - with self.assertWarns(UserWarning) as cm: - fco.store("init", "peak_type", "Voirentzian") - self.assertIn( - "Redefinition of peak_type in init ignored", - ", ".join(map(str, cm.warning.args)), - ) - - fco.store("init", "peak_name", "Voirentzian") - fco.store("init", "param_hints", "Voirentzian") - fco.store("init", "peak_type", "Voirentzian") - - assert set(fco.results.keys()) == set(["peak_name", "peak_type", "param_hints"]) - assert fco.status == True - - _test_dict = { - "peak_name": "multi_store", - "peak_type": "multi_test", - "param_hints": 2, - } - fco.multi_store("cls_dict", **_test_dict) - assert fco.register["cls_dict"] == _test_dict - assert fco.results["param_hints"]["value"] == 2 diff --git a/todos.md b/todos.md index bc085ac..3e93380 100644 --- a/todos.md +++ b/todos.md @@ -3,10 +3,6 @@ # IDEA change version definition # IDEA list: -# added setup.cfg -# added unittests -# added README.md -# add project.toml only for # improved logger, each module needs a getlogger(name) # IDEA future daemonize the fitting process for using the package and dropping files in the datafiles folder # IDEA add docs with 
Sphinx, readthedocs
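
Note on the refactored BasePeak interface exercised by the tests in this diff: the sketch below is assembled only from the constructor arguments and attributes that appear above (peak_name, peak_type, param_hints, lmfit_model); it is illustrative of how the new pydantic-based peak definition is used in the tests, not a statement of the package's documented API, and the peak name "R2D2" is just the placeholder the tests themselves use.

    from raman_fitting.models.deconvolution.base_peak import BasePeak

    # Define a Voigt-shaped peak with initial values and bounds for its parameters,
    # mirroring the kwargs used in tests/deconvolution_models/test_base_peaks.py.
    peak = BasePeak(
        peak_name="R2D2",
        peak_type="Voigt",
        param_hints={
            "center": {"value": 2435, "min": 2400, "max": 2550},
            "sigma": {"value": 30, "min": 1, "max": 200},
            "amplitude": {"value": 2, "min": 1e-03, "max": 100},
        },
    )

    # The model exposes an lmfit Model built from these hints; the component
    # prefix is derived from peak_name, as asserted in tests/models/test_base_peak.py.
    assert peak.lmfit_model.param_hints["center"]["value"] == 2435
    assert peak.lmfit_model.components[0].prefix == "R2D2_"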