From 9c09db448f3bf3bca96228c55a12ba3f27455142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Thu, 5 Sep 2024 20:08:24 +0200 Subject: [PATCH 01/12] dev: Use Ruff instead of Black/isort/pylint --- .vscode/extensions.json | 5 +- .vscode/settings.json | 12 ++-- Makefile | 21 ++---- pyproject.toml | 29 ++------ requirements-dev.txt | 151 ++++++---------------------------------- 5 files changed, 42 insertions(+), 176 deletions(-) diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 488b3edd..5aac9a6a 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -2,17 +2,14 @@ "recommendations": [ "bierner.markdown-mermaid", "bradlc.vscode-tailwindcss", + "charliermarsh.ruff", "DavidAnson.vscode-markdownlint", "EditorConfig.EditorConfig", "github.vscode-github-actions", - "mechatroner.rainbow-csv", "mikestead.dotenv", "ms-azuretools.vscode-azurefunctions", "ms-azuretools.vscode-bicep", - "ms-python.black-formatter", "ms-python.debugpy", - "ms-python.isort", - "ms-python.pylint", "ms-python.python", "ms-python.vscode-pylance", "ms-toolsai.jupyter", diff --git a/.vscode/settings.json b/.vscode/settings.json index 330dd11f..aac49909 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,14 +1,14 @@ { - "python.analysis.autoImportCompletions": true, - "python.analysis.typeCheckingMode": "standard", "python.languageServer": "Pylance", + "python.analysis.typeCheckingMode": "basic", + "ruff.nativeServer": "on", "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", - "editor.formatOnPaste": true, "editor.formatOnSave": true, + "editor.defaultFormatter": "charliermarsh.ruff", "editor.codeActionsOnSave": { - "source.organizeImports": "always" - }, + "source.fixAll": "explicit", + "source.organizeImports": "explicit" + } }, "[markdown]": { "editor.defaultFormatter": "DavidAnson.vscode-markdownlint", diff --git a/Makefile b/Makefile index 1fd42905..5dfd14ea 100644 --- a/Makefile +++ b/Makefile @@ -80,17 +80,8 @@ upgrade: az bicep upgrade test: - @echo "➡️ Test generic formatter (Black)..." - python3 -m black --check . - - @echo "➡️ Test import formatter (isort)..." - python3 -m isort --jobs -1 --check . - - @echo "➡️ Test dependencies issues (deptry)..." - python3 -m deptry . - - @echo "➡️ Test code smells (Pylint)..." - python3 -m pylint . + @echo "➡️ Test code smells (Ruff)..." + python3 -m ruff check --select I,PL,RUF,UP,ASYNC,A,DTZ,T20,ARG,PERF --ignore RUF012 @echo "➡️ Test types (Pyright)..." python3 -m pyright . @@ -101,11 +92,11 @@ test: tests/*.py lint: - @echo "➡️ Fix with generic formatter (Black)..." - python3 -m black . + @echo "➡️ Fix with formatter..." + python3 -m ruff format - @echo "➡️ Fix with import formatter (isort)..." - python3 -m isort --jobs -1 . + @echo "➡️ Lint with linter..." + python3 -m ruff check --select I,PL,RUF,UP,ASYNC,A,DTZ,T20,ARG,PERF --ignore RUF012 --fix tunnel: @echo "➡️ Creating tunnel..." 
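For reference: the rule set passed via `--select`/`--ignore` above is duplicated between the `test` and `lint` targets. A minimal sketch (standard Ruff settings, not part of this patch) of how the same selection could be declared once in `pyproject.toml`, so a bare `ruff check` picks it up from both targets:

[tool.ruff.lint]
select = ["I", "PL", "RUF", "UP", "ASYNC", "A", "DTZ", "T20", "ARG", "PERF"]
ignore = ["RUF012"]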
diff --git a/pyproject.toml b/pyproject.toml index 26950387..50a6bab8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,18 +47,16 @@ dependencies = [ [project.optional-dependencies] dev = [ - "black[jupyter]==24.4.2", # Code formatter "deepeval==0.21.59", # LLM model evaluation "deptry==0.16.1", # Dependency tree testing - "isort==5.13.2", # Import sorter "pip-tools==7.4.1", # Compile requirements.txt from pyproject.toml - "pylint==3.2.5", # Linter "pyright==1.1.371", # Static type checker "pytest-assume==2.4.3", # Pytest plugin for conditional tests "pytest-asyncio==0.23.7", # Pytest plugin for async tests "pytest-repeat==0.9.3", # Pytest plugin for repeating tests "pytest-xdist[psutil]==3.6.1", # Pytest plugin for parallel testing "pytest==8.2.2", # Testing framework + "ruff==0.6.2", # Linter ] [tool.setuptools] @@ -81,27 +79,14 @@ DEP002 = [ "aiodns", # Resolver is required for the AIOHTTP AsyncResolver TCP resolver ] -[tool.black] -target-version = ["py311"] +[tool.ruff] +target-version = "py311" -[tool.isort] -combine_as_imports = true -profile = "black" -skip_gitignore = true +[tool.ruff.lint.isort] +combine-as-imports = true -[tool.pylint.MAIN] -fail-under = 8.0 -ignore-paths = [ - ".python_packages/.+", # Azure Functions local build - ".venv/.+", # Python virtual environment -] -init-hook='import sys; sys.path.append(".")' -jobs = 0 -py-version = "3.11" -recursive = true - -[tool.pylint.format] -max-line-length = "88" +[tool.ruff.format] +docstring-code-format = true [tool.pyright] pythonVersion = "3.11" diff --git a/requirements-dev.txt b/requirements-dev.txt index 35a7a40e..9d8c18c5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -125,14 +125,6 @@ asgiref==3.8.1 \ --hash=sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47 \ --hash=sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590 # via opentelemetry-instrumentation-asgi -astroid==3.2.3 \ - --hash=sha256:3eae9ea67c11c858cdd2c91337d2e816bd019ac897ca07d7b346ac10105fceb3 \ - --hash=sha256:7099b5a60985529d8d46858befa103b82d0d05a5a5e8b816b5303ed96075e1d9 - # via pylint -asttokens==2.4.1 \ - --hash=sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24 \ - --hash=sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0 - # via stack-data attrs==23.2.0 \ --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 @@ -209,30 +201,6 @@ beautifulsoup4==4.12.3 \ --hash=sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051 \ --hash=sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed # via django-htmlmin -black[jupyter]==24.4.2 \ - --hash=sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474 \ - --hash=sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1 \ - --hash=sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0 \ - --hash=sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8 \ - --hash=sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96 \ - --hash=sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1 \ - --hash=sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04 \ - --hash=sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021 \ - --hash=sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94 \ - 
--hash=sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d \ - --hash=sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c \ - --hash=sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7 \ - --hash=sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c \ - --hash=sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc \ - --hash=sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7 \ - --hash=sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d \ - --hash=sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c \ - --hash=sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741 \ - --hash=sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce \ - --hash=sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb \ - --hash=sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063 \ - --hash=sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e - # via call-center-ai (pyproject.toml) brotli==1.1.0 \ --hash=sha256:03d20af184290887bdea3f0f78c4f737d126c74dc2f3ccadf07e54ceca3bf208 \ --hash=sha256:0541e747cce78e24ea12d69176f6a7ddb690e62c425e01d31cc065e69ce55b48 \ @@ -483,7 +451,6 @@ click==8.1.7 \ --hash=sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28 \ --hash=sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de # via - # black # deptry # pip-tools # typer @@ -533,10 +500,6 @@ datasets==2.20.0 \ --hash=sha256:3c4dbcd27e0f642b9d41d20ff2efa721a5e04b32b2ca4009e0fc9139e324553f \ --hash=sha256:76ac02e3bdfff824492e20678f0b6b1b6d080515957fe834b00c2ba8d6b18e5e # via ragas -decorator==5.1.1 \ - --hash=sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330 \ - --hash=sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186 - # via ipython deepeval==0.21.59 \ --hash=sha256:3c57fdbbe9fdb5dc3407c7b444307ffc4bfb1067a79aa5984083bc0d3048f47f \ --hash=sha256:44abf67b5af4126c4d7da0613a055ca8d5a4cedbc4e5d33441ad4c39c7272fd3 @@ -565,7 +528,6 @@ dill==0.3.8 \ # via # datasets # multiprocess - # pylint distro==1.9.0 \ --hash=sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed \ --hash=sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2 @@ -588,10 +550,6 @@ execnet==2.1.1 \ --hash=sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc \ --hash=sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3 # via pytest-xdist -executing==2.0.1 \ - --hash=sha256:35afe2ce3affba8ee97f2d69927fa823b08b472b7b994e36a52a964b93d16147 \ - --hash=sha256:eac49ca94516ccc753f9fb5ce82603156e590b27525a8bc32cce8ae302eb61bc - # via stack-data filelock==3.15.4 \ --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 @@ -782,10 +740,6 @@ iniconfig==2.0.0 \ --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 # via pytest -ipython==8.26.0 \ - --hash=sha256:1cec0fbba8404af13facebe83d04436a7434c7400e59f47acf467c64abd0956c \ - --hash=sha256:e6b347c27bdf9c32ee9d31ae85defc525755a1869f14057e900675b9e8d6e6ff - # via black isodate==0.6.1 \ --hash=sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96 \ 
--hash=sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9 @@ -796,16 +750,6 @@ isodate==0.6.1 \ # azure-search-documents # azure-storage-queue # msrest -isort==5.13.2 \ - --hash=sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109 \ - --hash=sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6 - # via - # call-center-ai (pyproject.toml) - # pylint -jedi==0.19.1 \ - --hash=sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd \ - --hash=sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0 - # via ipython jinja2==3.1.4 \ --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d @@ -930,14 +874,6 @@ marshmallow==3.21.3 \ --hash=sha256:4f57c5e050a54d66361e826f94fba213eb10b67b2fdb02c3e0343ce207ba1662 \ --hash=sha256:86ce7fb914aa865001a4b2092c4c2872d13bc347f3d42673272cabfdbad386f1 # via dataclasses-json -matplotlib-inline==0.1.7 \ - --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \ - --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca - # via ipython -mccabe==0.7.0 \ - --hash=sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325 \ - --hash=sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e - # via pylint mdurl==0.1.2 \ --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba @@ -1073,9 +1009,7 @@ multiprocess==0.70.16 \ mypy-extensions==1.0.0 \ --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 - # via - # black - # typing-inspect + # via typing-inspect nest-asyncio==1.6.0 \ --hash=sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe \ --hash=sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c @@ -1369,7 +1303,6 @@ packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via - # black # build # datasets # huggingface-hub @@ -1408,18 +1341,6 @@ pandas==2.2.2 \ --hash=sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce \ --hash=sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad # via datasets -parso==0.8.4 \ - --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \ - --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d - # via jedi -pathspec==0.12.1 \ - --hash=sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08 \ - --hash=sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712 - # via black -pexpect==4.9.0 \ - --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \ - --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f - # via ipython phonenumbers==8.13.39 \ --hash=sha256:3ad2d086fa71e7eef409001b9195ac54bebb0c6e3e752209b558ca192c9229a0 \ --hash=sha256:db7ca4970d206b2056231105300753b1a5b229f43416f8c2b3010e63fbb68d77 @@ -1428,12 +1349,6 @@ pip-tools==7.4.1 \ --hash=sha256:4c690e5fbae2f21e87843e89c26191f0d9454f362d8acdbd695716493ec8b3a9 \ --hash=sha256:864826f5073864450e24dbeeb85ce3920cdfb09848a3d69ebf537b521f14bcc9 # via call-center-ai (pyproject.toml) 
-platformdirs==4.2.2 \ - --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ - --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 - # via - # black - # pylint pluggy==1.5.0 \ --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 @@ -1444,10 +1359,6 @@ portalocker==2.10.0 \ # via # deepeval # msal-extensions -prompt-toolkit==3.0.47 \ - --hash=sha256:0d7bfa67001d5e39d02c224b663abc33687405033a8c422d0d675a5a13361d10 \ - --hash=sha256:1e1b29cb58080b1e69f207c893a1a7bf16d127a5c30c9d17a25a5d77792e5360 - # via ipython protobuf==4.25.1 \ --hash=sha256:0bf384e75b92c42830c0a679b0cd4d6e2b36ae0cf3dbb1e1dfdda48a244f4bcd \ --hash=sha256:0f881b589ff449bf0b931a711926e9ddaad3b35089cc039ce1af50b21a4ae8cb \ @@ -1484,14 +1395,6 @@ psutil==5.9.8 \ # via # azure-monitor-opentelemetry-exporter # pytest-xdist -ptyprocess==0.7.0 \ - --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \ - --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220 - # via pexpect -pure-eval==0.2.2 \ - --hash=sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350 \ - --hash=sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3 - # via stack-data pyarrow==16.1.0 \ --hash=sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a \ --hash=sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2 \ @@ -1695,9 +1598,7 @@ pydantic-settings==2.3.3 \ pygments==2.18.0 \ --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a - # via - # ipython - # rich + # via rich pyjwt[crypto]==2.8.0 \ --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 @@ -1705,10 +1606,6 @@ pyjwt[crypto]==2.8.0 \ # call-center-ai (pyproject.toml) # msal # twilio -pylint==3.2.5 \ - --hash=sha256:32cd6c042b5004b8e857d727708720c54a676d1e22917cf1a2df9b4d4868abd6 \ - --hash=sha256:e9b7171e242dcc6ebd0aaa7540481d1a72860748a0a7816b8fe6cf6c80a6fe7e - # via call-center-ai (pyproject.toml) pyproject-hooks==1.1.0 \ --hash=sha256:4b37730834edbd6bd37f26ece6b44802fb1c1ee2ece0e54ddff8bfc06db86965 \ --hash=sha256:7ceeefe9aec63a1064c18d939bdc3adf2d8aa1988a510afec15151578b232aa2 @@ -1942,6 +1839,26 @@ rich==13.7.1 \ # via # deepeval # typer +ruff==0.6.2 \ + --hash=sha256:094600ee88cda325988d3f54e3588c46de5c18dae09d683ace278b11f9d4d534 \ + --hash=sha256:1175d39faadd9a50718f478d23bfc1d4da5743f1ab56af81a2b6caf0a2394f23 \ + --hash=sha256:17002fe241e76544448a8e1e6118abecbe8cd10cf68fde635dad480dba594570 \ + --hash=sha256:239ee6beb9e91feb8e0ec384204a763f36cb53fb895a1a364618c6abb076b3be \ + --hash=sha256:279d5f7d86696df5f9549b56b9b6a7f6c72961b619022b5b7999b15db392a4da \ + --hash=sha256:2aed7e243be68487aa8982e91c6e260982d00da3f38955873aecd5a9204b1d66 \ + --hash=sha256:316d418fe258c036ba05fbf7dfc1f7d3d4096db63431546163b472285668132b \ + --hash=sha256:3dbeac76ed13456f8158b8f4fe087bf87882e645c8e8b606dd17b0b66c2c1158 \ + --hash=sha256:5b939f9c86d51635fe486585389f54582f0d65b8238e08c327c1534844b3bb9a \ + --hash=sha256:5c8cbc6252deb3ea840ad6a20b0f8583caab0c5ef4f9cca21adc5a92b8f79f3c \ + --hash=sha256:7438a7288f9d67ed3c8ce4d059e67f7ed65e9fe3aa2ab6f5b4b3610e57e3cb56 \ + 
--hash=sha256:7db6880c53c56addb8638fe444818183385ec85eeada1d48fc5abe045301b2f1 \ + --hash=sha256:a8f310d63af08f583363dfb844ba8f9417b558199c58a5999215082036d795a1 \ + --hash=sha256:d0d62ca91219f906caf9b187dea50d17353f15ec9bb15aae4a606cd697b49b4c \ + --hash=sha256:d371f7fc9cec83497fe7cf5eaf5b76e22a8efce463de5f775a1826197feb9df8 \ + --hash=sha256:d72b8b3abf8a2d51b7b9944a41307d2f442558ccb3859bbd87e6ae9be1694a5d \ + --hash=sha256:d9f3469c7dd43cd22eb1c3fc16926fb8258d50cb1b216658a07be95dd117b0f2 \ + --hash=sha256:f28fcd2cd0e02bdf739297516d5643a945cc7caf09bd9bcb4d932540a5ea4fa9 + # via call-center-ai (pyproject.toml) sentry-sdk==2.9.0 \ --hash=sha256:0bea5fa8b564cc0d09f2e6f55893e8f70286048b0ffb3a341d5b695d1af0e6ee \ --hash=sha256:4c85bad74df9767976afb3eeddc33e0e153300e887d637775a753a35ef99bee6 @@ -1954,7 +1871,6 @@ six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # via - # asttokens # azure-communication-sms # azure-core # html5lib @@ -2025,10 +1941,6 @@ sqlalchemy==2.0.31 \ # via # langchain # langchain-community -stack-data==0.6.3 \ - --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \ - --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695 - # via ipython tabulate==0.9.0 \ --hash=sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c \ --hash=sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f @@ -2084,14 +1996,6 @@ tiktoken==0.7.0 \ # langchain-openai # opentelemetry-instrumentation-openai # ragas -tokenize-rt==5.2.0 \ - --hash=sha256:9fe80f8a5c1edad2d3ede0f37481cc0cc1538a2f442c9c2f9e4feacd2792d054 \ - --hash=sha256:b79d41a65cfec71285433511b50271b05da3584a1da144a0752e9c621a285289 - # via black -tomlkit==0.13.0 \ - --hash=sha256:08ad192699734149f5b97b45f1f18dad7eb1b6d16bc72ad0c2335772650d7b72 \ - --hash=sha256:7075d3042d03b80f603482d69bf0c8f345c2b30e41699fd8883227f89972b264 - # via pylint tqdm==4.66.4 \ --hash=sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644 \ --hash=sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb @@ -2100,12 +2004,6 @@ tqdm==4.66.4 \ # deepeval # huggingface-hub # openai -traitlets==5.14.3 \ - --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \ - --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f - # via - # ipython - # matplotlib-inline twilio==9.2.1 \ --hash=sha256:dc1eb456182232e44f1a0e129a8956e7b7b45e4c0fb7f5b25b3ab6fa76e0efad \ --hash=sha256:fd18f5e72429dedd2f83df821308d74b961b2a049ddb5c3cf7dd1eff8794fe1b @@ -2128,7 +2026,6 @@ typing-extensions==4.12.2 \ # azure-storage-queue # call-center-ai (pyproject.toml) # huggingface-hub - # ipython # openai # opentelemetry-sdk # pydantic @@ -2150,10 +2047,6 @@ urllib3==2.2.2 \ # via # requests # sentry-sdk -wcwidth==0.2.13 \ - --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \ - --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5 - # via prompt-toolkit webencodings==0.5.1 \ --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \ --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923 From bd191056cdf73c18342877272fc7ac10369c7689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Thu, 5 Sep 2024 20:10:21 +0200 Subject: [PATCH 02/12] quality: Comply with Ruff --- examples/blocklist.ipynb | 10 
++--- function_app.py | 51 +++++++++++------------- helpers/__init__.py | 4 +- helpers/call_events.py | 35 +++++++++-------- helpers/call_llm.py | 14 +++---- helpers/call_utils.py | 52 +++++++++++-------------- helpers/config.py | 10 ++--- helpers/config_models/ai_search.py | 8 ++-- helpers/config_models/cache.py | 23 ++++++----- helpers/config_models/conversation.py | 14 +++---- helpers/config_models/database.py | 27 +++++++------ helpers/config_models/llm.py | 26 ++++++------- helpers/config_models/prompts.py | 35 +++++++---------- helpers/config_models/root.py | 2 +- helpers/config_models/sms.py | 25 ++++++------ helpers/http.py | 18 ++++----- helpers/llm_tools.py | 7 ++-- helpers/llm_utils.py | 14 +++---- helpers/llm_worker.py | 56 +++++++++++++-------------- helpers/monitoring.py | 2 +- helpers/translation.py | 12 ++---- models/call.py | 24 ++++++------ models/claim.py | 2 +- models/message.py | 32 +++++++-------- models/readiness.py | 3 +- models/reminder.py | 2 +- persistence/ai_search.py | 10 ++--- persistence/communication_services.py | 2 +- persistence/cosmos_db.py | 30 +++++++------- persistence/icache.py | 5 +-- persistence/isearch.py | 2 +- persistence/isms.py | 1 - persistence/istore.py | 8 ++-- persistence/memory.py | 6 +-- persistence/redis.py | 7 ++-- persistence/sqlite.py | 18 ++++----- persistence/twilio.py | 6 +-- tests/cache.py | 2 +- tests/conftest.py | 42 +++++++++----------- tests/llm.py | 11 +++--- tests/search.py | 12 +++--- tests/store.py | 2 +- 42 files changed, 310 insertions(+), 362 deletions(-) diff --git a/examples/blocklist.ipynb b/examples/blocklist.ipynb index ee169d7f..8549cea2 100644 --- a/examples/blocklist.ipynb +++ b/examples/blocklist.ipynb @@ -63,14 +63,14 @@ "metadata": {}, "outputs": [], "source": [ + "import pandas as pd\n", "from azure.ai.contentsafety import BlocklistClient\n", "from azure.ai.contentsafety.models import (\n", + " AddOrUpdateTextBlocklistItemsOptions,\n", " TextBlocklist,\n", " TextBlocklistItem,\n", - " AddOrUpdateTextBlocklistItemsOptions,\n", ")\n", "from azure.core.credentials import AzureKeyCredential\n", - "import pandas as pd\n", "\n", "key = AzureKeyCredential(\"xxx\")\n", "client = BlocklistClient(\n", @@ -102,10 +102,10 @@ "blocklists = client.list_text_blocklists()\n", "\n", "if not blocklists:\n", - " print(\"There are no blocklists.\")\n", + " print(\"There are no blocklists.\") # noqa: T201\n", "\n", "for blocklist in blocklists:\n", - " print(f\"{blocklist.blocklist_name}: {blocklist.description}\")" + " print(f\"{blocklist.blocklist_name}: {blocklist.description}\") # noqa: T201" ] }, { @@ -135,7 +135,7 @@ " block_items[row[\"blocklist\"]].append(text)\n", "\n", "for blocklist, words in block_items.items():\n", - " print(f\"Creating blocklist {blocklist} with {len(words)} words\")\n", + " print(f\"Creating blocklist {blocklist} with {len(words)} words\") # noqa: T201\n", " client.create_or_update_text_blocklist(\n", " blocklist_name=blocklist,\n", " options=TextBlocklist(blocklist_name=blocklist),\n", diff --git a/function_app.py b/function_app.py index 15d31b48..d793b9e4 100644 --- a/function_app.py +++ b/function_app.py @@ -3,7 +3,6 @@ from datetime import timedelta from http import HTTPStatus from os import getenv -from typing import Any, Optional, Union from urllib.parse import quote_plus, urljoin from uuid import UUID @@ -66,7 +65,7 @@ ) # pyright: ignore # Azure Communication Services -_automation_client: Optional[CallAutomationClient] = None +_automation_client: CallAutomationClient | None = None 
_source_caller = PhoneNumberIdentifier(CONFIG.communication_services.phone_number) logger.info("Using phone number %s", CONFIG.communication_services.phone_number) _communication_services_jwks_client = jwt.PyJWKClient( @@ -97,7 +96,9 @@ methods=["GET"], ) @tracer.start_as_current_span("openapi_get") -async def openapi_get(req: func.HttpRequest) -> func.HttpResponse: +async def openapi_get( + req: func.HttpRequest, # noqa: ARG001 +) -> func.HttpResponse: """ Generate the OpenAPI specification for the API. Returns a JSON object with the OpenAPI specification. """ - with open( + with open( # noqa: ASYNC230 encoding="utf-8", file=resources_dir("openapi.json"), - mode="r", ) as f: openapi = json.load(f) openapi["info"]["version"] = CONFIG.version @@ -130,7 +130,9 @@ async def openapi_get(req: func.HttpRequest) -> func.HttpResponse: methods=["GET"], ) @tracer.start_as_current_span("health_liveness_get") -async def health_liveness_get(req: func.HttpRequest) -> func.HttpResponse: +async def health_liveness_get( + req: func.HttpRequest, # noqa: ARG001 +) -> func.HttpResponse: """ Check if the service is running. @@ -146,7 +148,9 @@ async def health_liveness_get(req: func.HttpRequest) -> func.HttpResponse: methods=["GET"], ) @tracer.start_as_current_span("health_readiness_get") -async def health_readiness_get(req: func.HttpRequest) -> func.HttpResponse: +async def health_readiness_get( + req: func.HttpRequest, # noqa: ARG001 +) -> func.HttpResponse: """ Check if the service is ready to serve requests. @@ -415,9 +419,7 @@ async def call_post(req: func.HttpRequest) -> func.HttpResponse: cognitive_services_endpoint=CONFIG.cognitive_service.endpoint, source_caller_id_number=_source_caller, # deepcode ignore AttributeLoadOnNone: Phone number is validated with Pydantic - target_participant=PhoneNumberIdentifier( - initiate.phone_number - ), # pyright: ignore + target_participant=PhoneNumberIdentifier(initiate.phone_number), # pyright: ignore ) logger.info( "Created call with connection id: %s", @@ -556,7 +558,7 @@ async def communicationservices_event_post( Returns a 204 No Content if the events are properly formatted. A 401 Unauthorized if the JWT token is invalid. Otherwise, returns a 400 Bad Request.
""" # Validate JWT token - service_jwt: Union[str, None] = req.headers.get("Authorization") + service_jwt: str | None = req.headers.get("Authorization") if not service_jwt: return _standard_error( message="Authorization header missing", @@ -614,7 +616,8 @@ async def communicationservices_event_post( return func.HttpResponse(status_code=HTTPStatus.NO_CONTENT) -async def _communicationservices_event_worker( +# TODO: Refacto, too long (and remove PLR0912/PLR0915 ignore) +async def _communicationservices_event_worker( # noqa: PLR0912, PLR0915 call_id: UUID, event_dict: dict, post: func.Out[str], @@ -688,7 +691,7 @@ async def _trainings_callback(_call: CallStateModel) -> None: recognition_result: str = event.data["recognitionType"] if recognition_result == "speech": # Handle voice - speech_text: Optional[str] = event.data["speechResult"]["speech"] + speech_text: str | None = event.data["speechResult"]["speech"] if speech_text: await on_speech_recognized( call=call, @@ -835,7 +838,7 @@ def _trigger_post_event( async def _communicationservices_event_url( - phone_number: PhoneNumber, initiate: Optional[CallInitiateModel] = None + phone_number: PhoneNumber, initiate: CallInitiateModel | None = None ) -> tuple[str, CallStateModel]: """ Generate the callback URL for a call. @@ -933,7 +936,7 @@ async def _trainings_callback(_call: CallStateModel) -> None: ) -def _str_to_contexts(value: Optional[str]) -> Optional[set[CallContextEnum]]: +def _str_to_contexts(value: str | None) -> set[CallContextEnum] | None: """ Convert a string to a set of contexts. @@ -965,12 +968,7 @@ def _validation_error( Response body is a JSON object with the following structure: ``` - { - "error": { - "message": "Validation error", - "details": ["Error message"] - } - } + {"error": {"message": "Validation error", "details": ["Error message"]}} ``` Returns a 400 Bad Request with a JSON body. @@ -991,7 +989,7 @@ def _validation_error( def _standard_error( message: str, - details: Optional[list[str]] = None, + details: list[str] | None = None, status_code: HTTPStatus = HTTPStatus.BAD_REQUEST, ) -> func.HttpResponse: """ @@ -1000,12 +998,7 @@ def _standard_error( Response body is a JSON object with the following structure: ``` - { - "error": { - "message": "Error message", - "details": ["Error details"] - } - } + {"error": {"message": "Error message", "details": ["Error details"]}} ``` Returns a JOSN with a JSON body and the specified status code. @@ -1031,7 +1024,7 @@ async def _use_automation_client() -> CallAutomationClient: Returns a `CallAutomationClient` instance. 
""" - global _automation_client # pylint: disable=global-statement + global _automation_client # noqa: PLW0603 if not isinstance(_automation_client, CallAutomationClient): _automation_client = CallAutomationClient( # Deployment diff --git a/helpers/__init__.py b/helpers/__init__.py index ba2b0525..cdf06e2e 100644 --- a/helpers/__init__.py +++ b/helpers/__init__.py @@ -4,10 +4,10 @@ def init_env(): path = find_dotenv() if not path: - print("Env file not found") + print("Env file not found") # noqa: T201 return load_dotenv(path) - print(f'Env file loaded from "{path}"') + print(f'Env file loaded from "{path}"') # noqa: T201 init_env() diff --git a/helpers/call_events.py b/helpers/call_events.py index 099fb30f..c19d87a9 100644 --- a/helpers/call_events.py +++ b/helpers/call_events.py @@ -1,5 +1,5 @@ import asyncio -from typing import Awaitable, Callable, Optional +from collections.abc import Awaitable, Callable from azure.communication.callautomation import DtmfTone, RecognitionChoice from azure.communication.callautomation.aio import CallAutomationClient @@ -154,7 +154,7 @@ async def on_speech_recognized( async def on_recognize_timeout_error( call: CallStateModel, client: CallAutomationClient, - contexts: Optional[set[CallContextEnum]], + contexts: set[CallContextEnum] | None, ) -> None: if ( contexts and CallContextEnum.IVR_LANG_SELECT in contexts @@ -224,7 +224,8 @@ async def on_recognize_unknown_error( ) -> None: span_attribute(CallAttributes.CALL_CHANNEL, "voice") - if error_code == 8511: # Failure while trying to play the prompt + if error_code == 8511: # noqa: PLR2004 + # Failure while trying to play the prompt logger.warning("Failed to play prompt") else: logger.warning( @@ -245,7 +246,7 @@ async def on_recognize_unknown_error( async def on_play_completed( call: CallStateModel, client: CallAutomationClient, - contexts: Optional[set[CallContextEnum]], + contexts: set[CallContextEnum] | None, post_callback: Callable[[CallStateModel], Awaitable[None]], ) -> None: logger.debug("Play completed") @@ -279,15 +280,19 @@ async def on_play_error(error_code: int) -> None: logger.debug("Play failed") span_attribute(CallAttributes.CALL_CHANNEL, "voice") # See: https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/communication-services/how-tos/call-automation/play-action.md - if error_code == 8535: # Action failed, file format + if error_code == 8535: # noqa: PLR2004 + # Action failed, file format logger.warning("Error during media play, file format is invalid") - elif error_code == 8536: # Action failed, file downloaded + elif error_code == 8536: # noqa: PLR2004 + # Action failed, file downloaded logger.warning("Error during media play, file could not be downloaded") - elif error_code == 8565: # Action failed, AI services config + elif error_code == 8565: # noqa: PLR2004 + # Action failed, AI services config logger.error( "Error during media play, impossible to connect with Azure AI services" ) - elif error_code == 9999: # Unknown + elif error_code == 9999: # noqa: PLR2004 + # Unknown error code logger.warning("Error during media play, unknown internal server error") else: logger.warning("Error during media play, unknown error code %s", error_code) @@ -429,7 +434,7 @@ async def on_end_call( Shortcut to run all post-call intelligence tasks in background. 
""" if ( - len(call.messages) >= 3 + len(call.messages) >= 3 # noqa: PLR2004 and call.messages[-3].action == MessageActionEnum.CALL and call.messages[-2].persona == MessagePersonaEnum.ASSISTANT and call.messages[-1].action == MessageActionEnum.HANGUP @@ -451,11 +456,9 @@ async def _intelligence_sms(call: CallStateModel) -> None: Send an SMS report to the customer. """ - def _validate(req: Optional[str]) -> tuple[bool, Optional[str], Optional[str]]: + def _validate(req: str | None) -> tuple[bool, str | None, str | None]: if not req: return False, "No SMS content", None - if len(req) < 10: - return False, "SMS content too short", None return True, None, req content = await completion_sync( @@ -503,8 +506,8 @@ async def _intelligence_synthesis(call: CallStateModel) -> None: logger.debug("Synthesizing call") def _validate( - req: Optional[str], - ) -> tuple[bool, Optional[str], Optional[SynthesisModel]]: + req: str | None, + ) -> tuple[bool, str | None, SynthesisModel | None]: if not req: return False, "Empty response", None try: @@ -534,8 +537,8 @@ async def _intelligence_next(call: CallStateModel) -> None: logger.debug("Generating next action") def _validate( - req: Optional[str], - ) -> tuple[bool, Optional[str], Optional[NextModel]]: + req: str | None, + ) -> tuple[bool, str | None, NextModel | None]: if not req: return False, "Empty response", None try: diff --git a/helpers/call_llm.py b/helpers/call_llm.py index f9f84ea6..a142db43 100644 --- a/helpers/call_llm.py +++ b/helpers/call_llm.py @@ -1,6 +1,6 @@ import asyncio import time -from typing import Awaitable, Callable +from collections.abc import Awaitable, Callable from azure.communication.callautomation.aio import CallAutomationClient from openai import APIError @@ -35,8 +35,9 @@ _db = CONFIG.database.instance() +# TODO: Refacto, this function is too long (and remove PLR0912/PLR0915 ignore) @tracer.start_as_current_span("call_load_llm_chat") -async def load_llm_chat( +async def load_llm_chat( # noqa: PLR0912, PLR0915 call: CallStateModel, client: CallAutomationClient, post_callback: Callable[[CallStateModel], Awaitable[None]], @@ -171,9 +172,7 @@ def _clear_tasks() -> None: text=await CONFIG.prompts.tts.timeout_loading(call), ) - elif ( - loading_task.done() - ): # Do not play timeout prompt plus loading, it can be frustrating for the user + elif loading_task.done(): # Do not play timeout prompt plus loading, it can be frustrating for the user loading_task = _loading_task() await handle_media( call=call, @@ -184,7 +183,7 @@ def _clear_tasks() -> None: # Wait to not block the event loop for other requests await asyncio.sleep(1) - except Exception: # pylint: disable=broad-exception-caught + except Exception: logger.warning("Error loading intelligence", exc_info=True) if is_error: # Error during chat @@ -233,8 +232,9 @@ def _clear_tasks() -> None: return call +# TODO: Refacto, this function is too long (and remove PLR0911/PLR0912/PLR0915 ignore) @tracer.start_as_current_span("call_execute_llm_chat") -async def _execute_llm_chat( +async def _execute_llm_chat( # noqa: PLR0911, PLR0912, PLR0915 call: CallStateModel, client: CallAutomationClient, post_callback: Callable[[CallStateModel], Awaitable[None]], diff --git a/helpers/call_utils.py b/helpers/call_utils.py index f8bb02e0..11322013 100644 --- a/helpers/call_utils.py +++ b/helpers/call_utils.py @@ -1,8 +1,8 @@ import json import re +from collections.abc import AsyncGenerator, Generator from contextlib import asynccontextmanager from enum import Enum -from typing import AsyncGenerator, 
Generator, Optional from azure.communication.callautomation import ( FileSource, @@ -26,11 +26,12 @@ StyleEnum as MessageStyleEnum, ) +_MAX_CHARACTERS_PER_TTS = 400 # Azure Speech Service TTS limit is 400 characters _SENTENCE_PUNCTUATION_R = ( r"([!?;]+|[\.\-:]+(?:$| ))" # Split by sentence by punctuation ) _TTS_SANITIZER_R = re.compile( - r"[^\w\sÀ-ÿ'«»“”\"\"‘’''(),.!?;:\-\+_@/&€$%=]" + r"[^\w\sÀ-ÿ'«»“”\"\"‘’''(),.!?;:\-\+_@/&€$%=]" # noqa: RUF001 ) # Sanitize text for TTS @@ -79,9 +80,9 @@ def tts_sentence_split( async def _handle_recognize_media( call: CallStateModel, client: CallAutomationClient, - context: Optional[ContextEnum], + context: ContextEnum | None, style: MessageStyleEnum, - text: Optional[str], + text: str | None, ) -> None: """ Play a media to a call participant and start recognizing the response. @@ -107,9 +108,7 @@ async def _handle_recognize_media( else None ), # If no text is provided, only recognize speech_language=call.lang.short_code, - target_participant=PhoneNumberIdentifier( - call.initiate.phone_number - ), # pyright: ignore + target_participant=PhoneNumberIdentifier(call.initiate.phone_number), # pyright: ignore ) except ResourceNotFoundError: logger.debug("Call hung up before recognizing") @@ -124,7 +123,7 @@ async def _handle_play_text( call: CallStateModel, client: CallAutomationClient, text: str, - context: Optional[ContextEnum] = None, + context: ContextEnum | None = None, style: MessageStyleEnum = MessageStyleEnum.NONE, ) -> None: """ @@ -157,7 +156,7 @@ async def handle_media( client: CallAutomationClient, call: CallStateModel, sound_url: str, - context: Optional[ContextEnum] = None, + context: ContextEnum | None = None, ) -> None: """ Play a media to a call participant. @@ -180,11 +179,11 @@ async def handle_media( raise e -async def handle_recognize_text( +async def handle_recognize_text( # noqa: PLR0913 call: CallStateModel, client: CallAutomationClient, - text: Optional[str], - context: Optional[ContextEnum] = None, + text: str | None, + context: ContextEnum | None = None, no_response_error: bool = False, store: bool = True, style: MessageStyleEnum = MessageStyleEnum.NONE, @@ -231,11 +230,11 @@ async def handle_recognize_text( ) -async def handle_play_text( +async def handle_play_text( # noqa: PLR0913 call: CallStateModel, client: CallAutomationClient, text: str, - context: Optional[ContextEnum] = None, + context: ContextEnum | None = None, store: bool = True, style: MessageStyleEnum = MessageStyleEnum.NONE, ) -> None: @@ -310,11 +309,11 @@ async def _chunk_before_tts( ) ) - # Split text in chunks of max 400 characters, separated by sentence + # Split text in chunks, separated by sentence chunks = [] chunk = "" for to_add, _ in tts_sentence_split(text, True): - if len(chunk) + len(to_add) >= 400: + if len(chunk) + len(to_add) >= _MAX_CHARACTERS_PER_TTS: chunks.append(chunk.strip()) # Remove trailing space chunk = "" chunk += to_add @@ -332,16 +331,13 @@ def _audio_from_text( """ Generate an audio source that can be read by Azure Communication Services SDK. - Text requires to be SVG escaped, and SSML tags are used to control the voice. Plus, text is slowed down by 5% to make it more understandable for elderly people. Text is also truncated to 400 characters, as this is the limit of Azure Communication Services TTS, but a warning is logged. + Text requires to be SVG escaped, and SSML tags are used to control the voice. Plus, text is slowed down by 5% to make it more understandable for elderly people. 
Text is also truncated, as this is the limit of Azure Communication Services TTS, but a warning is logged. See: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-structure """ - # Azure Speech Service TTS limit is 400 characters - if len(text) > 400: - logger.warning( - "Text is too long to be processed by TTS, truncating to 400 characters, fix this!" - ) - text = text[:400] + if len(text) > _MAX_CHARACTERS_PER_TTS: + logger.warning("Text is too long to be processed by TTS, truncating, fix this!") + text = text[:_MAX_CHARACTERS_PER_TTS] # Escape text for SSML text = text.replace("&", "&").replace("<", "<").replace(">", ">") # Build SSML tree @@ -368,7 +364,7 @@ async def handle_recognize_ivr( choices: list[RecognitionChoice], client: CallAutomationClient, text: str, - context: Optional[ContextEnum] = None, + context: ContextEnum | None = None, ) -> None: """ Recognize an IVR response after playing a text. @@ -390,9 +386,7 @@ async def handle_recognize_ivr( text=text, ), speech_language=call.lang.short_code, - target_participant=PhoneNumberIdentifier( - call.initiate.phone_number - ), # pyright: ignore + target_participant=PhoneNumberIdentifier(call.initiate.phone_number), # pyright: ignore ) except ResourceNotFoundError: logger.debug("Call hung up before recognizing") @@ -420,7 +414,7 @@ async def handle_transfer( client: CallAutomationClient, call: CallStateModel, target: str, - context: Optional[ContextEnum] = None, + context: ContextEnum | None = None, ) -> None: logger.info("Transferring call: %s", target) try: @@ -439,7 +433,7 @@ async def handle_transfer( raise e -def _context_builder(contexts: Optional[set[Optional[ContextEnum]]]) -> Optional[str]: +def _context_builder(contexts: set[ContextEnum | None] | None) -> str | None: if not contexts: return None return json.dumps([context.value for context in contexts if context]) diff --git a/helpers/config.py b/helpers/config.py index a6b9c276..c66b5290 100644 --- a/helpers/config.py +++ b/helpers/config.py @@ -1,5 +1,4 @@ from os import environ -from typing import Optional import yaml from dotenv import find_dotenv @@ -17,16 +16,16 @@ class ConfigBadFormat(Exception): def load_config() -> RootModel: - config: Optional[RootModel] = None + config: RootModel | None = None config_env = "CONFIG_JSON" config_file = "config.yaml" if config_env in environ: config = RootModel.model_validate_json(environ[config_env]) - print(f'Config loaded from env "{config_env}"') + print(f'Config loaded from env "{config_env}"') # noqa: T201 return config - print(f'Cannot find env "{config_env}", trying to load from file') + print(f'Cannot find env "{config_env}", trying to load from file') # noqa: T201 path = find_dotenv(filename=config_file) if not path: raise ConfigNotFound(f'Cannot find config file "{config_file}"') @@ -34,10 +33,9 @@ def load_config() -> RootModel: with open( encoding="utf-8", file=path, - mode="r", ) as f: config = RootModel.model_validate(yaml.safe_load(f)) - print(f'Config loaded from file "{path}"') + print(f'Config loaded from file "{path}"') # noqa: T201 return config except ValidationError as e: raise ConfigBadFormat("Config values are not valid") from e diff --git a/helpers/config_models/ai_search.py b/helpers/config_models/ai_search.py index 69ba28eb..6825091f 100644 --- a/helpers/config_models/ai_search.py +++ b/helpers/config_models/ai_search.py @@ -1,4 +1,4 @@ -from functools import lru_cache +from functools import cache from pydantic import BaseModel, Field, SecretStr @@ -14,10 +14,10 
@@ class AiSearchModel(BaseModel, frozen=True): strictness: float = Field(default=2, ge=0, le=5) top_n_documents: int = Field(default=5, ge=1) - @lru_cache(maxsize=None) # pylint: disable=method-cache-max-size-none + @cache def instance(self) -> ISearch: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel - from persistence.ai_search import ( # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG + from persistence.ai_search import ( AiSearchSearch, ) diff --git a/helpers/config_models/cache.py b/helpers/config_models/cache.py index 2eccafab..4f615793 100644 --- a/helpers/config_models/cache.py +++ b/helpers/config_models/cache.py @@ -1,6 +1,5 @@ from enum import Enum -from functools import lru_cache -from typing import Optional +from functools import cache from pydantic import BaseModel, Field, SecretStr, ValidationInfo, field_validator @@ -15,9 +14,9 @@ class ModeEnum(str, Enum): class MemoryModel(BaseModel, frozen=True): max_size: int = Field(default=100, ge=10) - @lru_cache(maxsize=None) # pylint: disable=method-cache-max-size-none + @cache def instance(self) -> ICache: - from persistence.memory import ( # pylint: disable=import-outside-toplevel + from persistence.memory import ( MemoryCache, ) @@ -31,9 +30,9 @@ class RedisModel(BaseModel, frozen=True): port: int = 6379 ssl: bool = True - @lru_cache(maxsize=None) # pylint: disable=method-cache-max-size-none + @cache def instance(self) -> ICache: - from persistence.redis import ( # pylint: disable=import-outside-toplevel + from persistence.redis import ( RedisCache, ) @@ -41,17 +40,17 @@ def instance(self) -> ICache: class CacheModel(BaseModel): - memory: Optional[MemoryModel] = MemoryModel() # Object is fully defined by default + memory: MemoryModel | None = MemoryModel() # Object is fully defined by default mode: ModeEnum = ModeEnum.MEMORY - redis: Optional[RedisModel] = None + redis: RedisModel | None = None @field_validator("redis") @classmethod def _validate_sqlite( cls, - redis: Optional[RedisModel], + redis: RedisModel | None, info: ValidationInfo, - ) -> Optional[RedisModel]: + ) -> RedisModel | None: if not redis and info.data.get("mode", None) == ModeEnum.REDIS: raise ValueError("Redis config required") return redis @@ -60,9 +59,9 @@ def _validate_sqlite( @classmethod def _validate_memory( cls, - memory: Optional[MemoryModel], + memory: MemoryModel | None, info: ValidationInfo, - ) -> Optional[MemoryModel]: + ) -> MemoryModel | None: if not memory and info.data.get("mode", None) == ModeEnum.MEMORY: raise ValueError("Memory config required") return memory diff --git a/helpers/config_models/conversation.py b/helpers/config_models/conversation.py index da63d611..6a7eed54 100644 --- a/helpers/config_models/conversation.py +++ b/helpers/config_models/conversation.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Annotated, Any, Optional, Union +from typing import Annotated, Any from pydantic import BaseModel, ConfigDict, EmailStr, Field, create_model from pydantic.fields import FieldInfo @@ -15,7 +15,7 @@ class LanguageEntryModel(BaseModel): See: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts#supported-languages """ - custom_voice_endpoint_id: Optional[str] = None + custom_voice_endpoint_id: str | None = None pronunciations_en: list[str] short_code: str voice: str @@ -128,9 +128,7 @@ class WorkflowInitiateModel(BaseModel): ge=0.75, le=1.25, ) - task: str = ( - "Helping the customer to file an insurance claim. 
The customer is probably calling because they have a problem with something covered by their policy, but it's not certain. The assistant needs information from the customer to complete the claim. The conversation is over when all the data relevant to the case has been collected. Filling in as much information as possible is important for further processing." - ) + task: str = "Helping the customer to file an insurance claim. The customer is probably calling because they have a problem with something covered by their policy, but it's not certain. The assistant needs information from the customer to complete the claim. The conversation is over when all the data relevant to the case has been collected. Filling in as much information as possible is important for further processing." def claim_model(self) -> type[BaseModel]: return _fields_to_pydantic( @@ -197,10 +195,10 @@ def _fields_to_pydantic(name: str, fields: list[ClaimFieldModel]) -> type[BaseMo def _field_to_pydantic( field: ClaimFieldModel, -) -> Union[Annotated[Any, ...], tuple[type, FieldInfo]]: +) -> Annotated[Any, ...] | tuple[type, FieldInfo]: field_type = _type_to_pydantic(field.type) return ( - Optional[field_type], + field_type | None, Field( default=None, description=field.description, @@ -210,7 +208,7 @@ def _field_to_pydantic( def _type_to_pydantic( data: ClaimTypeEnum, -) -> Union[type, Annotated[Any, ...]]: +) -> type | Annotated[Any, ...]: if data == ClaimTypeEnum.DATETIME: return datetime if data == ClaimTypeEnum.EMAIL: diff --git a/helpers/config_models/database.py b/helpers/config_models/database.py index 6f876321..0be2adfd 100644 --- a/helpers/config_models/database.py +++ b/helpers/config_models/database.py @@ -1,6 +1,5 @@ from enum import Enum -from functools import lru_cache -from typing import Optional +from functools import cache from pydantic import BaseModel, SecretStr, ValidationInfo, field_validator @@ -18,10 +17,10 @@ class CosmosDbModel(BaseModel, frozen=True): database: str endpoint: str - @lru_cache(maxsize=None) # pylint: disable=method-cache-max-size-none + @cache def instance(self) -> IStore: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel - from persistence.cosmos_db import ( # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG + from persistence.cosmos_db import ( CosmosDbStore, ) @@ -41,10 +40,10 @@ def full_path(self) -> str: """ return f"{self.path}-v{self.schema_version}.sqlite" - @lru_cache(maxsize=None) # pylint: disable=method-cache-max-size-none + @cache def instance(self) -> IStore: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel - from persistence.sqlite import ( # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG + from persistence.sqlite import ( SqliteStore, ) @@ -52,17 +51,17 @@ def instance(self) -> IStore: class DatabaseModel(BaseModel): - cosmos_db: Optional[CosmosDbModel] = None + cosmos_db: CosmosDbModel | None = None mode: ModeEnum = ModeEnum.SQLITE - sqlite: Optional[SqliteModel] = SqliteModel() # Object is fully defined by default + sqlite: SqliteModel | None = SqliteModel() # Object is fully defined by default @field_validator("cosmos_db") @classmethod def _validate_cosmos_db( cls, - cosmos_db: Optional[CosmosDbModel], + cosmos_db: CosmosDbModel | None, info: ValidationInfo, - ) -> Optional[CosmosDbModel]: + ) -> CosmosDbModel | None: if not cosmos_db and info.data.get("mode", None) == ModeEnum.COSMOS_DB: raise ValueError("Cosmos DB config required") return cosmos_db 
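A note on the swaps from `@lru_cache(maxsize=None)` to `@cache` in these config models: `functools.cache` is exactly `lru_cache(maxsize=None)`, and caching an instance method keys the cache on `self`, so it requires hashable instances. The `frozen=True` Pydantic models used here are immutable and hashable, which is what makes the decorator safe. A minimal standalone sketch (hypothetical `Config` model, not code from this repository):

from functools import cache

from pydantic import BaseModel


class Config(BaseModel, frozen=True):  # frozen=True makes instances hashable
    path: str

    @cache  # equivalent to lru_cache(maxsize=None); cache is keyed on (self,)
    def instance(self) -> str:
        # In the real models this builds a store/client once per distinct config
        return f"store-for-{self.path}"


c = Config(path="db.sqlite")
assert c.instance() is c.instance()  # second call is a cache hit

The known caveat that method caches keep each instance alive for the process lifetime (flake8-bugbear rule B019) appears intended here, since these config objects are process-wide singletons.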
@@ -71,9 +70,9 @@ def _validate_cosmos_db( @classmethod def _validate_sqlite( cls, - sqlite: Optional[SqliteModel], + sqlite: SqliteModel | None, info: ValidationInfo, - ) -> Optional[SqliteModel]: + ) -> SqliteModel | None: if not sqlite and info.data.get("mode", None) == ModeEnum.SQLITE: raise ValueError("SQLite config required") return sqlite diff --git a/helpers/config_models/llm.py b/helpers/config_models/llm.py index f4cc1f50..d3b8a569 100644 --- a/helpers/config_models/llm.py +++ b/helpers/config_models/llm.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Any, Optional, Union +from typing import Any from azure.identity import ManagedIdentityCredential, get_bearer_token_provider from openai import AsyncAzureOpenAI, AsyncOpenAI @@ -23,8 +23,8 @@ class AbstractPlatformModel(BaseModel): class AzureOpenaiPlatformModel(AbstractPlatformModel): - _client: Optional[AsyncAzureOpenAI] = None - api_key: Optional[SecretStr] = None + _client: AsyncAzureOpenAI | None = None + api_key: SecretStr | None = None deployment: str endpoint: str @@ -53,7 +53,7 @@ def instance(self) -> tuple[AsyncAzureOpenAI, AbstractPlatformModel]: class OpenaiPlatformModel(AbstractPlatformModel): - _client: Optional[AsyncOpenAI] = None + _client: AsyncOpenAI | None = None api_key: SecretStr endpoint: str @@ -70,17 +70,17 @@ def instance(self) -> tuple[AsyncOpenAI, AbstractPlatformModel]: class SelectedPlatformModel(BaseModel): - azure_openai: Optional[AzureOpenaiPlatformModel] = None + azure_openai: AzureOpenaiPlatformModel | None = None mode: ModeEnum - openai: Optional[OpenaiPlatformModel] = None + openai: OpenaiPlatformModel | None = None @field_validator("azure_openai") @classmethod def _validate_azure_openai( cls, - azure_openai: Optional[AzureOpenaiPlatformModel], + azure_openai: AzureOpenaiPlatformModel | None, info: ValidationInfo, - ) -> Optional[AzureOpenaiPlatformModel]: + ) -> AzureOpenaiPlatformModel | None: if not azure_openai and info.data.get("mode", None) == ModeEnum.AZURE_OPENAI: raise ValueError("Azure OpenAI config required") return azure_openai @@ -89,14 +89,14 @@ def _validate_azure_openai( @classmethod def _validate_openai( cls, - openai: Optional[OpenaiPlatformModel], + openai: OpenaiPlatformModel | None, info: ValidationInfo, - ) -> Optional[OpenaiPlatformModel]: + ) -> OpenaiPlatformModel | None: if not openai and info.data.get("mode", None) == ModeEnum.OPENAI: raise ValueError("OpenAI config required") return openai - def selected(self) -> Union[AzureOpenaiPlatformModel, OpenaiPlatformModel]: + def selected(self) -> AzureOpenaiPlatformModel | OpenaiPlatformModel: platform = ( self.azure_openai if self.mode == ModeEnum.AZURE_OPENAI else self.openai ) @@ -112,8 +112,6 @@ class LlmModel(BaseModel): serialization_alias="primary", # Backwards compatibility with v6 ) - def selected( - self, is_fast: bool - ) -> Union[AzureOpenaiPlatformModel, OpenaiPlatformModel]: + def selected(self, is_fast: bool) -> AzureOpenaiPlatformModel | OpenaiPlatformModel: platform = self.fast if is_fast else self.slow return platform.selected() diff --git a/helpers/config_models/prompts.py b/helpers/config_models/prompts.py index 413e3c57..d7f59344 100644 --- a/helpers/config_models/prompts.py +++ b/helpers/config_models/prompts.py @@ -4,7 +4,6 @@ from html import escape from logging import Logger from textwrap import dedent -from typing import Optional from azure.core.exceptions import HttpResponseError from openai.types.chat import ChatCompletionSystemMessageParam @@ -23,14 +22,14 @@ class 
SoundModel(BaseModel): ready_tpl: str = "{public_url}/ready.wav" def loading(self) -> str: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG return self.loading_tpl.format( public_url=CONFIG.resources.public_url, ) def ready(self) -> str: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG return self.ready_tpl.format( public_url=CONFIG.resources.public_url, @@ -320,7 +319,7 @@ class LlmModel(BaseModel): """ def default_system(self, call: CallStateModel) -> str: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG return self._format( self.default_system_tpl.format( @@ -337,7 +336,7 @@ def default_system(self, call: CallStateModel) -> str: def chat_system( self, call: CallStateModel, trainings: list[TrainingModel] ) -> list[ChatCompletionSystemMessageParam]: - from models.message import ( # pylint: disable=import-outside-toplevel + from models.message import ( ActionEnum as MessageActionEnum, StyleEnum as MessageStyleEnum, ) @@ -441,7 +440,7 @@ def next_system( def _format( self, prompt_tpl: str, - trainings: Optional[list[TrainingModel]] = None, + trainings: list[TrainingModel] | None = None, **kwargs: str, ) -> str: # Remove possible indentation then render the template @@ -486,34 +485,26 @@ def _messages( @cached_property def logger(self) -> Logger: - from helpers.logging import logger # pylint: disable=import-outside-toplevel + from helpers.logging import logger return logger class TtsModel(BaseModel): tts_lang: str = "en-US" - calltransfer_failure_tpl: str = ( - "It seems I can't connect you with an agent at the moment, but the next available agent will call you back as soon as possible." - ) - connect_agent_tpl: str = ( - "I'm sorry, I wasn't able to respond your request. Please allow me to transfer you to an agent who can assist you further. Please stay on the line and I will get back to you shortly." - ) + calltransfer_failure_tpl: str = "It seems I can't connect you with an agent at the moment, but the next available agent will call you back as soon as possible." + connect_agent_tpl: str = "I'm sorry, I wasn't able to respond to your request. Please allow me to transfer you to an agent who can assist you further. Please stay on the line and I will get back to you shortly." end_call_to_connect_agent_tpl: str = ( "Of course, stay on the line. I will transfer you to an agent." ) error_tpl: str = ( "I'm sorry, I have encountered an error. Could you repeat your request?" ) - goodbye_tpl: str = ( - "Thank you for calling, I hope I've been able to help. You can call back, I've got it all memorized. {bot_company} wishes you a wonderful day!" - ) + goodbye_tpl: str = "Thank you for calling, I hope I've been able to help. You can call back, I've got it all memorized. {bot_company} wishes you a wonderful day!" hello_tpl: str = """ Hello, I'm {bot_name}, the virtual assistant from {bot_company}! Here's how I work: while I'm processing your information, you will hear some music. Feel free to speak to me in a natural way - I'm designed to understand your requests. During the conversation, you can also send me text messages. """ - timeout_silence_tpl: str = ( - "I'm sorry, I didn't hear anything. If you need help, let me know how I can help you." - ) + timeout_silence_tpl: str = "I'm sorry, I didn't hear anything. If you need help, let me know how I can help you."
welcome_back_tpl: str = "Hello, I'm {bot_name}, from {bot_company}!" timeout_loading_tpl: str = ( "It's taking me longer than expected to reply. Thank you for your patience…" @@ -551,7 +542,7 @@ async def timeout_silence(self, call: CallStateModel) -> str: return await self._translate(self.timeout_silence_tpl, call) async def welcome_back(self, call: CallStateModel) -> str: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG return await self._translate( self.welcome_back_tpl, @@ -589,7 +580,7 @@ async def _translate(self, prompt_tpl: str, call: CallStateModel, **kwargs) -> s If the translation fails, the initial prompt is returned. """ - from helpers.translation import ( # pylint: disable=import-outside-toplevel + from helpers.translation import ( translate_text, ) @@ -606,7 +597,7 @@ async def _translate(self, prompt_tpl: str, call: CallStateModel, **kwargs) -> s @cached_property def logger(self) -> Logger: - from helpers.logging import logger # pylint: disable=import-outside-toplevel + from helpers.logging import logger return logger diff --git a/helpers/config_models/root.py b/helpers/config_models/root.py index ed9d1612..853ffa6a 100644 --- a/helpers/config_models/root.py +++ b/helpers/config_models/root.py @@ -53,7 +53,7 @@ class RootModel(BaseSettings): @classmethod def settings_customise_sources( cls, - settings_cls: type[BaseSettings], + settings_cls: type[BaseSettings], # noqa: ARG003 init_settings: PydanticBaseSettingsSource, env_settings: PydanticBaseSettingsSource, dotenv_settings: PydanticBaseSettingsSource, diff --git a/helpers/config_models/sms.py b/helpers/config_models/sms.py index bee0b54b..f82baff8 100644 --- a/helpers/config_models/sms.py +++ b/helpers/config_models/sms.py @@ -1,6 +1,5 @@ from enum import Enum -from functools import lru_cache -from typing import Optional +from functools import cache from pydantic import BaseModel, SecretStr, ValidationInfo, field_validator @@ -20,10 +19,10 @@ class CommunicationServiceModel(BaseModel, frozen=True): Model is purely empty to fit to the `ISms` interface and the "mode" enum code organization. As the Communication Services is also used as the only call interface, it is not necessary to duplicate the models. 
""" - @lru_cache(maxsize=None) # pylint: disable=method-cache-max-size-none + @cache def instance(self) -> ISms: - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel - from persistence.communication_services import ( # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG + from persistence.communication_services import ( CommunicationServicesSms, ) @@ -35,9 +34,9 @@ class TwilioModel(BaseModel, frozen=True): auth_token: SecretStr phone_number: PhoneNumber - @lru_cache(maxsize=None) # pylint: disable=method-cache-max-size-none + @cache def instance(self) -> ISms: - from persistence.twilio import ( # pylint: disable=import-outside-toplevel + from persistence.twilio import ( TwilioSms, ) @@ -45,19 +44,19 @@ def instance(self) -> ISms: class SmsModel(BaseModel): - communication_services: Optional[CommunicationServiceModel] = ( + communication_services: CommunicationServiceModel | None = ( CommunicationServiceModel() ) # Object is fully defined by default mode: ModeEnum = ModeEnum.COMMUNICATION_SERVICES - twilio: Optional[TwilioModel] = None + twilio: TwilioModel | None = None @field_validator("communication_services") @classmethod def _validate_communication_services( cls, - communication_services: Optional[CommunicationServiceModel], + communication_services: CommunicationServiceModel | None, info: ValidationInfo, - ) -> Optional[CommunicationServiceModel]: + ) -> CommunicationServiceModel | None: if ( not communication_services and info.data.get("mode", None) == ModeEnum.COMMUNICATION_SERVICES @@ -69,9 +68,9 @@ def _validate_communication_services( @classmethod def _validate_twilio( cls, - twilio: Optional[TwilioModel], + twilio: TwilioModel | None, info: ValidationInfo, - ) -> Optional[TwilioModel]: + ) -> TwilioModel | None: if not twilio and info.data.get("mode", None) == ModeEnum.TWILIO: raise ValueError("Twilio config required") return twilio diff --git a/helpers/http.py b/helpers/http.py index b993690b..8bc21795 100644 --- a/helpers/http.py +++ b/helpers/http.py @@ -1,5 +1,3 @@ -from typing import Optional - from aiohttp import ( AsyncResolver, ClientSession, @@ -11,10 +9,10 @@ from azure.core.pipeline.transport._aiohttp import AioHttpTransport from twilio.http.async_http_client import AsyncTwilioHttpClient -_cookie_jar: Optional[DummyCookieJar] = None -_session: Optional[ClientSession] = None -_transport: Optional[AioHttpTransport] = None -_twilio_http: Optional[AsyncTwilioHttpClient] = None +_cookie_jar: DummyCookieJar | None = None +_session: ClientSession | None = None +_transport: AioHttpTransport | None = None +_twilio_http: AsyncTwilioHttpClient | None = None async def _aiohttp_cookie_jar() -> DummyCookieJar: @@ -25,7 +23,7 @@ async def _aiohttp_cookie_jar() -> DummyCookieJar: Returns a `DummyCookieJar` instance. """ - global _cookie_jar # pylint: disable=global-statement + global _cookie_jar # noqa: PLW0603 if not _cookie_jar: _cookie_jar = DummyCookieJar() return _cookie_jar @@ -39,7 +37,7 @@ async def aiohttp_session() -> ClientSession: Returns a `ClientSession` instance. """ - global _session # pylint: disable=global-statement + global _session # noqa: PLW0603 if not _session: _session = ClientSession( # Same config as default in the SDK @@ -65,7 +63,7 @@ async def azure_transport() -> AioHttpTransport: Returns a `AioHttpTransport` instance. """ - global _transport # pylint: disable=global-statement + global _transport # noqa: PLW0603 if not _transport: # Azure SDK implements its own retry logic (e.g. 
for Cosmos DB), so we don't add it here _transport = AioHttpTransport( @@ -83,7 +81,7 @@ async def twilio_http() -> AsyncTwilioHttpClient: Returns a `AsyncTwilioHttpClient` instance. """ - global _twilio_http # pylint: disable=global-statement + global _twilio_http # noqa: PLW0603 if not _twilio_http: _twilio_http = AsyncTwilioHttpClient( timeout=10, diff --git a/helpers/llm_tools.py b/helpers/llm_tools.py index 202d6538..3f8f2382 100644 --- a/helpers/llm_tools.py +++ b/helpers/llm_tools.py @@ -1,7 +1,8 @@ import asyncio +from collections.abc import Awaitable, Callable from html import escape from inspect import getmembers, isfunction -from typing import Annotated, Awaitable, Callable, Literal +from typing import Annotated, Literal from azure.communication.callautomation.aio import CallAutomationClient from openai.types.chat import ChatCompletionToolParam @@ -586,8 +587,8 @@ async def speech_lang( async def to_openai(call: CallStateModel) -> list[ChatCompletionToolParam]: return await asyncio.gather( *[ - function_schema(type, call=call) - for name, type in getmembers(LlmPlugins, isfunction) + function_schema(arg_type, call=call) + for name, arg_type in getmembers(LlmPlugins, isfunction) if not name.startswith("_") and name != "to_openai" ] ) diff --git a/helpers/llm_utils.py b/helpers/llm_utils.py index 18276bac..542b9c33 100644 --- a/helpers/llm_utils.py +++ b/helpers/llm_utils.py @@ -4,8 +4,9 @@ """ import inspect +from collections.abc import Callable from textwrap import dedent -from typing import Any, Callable, ForwardRef, Tuple, TypeVar, Union +from typing import Annotated, Any, ForwardRef, TypeVar from jinja2 import Environment from openai.types.chat import ChatCompletionToolParam @@ -13,7 +14,6 @@ from pydantic import BaseModel, TypeAdapter from pydantic._internal._typing_extra import eval_type_lenient from pydantic.json_schema import JsonSchemaValue -from typing_extensions import Annotated from helpers.logging import logger @@ -125,7 +125,7 @@ def _typed_signature(func: Callable[..., Any]) -> inspect.Signature: def _param_annotations( typed_signature: inspect.Signature, -) -> dict[str, Union[Annotated[type[Any], str], type[Any]]]: +) -> dict[str, Annotated[type[Any], str] | type[Any]]: """ Get the type annotations of the parameters of a function and return a dictionary of the annotated parameters. """ @@ -138,7 +138,7 @@ def _param_annotations( async def _parameter_json_schema( name: str, - value: Union[Annotated[type[Any], str], type[Any]], + value: Annotated[type[Any], str] | type[Any], default_values: dict[str, Any], **kwargs: Any, ) -> JsonSchemaValue: @@ -148,9 +148,7 @@ async def _parameter_json_schema( Kwargs are passed to the Jinja template for rendering the parameter description. 
""" - def _description( - name: str, value: Union[Annotated[type[Any], str], type[Any]] - ) -> str: + def _description(name: str, value: Annotated[type[Any], str] | type[Any]) -> str: # Handles Annotated if hasattr(value, "__metadata__"): retval = value.__metadata__[0] @@ -199,7 +197,7 @@ def _default_values(typed_signature: inspect.Signature) -> dict[str, Any]: async def _parameters( required_params: set[str], - param_annotations: dict[str, Union[Annotated[type[Any], str], type[Any]]], + param_annotations: dict[str, Annotated[type[Any], str] | type[Any]], default_values: dict[str, Any], **kwargs: Any, ) -> Parameters: diff --git a/helpers/llm_worker.py b/helpers/llm_worker.py index ee8ee738..21790bfe 100644 --- a/helpers/llm_worker.py +++ b/helpers/llm_worker.py @@ -1,7 +1,8 @@ import json +from collections.abc import AsyncGenerator, Callable from functools import lru_cache from os import environ -from typing import AsyncGenerator, Callable, Optional, TypeVar, Union +from typing import TypeVar import tiktoken from json_repair import repair_json @@ -85,7 +86,7 @@ async def completion_stream( max_tokens: int, messages: list[MessageModel], system: list[ChatCompletionSystemMessageParam], - tools: Optional[list[ChatCompletionToolParam]] = None, + tools: list[ChatCompletionToolParam] | None = None, ) -> AsyncGenerator[ChoiceDelta, None]: """ Returns a stream of completions. @@ -114,7 +115,7 @@ async def completion_stream( ): yield chunck return - except Exception as e: # pylint: disable=broad-exception-caught + except Exception as e: if not any(isinstance(e, exception) for exception in _retried_exceptions): raise e logger.warning( @@ -135,12 +136,13 @@ async def completion_stream( yield chunck -async def _completion_stream_worker( +# TODO: Refacto, too long (and remove PLR0912 ignore) +async def _completion_stream_worker( # noqa: PLR0912 is_fast: bool, max_tokens: int, messages: list[MessageModel], system: list[ChatCompletionSystemMessageParam], - tools: Optional[list[ChatCompletionToolParam]] = None, + tools: list[ChatCompletionToolParam] | None = None, ) -> AsyncGenerator[ChoiceDelta, None]: """ Returns a stream of completions. 
@@ -171,11 +173,11 @@ async def _completion_stream_worker( try: if platform.streaming: # Streaming - stream: AsyncStream[ChatCompletionChunk] = ( - await client.chat.completions.create( - **chat_kwargs, - stream=True, - ) + stream: AsyncStream[ + ChatCompletionChunk + ] = await client.chat.completions.create( + **chat_kwargs, + stream=True, ) async for chunck in stream: choices = chunck.choices @@ -246,14 +248,12 @@ async def _completion_stream_worker( async def completion_sync( res_type: type[T], system: list[ChatCompletionSystemMessageParam], - validation_callback: Callable[ - [Optional[str]], tuple[bool, Optional[str], Optional[T]] - ], + validation_callback: Callable[[str | None], tuple[bool, str | None, T | None]], validate_json: bool = False, - _previous_result: Optional[str] = None, + _previous_result: str | None = None, _retries_remaining: int = 3, - _validation_error: Optional[str] = None, -) -> Optional[T]: + _validation_error: str | None = None, +) -> T | None: # Initialize prompts messages = system if _validation_error: @@ -269,7 +269,7 @@ async def completion_sync( ] # Generate - res_content: Optional[str] = await _completion_sync_worker( + res_content: str | None = await _completion_sync_worker( is_fast=False, json_output=validate_json, system=messages, @@ -307,8 +307,8 @@ async def _completion_sync_worker( is_fast: bool, system: list[ChatCompletionSystemMessageParam], json_output: bool = False, - max_tokens: Optional[int] = None, -) -> Optional[str]: + max_tokens: int | None = None, +) -> str | None: """ Returns a completion. """ @@ -363,21 +363,19 @@ async def _completion_sync_worker( return choice.message.content if choice else None -def _limit_messages( +def _limit_messages( # noqa: PLR0913 context_window: int, - max_tokens: Optional[int], + max_tokens: int | None, messages: list[MessageModel], model: str, system: list[ChatCompletionSystemMessageParam], max_messages: int = 1000, - tools: Optional[list[ChatCompletionToolParam]] = None, + tools: list[ChatCompletionToolParam] | None = None, ) -> list[ - Union[ - ChatCompletionAssistantMessageParam, - ChatCompletionSystemMessageParam, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, - ] + ChatCompletionAssistantMessageParam + | ChatCompletionSystemMessageParam + | ChatCompletionToolMessageParam + | ChatCompletionUserMessageParam ]: """ Returns a list of messages limited by the context size. @@ -440,7 +438,7 @@ def _count_tokens(content: str, model: str) -> int: def _use_llm( is_fast: bool, -) -> tuple[Union[AsyncAzureOpenAI, AsyncOpenAI], LlmAbstractPlatformModel]: +) -> tuple[AsyncAzureOpenAI | AsyncOpenAI, LlmAbstractPlatformModel]: """ Returns an LLM client and platform model. 
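`completion_sync` above retries generation and threads the last validation error back into the prompt. A simplified, self-contained sketch of that validate-and-retry loop under the same callback shape (`generate` stands in for `_completion_sync_worker`, which this sketch does not reproduce):

```python
from collections.abc import Awaitable, Callable
from typing import TypeVar

T = TypeVar("T")


async def completion_with_retries(
    generate: Callable[[str | None], Awaitable[str | None]],
    validate: Callable[[str | None], tuple[bool, str | None, T | None]],
    retries_remaining: int = 3,
    validation_error: str | None = None,
) -> T | None:
    # Feed the previous validation error back so the model can correct itself
    res = await generate(validation_error)
    is_valid, error, parsed = validate(res)
    if is_valid:
        return parsed
    if retries_remaining <= 0:
        return None  # Give up once the retry budget is exhausted
    return await completion_with_retries(generate, validate, retries_remaining - 1, error)
```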
diff --git a/helpers/monitoring.py b/helpers/monitoring.py index 32a03787..5687c025 100644 --- a/helpers/monitoring.py +++ b/helpers/monitoring.py @@ -14,7 +14,7 @@ AioHttpClientInstrumentor().instrument() # Instrument aiohttp HTTPXClientInstrumentor().instrument() # Instrument httpx except ValueError as e: - print( + print( # noqa: T201 "Azure Application Insights instrumentation failed, likely due to a missing APPLICATIONINSIGHTS_CONNECTION_STRING environment variable.", e, ) diff --git a/helpers/translation.py b/helpers/translation.py index c7ba4d34..38be7d8f 100644 --- a/helpers/translation.py +++ b/helpers/translation.py @@ -1,5 +1,3 @@ -from typing import Optional - from azure.ai.translation.text.aio import TextTranslationClient from azure.ai.translation.text.models import TranslatedTextItem from azure.core.credentials import AzureKeyCredential @@ -18,7 +16,7 @@ logger.info("Using Translation %s", CONFIG.ai_translation.endpoint) _cache = CONFIG.cache.instance() -_client = Optional[TextTranslationClient] +_client: TextTranslationClient | None = None @retry( @@ -27,9 +25,7 @@ stop=stop_after_attempt(3), wait=wait_random_exponential(multiplier=0.8, max=8), ) -async def translate_text( - text: str, source_lang: str, target_lang: str -) -> Optional[str]: +async def translate_text(text: str, source_lang: str, target_lang: str) -> str | None: """ Translate text from source language to target language. @@ -45,7 +41,7 @@ async def translate_text( return cached.decode() # Try live - translation: Optional[str] = None + translation: str | None = None client = await _use_client() res: list[TranslatedTextItem] = await client.translate( body=[text], @@ -64,7 +60,7 @@ async def _use_client() -> TextTranslationClient: """ Generate the Translation client and close it after use. 
""" - global _client # pylint: disable=global-statement + global _client # noqa: PLW0603 if not isinstance(_client, TextTranslationClient): _client = TextTranslationClient( # Performance diff --git a/models/call.py b/models/call.py index c8f9764d..040731ee 100644 --- a/models/call.py +++ b/models/call.py @@ -2,7 +2,7 @@ import random import string from datetime import UTC, datetime, tzinfo -from typing import Any, Optional +from typing import Any from uuid import UUID, uuid4 from pydantic import BaseModel, Field, ValidationInfo, computed_field, field_validator @@ -27,13 +27,13 @@ class CallGetModel(BaseModel): created_at: datetime = Field(default_factory=lambda: datetime.now(UTC), frozen=True) # Editable fields initiate: CallInitiateModel = Field(frozen=True) - claim: dict[str, Any] = ( - {} - ) # Place after "initiate" as it depends on it for validation + claim: dict[ + str, Any + ] = {} # Place after "initiate" as it depends on it for validation messages: list[MessageModel] = [] - next: Optional[NextModel] = None + next: NextModel | None = None reminders: list[ReminderModel] = [] - synthesis: Optional[SynthesisModel] = None + synthesis: SynthesisModel | None = None @computed_field @property @@ -58,9 +58,9 @@ def in_progress(self) -> bool: @field_validator("claim") @classmethod def _validate_claim( - cls, claim: Optional[dict[str, Any]], info: ValidationInfo + cls, claim: dict[str, Any] | None, info: ValidationInfo ) -> dict[str, Any]: - initiate: Optional[CallInitiateModel] = info.data.get("initiate", None) + initiate: CallInitiateModel | None = info.data.get("initiate", None) if not initiate: return {} return ( @@ -82,14 +82,14 @@ class CallStateModel(CallGetModel, extra="ignore"): frozen=True, ) # Editable fields - lang_short_code: Optional[str] = None + lang_short_code: str | None = None recognition_retry: int = 0 - voice_id: Optional[str] = None + voice_id: str | None = None @computed_field @property def lang(self) -> LanguageEntryModel: # pyright: ignore - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG lang = CONFIG.conversation.initiate.lang default = lang.default_lang @@ -114,7 +114,7 @@ async def trainings(self, cache_only: bool = True) -> list[TrainingModel]: Is using query expansion from last messages. Then, data is sorted by score. 
""" - from helpers.config import CONFIG # pylint: disable=import-outside-toplevel + from helpers.config import CONFIG with tracer.start_as_current_span("call_trainings"): search = CONFIG.ai_search.instance() diff --git a/models/claim.py b/models/claim.py index 214f12b7..d22ed151 100644 --- a/models/claim.py +++ b/models/claim.py @@ -12,6 +12,6 @@ class ClaimTypeEnum(str, Enum): class ClaimFieldModel(BaseModel): - description: Optional[str] = None + description: str | None = None name: str type: ClaimTypeEnum diff --git a/models/message.py b/models/message.py index 28e30684..56039833 100644 --- a/models/message.py +++ b/models/message.py @@ -3,7 +3,7 @@ from datetime import UTC, datetime from enum import Enum from inspect import getmembers, isfunction -from typing import Any, Optional, Union +from typing import Any from json_repair import repair_json from openai.types.chat import ( @@ -82,7 +82,7 @@ def __add__(self, other: ChoiceDeltaToolCall) -> "ToolModel": return self async def execute_function(self, plugins: object) -> None: - from helpers.logging import logger # pylint: disable=import-outside-toplevel + from helpers.logging import logger json_str = self.function_arguments name = self.function_name @@ -131,7 +131,7 @@ async def execute_function(self, plugins: object) -> None: ) res = "Wrong arguments, please fix them and try again." res_log = res - except Exception as e: # pylint: disable=broad-exception-caught + except Exception as e: logger.warning( "Error executing function %s with args %s", self.function_name, @@ -145,7 +145,7 @@ async def execute_function(self, plugins: object) -> None: @staticmethod def _available_function_names() -> list[str]: - from helpers.llm_tools import ( # pylint: disable=import-outside-toplevel + from helpers.llm_tools import ( LlmPlugins, ) @@ -177,11 +177,9 @@ def _validate_created_at(cls, created_at: datetime) -> datetime: def to_openai( self, ) -> list[ - Union[ - ChatCompletionAssistantMessageParam, - ChatCompletionToolMessageParam, - ChatCompletionUserMessageParam, - ] + ChatCompletionAssistantMessageParam + | ChatCompletionToolMessageParam + | ChatCompletionUserMessageParam ]: # Removing newlines from the content to avoid hallucinations issues with GPT-4 Turbo content = " ".join([line.strip() for line in self.content.splitlines()]) @@ -211,14 +209,14 @@ def to_openai( tool_calls=[tool_call.to_openai() for tool_call in self.tool_calls], ) ) - for tool_call in self.tool_calls: - res.append( - ChatCompletionToolMessageParam( - content=tool_call.content, - role="tool", - tool_call_id=tool_call.tool_id, - ) + res.extend( + ChatCompletionToolMessageParam( + content=tool_call.content, + role="tool", + tool_call_id=tool_call.tool_id, ) + for tool_call in self.tool_calls + ) return res @@ -236,7 +234,7 @@ def remove_message_action(text: str) -> str: return text -def extract_message_style(text: str) -> tuple[Optional[StyleEnum], str]: +def extract_message_style(text: str) -> tuple[StyleEnum | None, str]: """ Detect the style of a message. 
""" diff --git a/models/readiness.py b/models/readiness.py index 759783a8..4e4c6699 100644 --- a/models/readiness.py +++ b/models/readiness.py @@ -1,5 +1,4 @@ from enum import Enum -from typing import List from pydantic import BaseModel @@ -15,5 +14,5 @@ class ReadinessCheckModel(BaseModel): class ReadinessModel(BaseModel): - checks: List[ReadinessCheckModel] + checks: list[ReadinessCheckModel] status: ReadinessEnum diff --git a/models/reminder.py b/models/reminder.py index dee021a8..bc714a69 100644 --- a/models/reminder.py +++ b/models/reminder.py @@ -10,5 +10,5 @@ class ReminderModel(BaseModel): # Editable fields description: str due_date_time: datetime - owner: Optional[str] = None # Optional for backwards compatibility + owner: str | None = None # Optional for backwards compatibility title: str diff --git a/persistence/ai_search.py b/persistence/ai_search.py index 34f8d244..dcbb228f 100644 --- a/persistence/ai_search.py +++ b/persistence/ai_search.py @@ -1,5 +1,3 @@ -from typing import Optional - from azure.core.credentials import AzureKeyCredential from azure.core.exceptions import ( HttpResponseError, @@ -35,7 +33,7 @@ class AiSearchSearch(ISearch): - _client: Optional[SearchClient] = None + _client: SearchClient | None = None _config: AiSearchModel def __init__(self, cache: ICache, config: AiSearchModel): @@ -59,7 +57,7 @@ async def areadiness(self) -> ReadinessEnum: logger.error("Error requesting AI Search", exc_info=True) except ServiceRequestError: logger.error("Error connecting to AI Search", exc_info=True) - except Exception: # pylint: disable=broad-exception-caught + except Exception: logger.error( "Unknown error while checking AI Search readiness", exc_info=True ) @@ -76,7 +74,7 @@ async def training_asearch_all( lang: str, text: str, cache_only: bool = False, - ) -> Optional[list[TrainingModel]]: + ) -> list[TrainingModel] | None: logger.debug('Searching training data for "%s"', text) if not text: return None @@ -140,7 +138,7 @@ async def training_asearch_all( ) except ValidationError as e: logger.debug("Parsing error: %s", e.errors()) - except ResourceNotFoundError as e: + except ResourceNotFoundError: logger.warning('AI Search index "%s" not found', self._config.index) except HttpResponseError as e: logger.error("Error requesting AI Search: %s", e) diff --git a/persistence/communication_services.py b/persistence/communication_services.py index 0736a116..532b5ca0 100644 --- a/persistence/communication_services.py +++ b/persistence/communication_services.py @@ -13,7 +13,7 @@ class CommunicationServicesSms(ISms): - _client: Optional[SmsClient] = None + _client: SmsClient | None = None _config: CommunicationServicesModel def __init__(self, config: CommunicationServicesModel): diff --git a/persistence/cosmos_db.py b/persistence/cosmos_db.py index f445cf17..104f05c8 100644 --- a/persistence/cosmos_db.py +++ b/persistence/cosmos_db.py @@ -1,7 +1,7 @@ import asyncio -import logging +from collections.abc import AsyncGenerator from contextlib import asynccontextmanager -from typing import AsyncGenerator, Optional +from http import HTTPStatus from uuid import UUID, uuid4 from azure.cosmos import ConsistencyLevel @@ -20,7 +20,7 @@ class CosmosDbStore(IStore): - _client: Optional[CosmosClient] = None + _client: CosmosClient | None = None _config: CosmosDbModel def __init__(self, cache: ICache, config: CosmosDbModel): @@ -54,9 +54,9 @@ async def areadiness(self) -> ReadinessEnum: read_item = await db.read_item( item=test_id, partition_key=test_partition ) - assert { - k: v for k, v in 
read_item.items() if k in test_dict - } == test_dict # Check only the relevant fields, Cosmos DB adds metadata + assert ( + {k: v for k, v in read_item.items() if k in test_dict} == test_dict + ) # Check only the relevant fields, Cosmos DB adds metadata # Delete the item await db.delete_item(item=test_id, partition_key=test_partition) # Test the item does not exist @@ -67,7 +67,7 @@ async def areadiness(self) -> ReadinessEnum: logger.error("Readiness test failed", exc_info=True) except CosmosHttpResponseError: logger.error("Error requesting CosmosDB", exc_info=True) - except Exception: # pylint: disable=broad-exception-caught + except Exception: logger.error( "Unknown error while checking Cosmos DB readiness", exc_info=True ) @@ -80,12 +80,12 @@ async def _item_exists(self, test_id: str, partition_key: str) -> bool: await db.read_item(item=test_id, partition_key=partition_key) exist = True except CosmosHttpResponseError as e: - if e.status_code != 404: + if e.status_code != HTTPStatus.NOT_FOUND: logger.error("Error requesting CosmosDB: %s", e) exist = True return exist - async def call_aget(self, call_id: UUID) -> Optional[CallStateModel]: + async def call_aget(self, call_id: UUID) -> CallStateModel | None: logger.debug("Loading call %s", call_id) # Try cache @@ -151,7 +151,7 @@ async def call_aset(self, call: CallStateModel) -> bool: return res - async def call_asearch_one(self, phone_number: str) -> Optional[CallStateModel]: + async def call_asearch_one(self, phone_number: str) -> CallStateModel | None: logger.debug("Loading last call for %s", phone_number) # Try cache @@ -196,8 +196,8 @@ async def call_asearch_one(self, phone_number: str) -> Optional[CallStateModel]: async def call_asearch_all( self, count: int, - phone_number: Optional[str] = None, - ) -> tuple[Optional[list[CallStateModel]], int]: + phone_number: str | None = None, + ) -> tuple[list[CallStateModel] | None, int]: logger.debug("Searching calls, for %s and count %s", phone_number, count) # TODO: Cache results calls, total = await asyncio.gather( @@ -209,8 +209,8 @@ async def call_asearch_all( async def _call_asearch_all_calls_worker( self, count: int, - phone_number: Optional[str] = None, - ) -> Optional[list[CallStateModel]]: + phone_number: str | None = None, + ) -> list[CallStateModel] | None: calls: list[CallStateModel] = [] try: async with self._use_client() as db: @@ -245,7 +245,7 @@ async def _call_asearch_all_calls_worker( async def _call_asearch_all_total_worker( self, - phone_number: Optional[str] = None, + phone_number: str | None = None, ) -> int: total = 0 try: diff --git a/persistence/icache.py b/persistence/icache.py index d86b2454..880fc721 100644 --- a/persistence/icache.py +++ b/persistence/icache.py @@ -6,7 +6,6 @@ class ICache(ABC): - @abstractmethod @tracer.start_as_current_span("cache_areadiness") async def areadiness(self) -> ReadinessEnum: @@ -14,12 +13,12 @@ async def areadiness(self) -> ReadinessEnum: @abstractmethod @tracer.start_as_current_span("cache_aconnect") - async def aget(self, key: str) -> Optional[bytes]: + async def aget(self, key: str) -> bytes | None: pass @abstractmethod @tracer.start_as_current_span("cache_aset") - async def aset(self, key: str, value: Union[str, bytes, None]) -> bool: + async def aset(self, key: str, value: str | bytes | None) -> bool: pass @abstractmethod diff --git a/persistence/isearch.py b/persistence/isearch.py index aafe6170..aa66559a 100644 --- a/persistence/isearch.py +++ b/persistence/isearch.py @@ -25,5 +25,5 @@ async def training_asearch_all( lang: 
str, text: str, cache_only: bool = False, - ) -> Optional[list[TrainingModel]]: + ) -> list[TrainingModel] | None: pass diff --git a/persistence/isms.py b/persistence/isms.py index 50ba3690..460ffb00 100644 --- a/persistence/isms.py +++ b/persistence/isms.py @@ -6,7 +6,6 @@ class ISms(ABC): - @abstractmethod @tracer.start_as_current_span("sms_areadiness") async def areadiness(self) -> ReadinessEnum: diff --git a/persistence/istore.py b/persistence/istore.py index b0ee5840..b0acd216 100644 --- a/persistence/istore.py +++ b/persistence/istore.py @@ -21,7 +21,7 @@ async def areadiness(self) -> ReadinessEnum: @abstractmethod @tracer.start_as_current_span("store_call_aget") - async def call_aget(self, call_id: UUID) -> Optional[CallStateModel]: + async def call_aget(self, call_id: UUID) -> CallStateModel | None: pass @abstractmethod @@ -31,7 +31,7 @@ async def call_aset(self, call: CallStateModel) -> bool: @abstractmethod @tracer.start_as_current_span("store_call_adel") - async def call_asearch_one(self, phone_number: str) -> Optional[CallStateModel]: + async def call_asearch_one(self, phone_number: str) -> CallStateModel | None: pass @abstractmethod @@ -39,8 +39,8 @@ async def call_asearch_one(self, phone_number: str) -> Optional[CallStateModel]: async def call_asearch_all( self, count: int, - phone_number: Optional[str] = None, - ) -> tuple[Optional[list[CallStateModel]], int]: + phone_number: str | None = None, + ) -> tuple[list[CallStateModel] | None, int]: pass def _cache_key_call_id(self, call_id: UUID) -> str: diff --git a/persistence/memory.py b/persistence/memory.py index 0a9ae769..1a9bf12b 100644 --- a/persistence/memory.py +++ b/persistence/memory.py @@ -17,7 +17,7 @@ class MemoryCache(ICache): See: https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU) """ - _cache: OrderedDict[str, Union[bytes, None]] = OrderedDict() + _cache: OrderedDict[str, bytes | None] = OrderedDict() _config: MemoryModel def __init__(self, config: MemoryModel): @@ -33,7 +33,7 @@ async def areadiness(self) -> ReadinessEnum: """ return ReadinessEnum.OK # Always ready, it's memory :) - async def aget(self, key: str) -> Optional[bytes]: + async def aget(self, key: str) -> bytes | None: """ Get a value from the cache. @@ -46,7 +46,7 @@ async def aget(self, key: str) -> Optional[bytes]: self._cache.move_to_end(sha_key, last=False) # Move to first return res - async def aset(self, key: str, value: Union[str, bytes, None]) -> bool: + async def aset(self, key: str, value: str | bytes | None) -> bool: """ Set a value in the cache. """ diff --git a/persistence/redis.py b/persistence/redis.py index ccf8c906..e300f66c 100644 --- a/persistence/redis.py +++ b/persistence/redis.py @@ -1,5 +1,4 @@ import hashlib -from typing import Optional, Union from uuid import uuid4 from opentelemetry.instrumentation.redis import RedisInstrumentor @@ -68,11 +67,11 @@ async def areadiness(self) -> ReadinessEnum: logger.error("Readiness test failed", exc_info=True) except RedisError: logger.error("Error requesting Redis", exc_info=True) - except Exception: # pylint: disable=broad-exception-caught + except Exception: logger.error("Unknown error while checking Redis readiness", exc_info=True) return ReadinessEnum.FAIL - async def aget(self, key: str) -> Optional[bytes]: + async def aget(self, key: str) -> bytes | None: """ Get a value from the cache. 
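`MemoryCache` above keeps its LRU ordering with `OrderedDict.move_to_end`: reads move a key to the front, so the least recently used entries drift to the back. A minimal standalone sketch of the same technique, with eviction added for completeness (class name and default size are illustrative, not the project's):

```python
from collections import OrderedDict


class TinyLruCache:
    def __init__(self, max_size: int = 128) -> None:
        self._cache: OrderedDict[str, bytes | None] = OrderedDict()
        self._max_size = max_size

    def get(self, key: str) -> bytes | None:
        if key not in self._cache:
            return None
        self._cache.move_to_end(key, last=False)  # Most recently used moves to the front
        return self._cache[key]

    def set(self, key: str, value: bytes | None) -> None:
        if key not in self._cache and len(self._cache) >= self._max_size:
            self._cache.popitem(last=True)  # Evict the least recently used entry (back)
        self._cache[key] = value
        self._cache.move_to_end(key, last=False)
```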
@@ -88,7 +87,7 @@ async def aget(self, key: str) -> Optional[bytes]: logger.error("Error getting value", exc_info=True) return res - async def aset(self, key: str, value: Union[str, bytes, None]) -> bool: + async def aset(self, key: str, value: str | bytes | None) -> bool: """ Set a value in the cache. diff --git a/persistence/sqlite.py b/persistence/sqlite.py index d76b84c6..319d4de2 100644 --- a/persistence/sqlite.py +++ b/persistence/sqlite.py @@ -1,7 +1,7 @@ import asyncio import os +from collections.abc import AsyncGenerator from contextlib import asynccontextmanager -from typing import AsyncGenerator, Optional from uuid import UUID from aiosqlite import Connection, connect as sqlite_connect @@ -52,11 +52,11 @@ async def areadiness(self) -> ReadinessEnum: async with self._use_db() as db: await db.execute("SELECT 1") return ReadinessEnum.OK - except Exception: # pylint: disable=broad-exception-caught + except Exception: logger.error("Unknown error while checking SQLite readiness", exc_info=True) return ReadinessEnum.FAIL - async def call_aget(self, call_id: UUID) -> Optional[CallStateModel]: + async def call_aget(self, call_id: UUID) -> CallStateModel | None: logger.debug("Loading call %s", call_id) # Try cache @@ -116,7 +116,7 @@ async def call_aset(self, call: CallStateModel) -> bool: return True - async def call_asearch_one(self, phone_number: str) -> Optional[CallStateModel]: + async def call_asearch_one(self, phone_number: str) -> CallStateModel | None: logger.debug("Loading last call for %s", phone_number) # Try cache @@ -154,8 +154,8 @@ async def call_asearch_one(self, phone_number: str) -> Optional[CallStateModel]: async def call_asearch_all( self, count: int, - phone_number: Optional[str] = None, - ) -> tuple[Optional[list[CallStateModel]], int]: + phone_number: str | None = None, + ) -> tuple[list[CallStateModel] | None, int]: logger.debug("Searching calls, for %s and count %s", phone_number, count) # TODO: Cache results calls, total = await asyncio.gather( @@ -167,8 +167,8 @@ async def call_asearch_all( async def _call_asearch_all_calls_worker( self, count: int, - phone_number: Optional[str] = None, - ) -> Optional[list[CallStateModel]]: + phone_number: str | None = None, + ) -> list[CallStateModel] | None: calls: list[CallStateModel] = [] async with self._use_db() as db: where_clause = ( @@ -200,7 +200,7 @@ async def _call_asearch_all_calls_worker( async def _call_asearch_all_total_worker( self, - phone_number: Optional[str] = None, + phone_number: str | None = None, ) -> int: async with self._use_db() as db: where_clause = ( diff --git a/persistence/twilio.py b/persistence/twilio.py index 464b12a9..cb2b2acd 100644 --- a/persistence/twilio.py +++ b/persistence/twilio.py @@ -1,5 +1,3 @@ -from typing import Optional - from twilio.base.exceptions import TwilioRestException from twilio.rest import Client @@ -12,7 +10,7 @@ class TwilioSms(ISms): - _client: Optional[Client] = None + _client: Client | None = None _config: TwilioModel def __init__(self, config: TwilioModel): @@ -34,7 +32,7 @@ async def areadiness(self) -> ReadinessEnum: return ReadinessEnum.OK except AssertionError: logger.error("Readiness test failed", exc_info=True) - except Exception: # pylint: disable=broad-exception-caught + except Exception: logger.error("Unknown error while checking Twilio readiness", exc_info=True) return ReadinessEnum.FAIL diff --git a/tests/cache.py b/tests/cache.py index 59991011..e07db750 100644 --- a/tests/cache.py +++ b/tests/cache.py @@ -1,5 +1,5 @@ import pytest -from pytest import 
assume # pylint: disable=no-name-in-module # pyright: ignore +from pytest import assume from helpers.config import CONFIG from helpers.config_models.cache import ModeEnum as CacheModeEnum diff --git a/tests/conftest.py b/tests/conftest.py index 055d23d8..d6e9dcae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,8 +2,9 @@ import random import string import xml.etree.ElementTree as ET +from collections.abc import Callable from textwrap import dedent -from typing import Any, Callable, Optional, Union +from typing import Any import pytest import yaml @@ -45,10 +46,10 @@ def __init__( async def start_recognizing_media( self, - play_prompt: Union[FileSource, TextSource, SsmlSource], - *args, - operation_context: Optional[str] = None, - **kwargs, + play_prompt: FileSource | TextSource | SsmlSource, + *args, # noqa: ARG002 + operation_context: str | None = None, + **kwargs, # noqa: ARG002 ) -> None: contexts = _str_to_contexts(operation_context) for context in contexts or []: @@ -57,10 +58,10 @@ async def start_recognizing_media( async def play_media( self, - play_source: Union[FileSource, TextSource, SsmlSource], - *args, - operation_context: Optional[str] = None, - **kwargs, + play_source: FileSource | TextSource | SsmlSource, + *args, # noqa: ARG002 + operation_context: str | None = None, + **kwargs, # noqa: ARG002 ) -> None: contexts = _str_to_contexts(operation_context) for context in contexts or []: @@ -69,16 +70,16 @@ async def play_media( async def transfer_call_to_participant( self, - *args, - **kwargs, + *args, # noqa: ARG002 + **kwargs, # noqa: ARG002 ) -> TransferCallResult: self._transfer_callback() return TransferCallResult() async def hang_up( self, - *args, - **kwargs, + *args, # noqa: ARG002 + **kwargs, # noqa: ARG002 ) -> None: self._hang_up_callback() @@ -89,9 +90,7 @@ async def cancel_all_media_operations( ) -> None: pass - def _log_media( - self, play_source: Union[FileSource, TextSource, SsmlSource] - ) -> None: + def _log_media(self, play_source: FileSource | TextSource | SsmlSource) -> None: if isinstance(play_source, TextSource): self._play_media_callback(play_source.text.strip()) elif isinstance(play_source, SsmlSource): @@ -118,8 +117,8 @@ def __init__( def get_call_connection( self, - *args, - **kwargs, + *args, # noqa: ARG002 + **kwargs, # noqa: ARG002 ) -> CallConnectionClientMock: return self._call_client @@ -202,9 +201,7 @@ def should_use_azure_openai(self) -> bool: return True def _cache_key(self, prompt: str) -> str: - llm_string = self._model._get_llm_string( - input=prompt - ) # pylint: disable=protected-access + llm_string = self._model._get_llm_string(input=prompt) llm_hash = hashlib.sha256(llm_string.encode(), usedforsecurity=False).digest() return f"call-center-ai/{llm_hash}" @@ -221,7 +218,6 @@ def with_conversations(fn=None) -> MarkDecorator: with open( encoding="utf-8", file="tests/conversations.yaml", - mode="r", ) as f: file: dict = yaml.safe_load(f) conversations: list[Conversation] = [] @@ -230,7 +226,7 @@ def with_conversations(fn=None) -> MarkDecorator: conversations.append(Conversation.model_validate(conv)) except ValidationError: logger.error("Failed to parse conversation", exc_info=True) - print(f"Loaded {len(conversations)} conversations") + print(f"Loaded {len(conversations)} conversations") # noqa: T201 keys = sorted(Conversation.model_fields.keys() - {"id"}) values = [ pytest.param( diff --git a/tests/llm.py b/tests/llm.py index ff9a303e..8d044e30 100644 --- a/tests/llm.py +++ b/tests/llm.py @@ -2,7 +2,6 @@ import json import re 
from datetime import datetime -from typing import Optional import pytest from deepeval import assert_test @@ -16,7 +15,7 @@ from deepeval.models.gpt_model import GPTModel from deepeval.test_case import LLMTestCase from pydantic import TypeAdapter -from pytest import assume # pylint: disable=no-name-in-module # pyright: ignore +from pytest import assume from helpers.call_events import ( on_call_connected, @@ -58,8 +57,8 @@ def measure( async def a_measure( self, test_case: LLMTestCase, - *args, - **kwargs, + *args, # noqa: ARG002 + **kwargs, # noqa: ARG002 ) -> float: assert test_case.input # Extract claim data @@ -83,7 +82,7 @@ async def a_measure( self.success = self.score >= self.threshold return self.score - async def _score_data(self, key: str, throry: str, real: Optional[str]) -> float: + async def _score_data(self, key: str, throry: str, real: str | None) -> float: res, _ = await self.model.a_generate( f""" Assistant is a data analyst expert with 20 years of experience. @@ -230,7 +229,7 @@ def __name__(self): # pyright: ignore @with_conversations @pytest.mark.asyncio(scope="session") -async def test_llm( +async def test_llm( # noqa: PLR0913 call: CallStateModel, claim_tests_excl: list[str], deepeval_model: GPTModel, diff --git a/tests/search.py b/tests/search.py index 84c76910..9d018366 100644 --- a/tests/search.py +++ b/tests/search.py @@ -7,7 +7,7 @@ from deepeval.models.gpt_model import GPTModel from deepeval.test_case import LLMTestCase from pydantic import TypeAdapter -from pytest import assume # pylint: disable=no-name-in-module # pyright: ignore +from pytest import assume from helpers.config import CONFIG from helpers.logging import logger @@ -38,8 +38,8 @@ def measure( async def a_measure( self, test_case: LLMTestCase, - *args, - **kwargs, + *args, # noqa: ARG002 + **kwargs, # noqa: ARG002 ) -> float: assert test_case.input assert test_case.retrieval_context @@ -120,11 +120,11 @@ def __name__(self): # pyright: ignore @with_conversations @pytest.mark.asyncio(scope="session") @pytest.mark.repeat(10) # Catch multi-threading and concurrency issues -async def test_relevancy( +async def test_relevancy( # noqa: PLR0913 call: CallStateModel, - claim_tests_excl: list[str], + claim_tests_excl: list[str], # noqa: ARG001 deepeval_model: GPTModel, - expected_output: str, + expected_output: str, # noqa: ARG001 speeches: list[str], lang: str, ) -> None: diff --git a/tests/store.py b/tests/store.py index ab2e6650..dd17113b 100644 --- a/tests/store.py +++ b/tests/store.py @@ -1,5 +1,5 @@ import pytest -from pytest import assume # pylint: disable=no-name-in-module # pyright: ignore +from pytest import assume from helpers.config import CONFIG from helpers.config_models.database import ModeEnum as DatabaseModeEnum From 3d3718e09cdcc4d46a3a512f7524d78f843710b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Thu, 5 Sep 2024 19:37:16 +0200 Subject: [PATCH 03/12] dev: Option to disable content filter --- Makefile | 3 +++ bicep/app.bicep | 53 ++++++++++++++++++++++++++++++------------------ bicep/main.bicep | 4 +++- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 5dfd14ea..802d80b5 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,8 @@ name_sanitized := $(shell echo $(name) | tr '[:upper:]' '[:lower:]') bot_phone_number ?= $(shell cat config.yaml | yq '.communication_services.phone_number') event_subscription_name ?= $(shell echo '$(name_sanitized)-$(bot_phone_number)' | tr -dc '[:alnum:]-') twilio_phone_number ?= $(shell cat 
config.yaml | yq '.sms.twilio.phone_number')
+# Bicep inputs
+prompt_content_filter ?= true
 # Bicep outputs
 app_url ?= $(shell az deployment sub show --name $(name_sanitized) | yq '.properties.outputs["appUrl"].value')
 blob_storage_public_name ?= $(shell az deployment sub show --name $(name_sanitized) | yq '.properties.outputs["blobStoragePublicName"].value')
@@ -139,6 +141,7 @@ deploy-bicep:
 		'functionappLocation=$(functionapp_location)' \
 		'instance=$(name)' \
 		'openaiLocation=$(openai_location)' \
+		'promptContentFilter=$(prompt_content_filter)' \
 		'searchLocation=$(search_location)' \
 		'version=$(version_full)' \
 		--template-file bicep/main.bicep \
diff --git a/bicep/app.bicep b/bicep/app.bicep
index 7a392385..65c9cebb 100644
--- a/bicep/app.bicep
+++ b/bicep/app.bicep
@@ -16,6 +16,7 @@ param llmSlowQuota int
 param llmSlowVersion string
 param location string
 param openaiLocation string
+param promptContentFilter bool
 param searchLocation string
 param tags object
 param version string
@@ -473,6 +474,7 @@ resource contentfilter 'Microsoft.CognitiveServices/accounts/raiPolicies@2024-04
       contentFilters: [
         // Indirect attacks
         {
+          allowedContentLevel: 'Medium'
           blocking: true
           enabled: true
           name: 'indirect_attack'
@@ -480,6 +482,7 @@
         }
         // Jailbreak
         {
+          allowedContentLevel: 'Medium'
           blocking: true
           enabled: true
           name: 'jailbreak'
@@ -487,63 +490,73 @@
         }
         // Prompt
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'hate'
           source: 'Prompt'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'sexual'
           source: 'Prompt'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'selfharm'
           source: 'Prompt'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'violence'
           source: 'Prompt'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'profanity'
           source: 'Prompt'
         }
         // Completion
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'hate'
           source: 'Completion'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'sexual'
           source: 'Completion'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'selfharm'
           source: 'Completion'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'violence'
           source: 'Completion'
         }
         {
-          blocking: false
-          enabled: false
+          allowedContentLevel: 'Low'
+          blocking: promptContentFilter
+          enabled: promptContentFilter
           name: 'profanity'
           source: 'Completion'
         }
diff --git a/bicep/main.bicep b/bicep/main.bicep
index fdef80a1..2162b04d 100644
--- a/bicep/main.bicep
+++ b/bicep/main.bicep
@@ -1,5 +1,5 @@
 param cognitiveCommunicationLocation string
-param embeddingDeploymentType string = 'Standard' // Pay-as-you-go in a single region
+param embeddingDeploymentType string = 'Standard'
// Pay-as-you-go in a single region param embeddingModel string = 'text-embedding-ada-002' param embeddingQuota int = 100 param embeddingVersion string = '2' @@ -17,6 +17,7 @@ param llmSlowQuota int = 400 param llmSlowVersion string = '2024-05-13' param location string = deployment().location param openaiLocation string +param promptContentFilter bool = true // Should be set to false but requires a custom approval from Microsoft param searchLocation string param version string @@ -64,6 +65,7 @@ module app 'app.bicep' = { llmSlowVersion: llmSlowVersion location: location openaiLocation: openaiLocation + promptContentFilter: promptContentFilter searchLocation: searchLocation tags: tags version: version From af852c6b7bc8c82685f494a8802cb167ee8cad60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Fri, 6 Sep 2024 10:35:01 +0200 Subject: [PATCH 04/12] chore: Remove dead code --- Makefile | 4 ---- bicep/app.bicep | 1 - bicep/main.bicep | 1 - 3 files changed, 6 deletions(-) diff --git a/Makefile b/Makefile index 802d80b5..716d506a 100644 --- a/Makefile +++ b/Makefile @@ -13,17 +13,13 @@ search_location := francecentral # Sanitize variables name_sanitized := $(shell echo $(name) | tr '[:upper:]' '[:lower:]') # App configuration -bot_phone_number ?= $(shell cat config.yaml | yq '.communication_services.phone_number') -event_subscription_name ?= $(shell echo '$(name_sanitized)-$(bot_phone_number)' | tr -dc '[:alnum:]-') twilio_phone_number ?= $(shell cat config.yaml | yq '.sms.twilio.phone_number') # Bicep inputs prompt_content_filter ?= true # Bicep outputs app_url ?= $(shell az deployment sub show --name $(name_sanitized) | yq '.properties.outputs["appUrl"].value') blob_storage_public_name ?= $(shell az deployment sub show --name $(name_sanitized) | yq '.properties.outputs["blobStoragePublicName"].value') -communication_id ?= $(shell az deployment sub show --name $(name_sanitized) | yq '.properties.outputs["communicationId"].value') function_app_name ?= $(shell az deployment sub show --name $(name_sanitized) | yq '.properties.outputs["functionAppName"].value') -log_analytics_workspace_customer_id ?= $(shell az deployment sub show --name $(name_sanitized) | yq '.properties.outputs["logAnalyticsWorkspaceName"].value') version: @bash ./cicd/version/version.sh -g . 
-c diff --git a/bicep/app.bicep b/bicep/app.bicep index 65c9cebb..8e8d35f7 100644 --- a/bicep/app.bicep +++ b/bicep/app.bicep @@ -114,7 +114,6 @@ var config = { output appUrl string = appUrl output blobStoragePublicName string = storageAccount.name -output communicationId string = communicationServices.id output functionAppName string = functionAppName output logAnalyticsCustomerId string = logAnalytics.properties.customerId diff --git a/bicep/main.bicep b/bicep/main.bicep index 2162b04d..6e266f8e 100644 --- a/bicep/main.bicep +++ b/bicep/main.bicep @@ -25,7 +25,6 @@ targetScope = 'subscription' output appUrl string = app.outputs.appUrl output blobStoragePublicName string = app.outputs.blobStoragePublicName -output communicationId string = app.outputs.communicationId output functionAppName string = app.outputs.functionAppName output logAnalyticsCustomerId string = app.outputs.logAnalyticsCustomerId From f1964c715598da30b3c2bfd7f597ede990a684f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Fri, 6 Sep 2024 10:36:07 +0200 Subject: [PATCH 05/12] perf: Default LLM to fast, fast as GPT 4o-mini --- bicep/main.bicep | 10 +++++----- helpers/config_models/conversation.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bicep/main.bicep b/bicep/main.bicep index 6e266f8e..367e1a58 100644 --- a/bicep/main.bicep +++ b/bicep/main.bicep @@ -5,11 +5,11 @@ param embeddingQuota int = 100 param embeddingVersion string = '2' param functionappLocation string param instance string -param llmFastContext int = 16385 -param llmFastDeploymentType string = 'Standard' // Pay-as-you-go in a single region -param llmFastModel string = 'gpt-35-turbo' -param llmFastQuota int = 200 -param llmFastVersion string = '1106' +param llmFastContext int = 128000 +param llmFastDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions +param llmFastModel string = 'gpt-4o-mini' +param llmFastQuota int = 600 +param llmFastVersion string = '2024-07-18' param llmSlowContext int = 128000 param llmSlowDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions param llmSlowModel string = 'gpt-4o' diff --git a/helpers/config_models/conversation.py b/helpers/config_models/conversation.py index 6a7eed54..9de704c3 100644 --- a/helpers/config_models/conversation.py +++ b/helpers/config_models/conversation.py @@ -173,7 +173,7 @@ class ConversationModel(BaseModel): serialization_alias="voice_timeout_after_silence_sec", # Compatibility with v7 ) slow_llm_for_chat: bool = Field( - default=True, + default=False, serialization_alias="use_slow_llm_for_chat_as_default", # Compatibility with v7 ) voice_recognition_retry_max: int = Field( From 5186f58bf6a8163b351378303223ac2e7c4f09c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Fri, 6 Sep 2024 10:36:25 +0200 Subject: [PATCH 06/12] perf: Upgrade GPT 4o model to newer version --- bicep/main.bicep | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bicep/main.bicep b/bicep/main.bicep index 367e1a58..1ad4b7e0 100644 --- a/bicep/main.bicep +++ b/bicep/main.bicep @@ -13,8 +13,8 @@ param llmFastVersion string = '2024-07-18' param llmSlowContext int = 128000 param llmSlowDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions param llmSlowModel string = 'gpt-4o' -param llmSlowQuota int = 400 -param llmSlowVersion string = '2024-05-13' +param llmSlowQuota int = 300 +param llmSlowVersion string = '2024-08-06' param location string = deployment().location param 
openaiLocation string
param promptContentFilter bool = true // Should be set to false but requires a custom approval from Microsoft

From b5a7d4bcc493f55063d4b1592238c816318b5c52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?=
Date: Fri, 6 Sep 2024 10:36:43 +0200
Subject: [PATCH 07/12] quality: Code lint

---
 bicep/main.bicep | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bicep/main.bicep b/bicep/main.bicep
index 1ad4b7e0..f6c62d6c 100644
--- a/bicep/main.bicep
+++ b/bicep/main.bicep
@@ -11,7 +11,7 @@ param llmFastModel string = 'gpt-4o-mini'
 param llmFastQuota int = 600
 param llmFastVersion string = '2024-07-18'
 param llmSlowContext int = 128000
-param llmSlowDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions
+param llmSlowDeploymentType string = 'GlobalStandard' // Pay-as-you-go in all regions
 param llmSlowModel string = 'gpt-4o'
 param llmSlowQuota int = 300
 param llmSlowVersion string = '2024-08-06'

From b261c2c1808b7cc0dd9b6b7fb17269c2a0446dc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?=
Date: Sat, 7 Sep 2024 12:48:33 +0200
Subject: [PATCH 08/12] dev: Fix Python min version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 50a6bab8..78782ed2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ dynamic = ["version"]
 license = {file = "LICENSE"}
 name = "call-center-ai"
 readme = "README.md"
-requires-python = "==3.11"
+requires-python = ">=3.11"
 dependencies = [
   "aiohttp-retry==2.8.3", # Retry middleware for aiohttp, used with Twilio SDK
   "aiohttp[speedups]==3.9.5", # Async HTTP client for Azure and Twilio SDKs, plus async DNS resolver and async Brotli compression

From 357778073ac34682edf44150f69529cc5645ba2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?=
Date: Sat, 7 Sep 2024 13:05:16 +0200
Subject: [PATCH 09/12] chore: Remove dead code

---
 bicep/app.bicep | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/bicep/app.bicep b/bicep/app.bicep
index 8e8d35f7..166228b8 100644
--- a/bicep/app.bicep
+++ b/bicep/app.bicep
@@ -422,19 +422,6 @@ resource cognitiveCommunication 'Microsoft.CognitiveServices/accounts@2024-04-01
   }
 }

-resource cognitiveDocument 'Microsoft.CognitiveServices/accounts@2024-04-01-preview' = {
-  name: '${prefix}-${location}-document'
-  location: location
-  tags: tags
-  sku: {
-    name: 'S0' // Pay-as-you-go
-  }
-  kind: 'FormRecognizer'
-  properties: {
-    customSubDomainName: '${prefix}-${location}-document'
-  }
-}
-
 // Cognitive Services OpenAI Contributor
 resource roleOpenaiContributor 'Microsoft.Authorization/roleDefinitions@2022-04-01' existing = {
   name: 'a001fd3d-188f-4b5d-821b-7da978bf7442'
 }

From af852c6b7bc8c82685f494a8802cb167ee8cad60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?=
Date: Sat, 7 Sep 2024 13:28:45 +0200
Subject: [PATCH 10/12] fix: Some large sentences are spoken without pauses

---
 helpers/call_utils.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/helpers/call_utils.py b/helpers/call_utils.py
index 11322013..0ec4bbe9 100644
--- a/helpers/call_utils.py
+++ b/helpers/call_utils.py
@@ -54,6 +54,12 @@ def tts_sentence_split(
     """
     Split a text into sentences.

+    Whitespace is not returned, but punctuation is kept as it was in the original text.
+
+    Example:
+    - Input: "Hello, world! How are you? I'm fine. Thank you... Goodbye!"
+ - Output: [("Hello, world!", 13), ("How are you?", 12), ("I'm fine.", 9), ("Thank you...", 13), ("Goodbye!", 8)] + Returns a generator of tuples with the sentence and the original sentence length. """ # Split by sentence by punctuation @@ -313,12 +319,19 @@ async def _chunk_before_tts( chunks = [] chunk = "" for to_add, _ in tts_sentence_split(text, True): - if len(chunk) + len(to_add) >= _MAX_CHARACTERS_PER_TTS: - chunks.append(chunk.strip()) # Remove trailing space + if ( + len(chunk) + len(to_add) >= _MAX_CHARACTERS_PER_TTS + ): # If chunck overflows TTS capacity, start a new record + # Remove trailing space as sentences are separated by spaces + chunks.append(chunk.strip()) + # Reset chunk chunk = "" - chunk += to_add - if chunk: - chunks.append(chunk) + # Add space to separate sentences + chunk += to_add + " " + + if chunk: # If there is a remaining chunk, add it + # Remove trailing space as sentences are separated by spaces + chunks.append(chunk.strip()) return chunks From 2f6d9c1760342931ada139d7ea6cc996b4ec24e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Sat, 7 Sep 2024 13:33:07 +0200 Subject: [PATCH 11/12] dev: Enhance func ignore to lower package size --- .funcignore | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/.funcignore b/.funcignore index 5d0b2128..f6c013ed 100644 --- a/.funcignore +++ b/.funcignore @@ -1,15 +1,30 @@ # Local dev +__pycache__/ .devcontainer/ .gitmodules/ +.python_packages/ .python-version +.ruff_cache/ .syft.yaml +.venv/ +.version.cache +.version.config +.vscode/ +*.egg-info/ +*.pyc *.sqlite* -bicep/ -cicd/ +build/ +configs/ +DevTunnels/ Makefile requirements-dev.txt -# Local app config file +# CICD +.github/ +bicep/ +cicd/ + +# Config *.env *.settings.json config.* @@ -20,21 +35,7 @@ sbom-reports/ test-reports/ tests/ -# Documentation and examples +# Documentation *.md docs/ examples/ - -# Local cache -__pycache__/ -.python_packages/ -.venv/ -.version.cache -.version.config -*.egg-info/ -*.pyc -build/ -DevTunnels/ - -# GitHub -.github/ From eb388171a3f4a36196a4cbe87394a45ba2427a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mence=20Lesn=C3=A9?= Date: Sat, 7 Sep 2024 13:40:23 +0200 Subject: [PATCH 12/12] doc: Update doc to match new LLM versions --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0baa9a72..7c3508a7 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ curl \ - [x] Customizable prompts - [x] Disengaging from a human agent when needed - [x] Filter out inappropriate content from the LLM, like profanity or concurrence company names -- [x] Fine understanding of the customer request with GPT-4 Turbo +- [x] Fine understanding of the customer request with GPT-4o and GPT 4o-mini - [x] Follow a specific data schema for the claim - [x] Has access to a documentation database (few-shot training / RAG) - [x] Help the user to find the information needed to complete the claim @@ -380,16 +380,16 @@ llm: azure_openai: api_key: xxx context: 16385 - deployment: gpt-35-turbo-0125 + deployment: gpt-4o-mini-2024-07-18 endpoint: https://xxx.openai.azure.com - model: gpt-35-turbo + model: gpt-4o-mini streaming: true slow: mode: azure_openai azure_openai: api_key: xxx context: 128000 - deployment: gpt-4o-2024-05-13 + deployment: gpt-4o-2024-08-06 endpoint: https://xxx.openai.azure.com model: gpt-4o streaming: true @@ -620,9 +620,9 @@ llm: mode: openai openai: api_key: xxx - context: 16385 + 
context: 128000 endpoint: https://api.openai.com - model: gpt-35-turbo + model: gpt-4o-mini streaming: true slow: mode: openai @@ -630,7 +630,7 @@ llm: api_key: xxx context: 128000 endpoint: https://api.openai.com - model: gpt-4 + model: gpt-4o streaming: true ```
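For reference, the `fast`/`slow` blocks above are what `LlmModel.selected(is_fast)` (patched earlier in this series) chooses between. A standalone sketch of that selection shape, with simplified models and example values rather than the project's full config classes:

```python
from pydantic import BaseModel


class PlatformModel(BaseModel):
    context: int
    model: str


class SelectedPlatformModel(BaseModel):
    openai: PlatformModel

    def selected(self) -> PlatformModel:
        return self.openai


class LlmModel(BaseModel):
    fast: SelectedPlatformModel
    slow: SelectedPlatformModel

    def selected(self, is_fast: bool) -> PlatformModel:
        # Same shape as the patched method: pick the platform, then its config
        platform = self.fast if is_fast else self.slow
        return platform.selected()


llm = LlmModel(
    fast=SelectedPlatformModel(openai=PlatformModel(context=128000, model="gpt-4o-mini")),
    slow=SelectedPlatformModel(openai=PlatformModel(context=128000, model="gpt-4o")),
)
print(llm.selected(is_fast=True).model)  # gpt-4o-mini, the chat default since slow_llm_for_chat is now False
```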