# pyproject.toml — packaging and tooling configuration for scrape-it-now
# Core package metadata (PEP 621).
[project]
# NOTE(review): both email values below read "[email protected]", which looks like
# web-obfuscation residue rather than a real address — confirm before publishing.
authors = [
    { name = "Clémence Lesné", email = "[email protected]" },
]
maintainers = [
    { name = "Clémence Lesné", email = "[email protected]" },
]
description = "Web scraper made for AI and simplicity in mind. It runs as a CLI that can be parallelized and outputs high-quality markdown content."
keywords = [
    "web",
    "scraper",
    "markdown",
    "ai",
    "parallel",
    "cli",
    "automation",
    "data-extraction",
    "web-crawling",
    "content-indexing",
]
# Trove classifiers; the advertised Python versions start at the
# `requires-python` floor declared further down in this table.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: MacOS",
    "Operating System :: Microsoft :: Windows",
    "Operating System :: OS Independent",
    "Operating System :: POSIX :: Linux",
    "Operating System :: POSIX",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Content Management System",
    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
    "Topic :: System :: Archiving :: Backup",
    "Topic :: Text Processing :: Markup :: Markdown",
    "Topic :: Utilities",
    "Typing :: Typed",
]
# The version is not written here; it is resolved at build time
# (see [tool.setuptools.dynamic]).
dynamic = ["version"]
license = { file = "LICENSE" }
name = "scrape-it-now"
readme = "README.md"
requires-python = ">=3.11"
# Runtime requirements, sorted alphabetically. Every pin uses the PEP 440
# compatible-release operator: `~=X.Y` accepts any release >=X.Y that keeps
# the same leading version segment.
dependencies = [
    "aiodns~=3.2",
    "aiofiles~=24.1",
    "aiohttp~=3.10",
    "aiojobs~=1.3",
    "aiosqlite~=0.20",
    "azure-identity~=1.17",
    "azure-monitor-opentelemetry~=1.6",
    # The `a0` suffix puts a pre-release in the specifier.
    # NOTE(review): presumably intentional so installers accept pre-release builds — confirm.
    "azure-search-documents~=11.6a0",
    "azure-storage-blob~=12.22",
    "azure-storage-queue~=12.11",
    "click~=8.1",
    "openai~=1.42",
    # `~=0.0a0` matches any 0.x release, pre-releases included.
    # NOTE(review): presumably because these instrumentation packages only ship pre-release versions — confirm.
    "opentelemetry-instrumentation-aiohttp-client~=0.0a0",
    "opentelemetry-instrumentation-httpx~=0.0a0",
    "opentelemetry-instrumentation-openai~=0.0a0",
    "opentelemetry-instrumentation-redis~=0.0a0",
    "opentelemetry-instrumentation-sqlite3~=0.0a0",
    "playwright~=1.46",
    "pydantic~=2.8",
    "pypandoc~=1.13",
    "python-dotenv~=1.0",
    "structlog~=24.4",
    "tenacity~=9.0",
    "tiktoken~=0.7",
    "truststore~=0.10",
]
# Development-only tooling, installed through the `dev` extra.
[project.optional-dependencies]
dev = [
    "deptry~=0.19", # Dependency tree testing
    "pyinstaller~=6.11", # Create standalone executable
    "pyright~=1.1", # Static type checker
    # Floor is 0.24 because [tool.pytest.ini_options] sets
    # `asyncio_default_fixture_loop_scope`, an option pytest-asyncio only
    # recognises from 0.24.0 onwards; 0.23.x would report it as unknown.
    "pytest-asyncio~=0.24", # Pytest plugin for async tests
    "pytest-repeat~=0.9", # Pytest plugin for repeating tests
    "pytest-xdist[psutil]~=3.6", # Pytest plugin for parallel testing
    "pytest~=8.3", # Testing framework
    "ruff~=0.6", # Linter
]
# Project links published with the package metadata. Homepage and Repository
# both point at the same GitHub repository; Issues points at its tracker.
[project.urls]
Homepage = "https://github.com/clemlesne/scrape-it-now"
Issues = "https://github.com/clemlesne/scrape-it-now/issues"
Repository = "https://github.com/clemlesne/scrape-it-now"
# Console entry point: the `scrape-it-now` command resolves to the `main`
# object in the `scrape_it_now.cli` module.
[project.scripts]
scrape-it-now = "scrape_it_now.cli:main"
# Build backend declaration. `~=75.3` accepts setuptools >=75.3 while staying
# inside the 75.x series.
[build-system]
requires = ["setuptools~=75.3"]
build-backend = "setuptools.build_meta"
# Supplies the `version` field that [project] declares as dynamic: setuptools
# reads it from the `__version__` attribute of the `scrape_it_now` package.
[tool.setuptools.dynamic]
version = { attr = "scrape_it_now.__version__" }

# src layout: package discovery starts from the `src` directory.
[tool.setuptools.packages.find]
where = ["src"]

# Bundle every file matching `resources/*` inside the `scrape_it_now` package.
[tool.setuptools.package-data]
scrape_it_now = ["resources/*"]
[tool.pytest.ini_options]
# pytest-asyncio settings: "auto" mode, with async fixtures defaulting to a
# function-scoped event loop.
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"

# Reporting: JUnit suite name, plus a log file capturing INFO and above.
junit_suite_name = "scrape-it-now"
log_file = "test-reports/last-logs.txt"
log_file_level = "INFO"

# Discovery: tests live in `tests`, and `src` is put on the import path so the
# package can be imported from the src layout.
pythonpath = ["src"]
testpaths = ["tests"]
[tool.deptry]
ignore_notebooks = true
# Treat the `dev` optional-dependency group as development-only requirements.
pep621_dev_dependency_groups = ["dev"]

[tool.deptry.per_rule_ignores]
# Packages exempted from rule DEP002.
# NOTE(review): presumably declared on purpose although never imported directly
# by the project code — confirm.
DEP002 = [
    "aiodns", # Async DNS resolver for aiohttp
    "aiohttp", # Async HTTP client for Azure SDKs
]
# Ruff targets Python 3.11, matching the `requires-python` floor in [project].
[tool.ruff]
target-version = "py311"

[tool.ruff.lint.isort]
# Keep `import ... as ...` members combined when sorting imports.
combine-as-imports = true

[tool.ruff.format]
# Also format code examples embedded in docstrings.
docstring-code-format = true
# Pyright analyses the code as Python 3.11, the same floor as the
# `requires-python = ">=3.11"` setting in [project].
[tool.pyright]
pythonVersion = "3.11"