Skip to content

Commit

Permalink
perf(web): avoid CookieJar's overhead if no cookie
Browse files Browse the repository at this point in the history
See also aio-libs/aiohttp#7583

Signed-off-by: Rongrong <[email protected]>
  • Loading branch information
Rongronggg9 committed Nov 12, 2023
1 parent c5a6f02 commit 664da37
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/web/req.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ..compat import nullcontext, ssl_create_default_context, AiohttpUvloopTransportHotfix
from ..aio_helper import run_async
from ..errors_collection import RetryInIpv4
from .utils import WebResponse, proxy_filter, logger, sentinel
from .utils import YummyCookieJar, WebResponse, proxy_filter, logger, sentinel

DEFAULT_READ_BUFFER_SIZE: Final = 2 ** 16

Expand Down Expand Up @@ -132,7 +132,7 @@ async def _get(url: str, resp_callback: Callable, timeout: Optional[float] = sen

async def _fetch():
async with aiohttp.ClientSession(connector=proxy_connector, timeout=aiohttp.ClientTimeout(total=timeout),
headers=_headers) as session:
headers=_headers, cookie_jar=YummyCookieJar()) as session:
async with session.get(url, read_bufsize=read_bufsize, read_until_eof=read_until_eof) as response:
async with AiohttpUvloopTransportHotfix(response):
status_url_history = [(resp.status, resp.url) for resp in response.history]
Expand Down
43 changes: 43 additions & 0 deletions src/web/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from typing import Union, Optional, AnyStr
from typing_extensions import Final

import aiohttp
import aiohttp.abc
import feedparser
from contextlib import suppress
from dataclasses import dataclass
Expand All @@ -24,6 +26,47 @@
sentinel = object()


class YummyCookieJar(aiohttp.abc.AbstractCookieJar):
    """
    A cookie jar that acts as a DummyCookieJar in the initial state.
    It only switches to a real CookieJar the first time ``update_cookies`` is called with a non-empty set of cookies.
    In our use case, it is common that the response does not contain any cookie, as we mostly fetch RSS feeds and
    multimedia files.
    As a result, the cookie jar is mostly empty, and the overhead of CookieJar, which is expensive, is unnecessary.
    So it is expected that YummyCookieJar will seldom switch to CookieJar, acting as a DummyCookieJar most of the time.
    See also https://github.com/aio-libs/aiohttp/issues/7583

    All other methods simply delegate to whichever jar is currently active.
    """

    def __init__(self, *args, **kwargs):
        """
        Start out wrapping a ``DummyCookieJar``.

        The positional and keyword arguments are forwarded as-is to ``AbstractCookieJar``, to the initial
        ``DummyCookieJar`` and, if a switch ever happens, to ``CookieJar``; they must therefore be accepted by
        all three constructors.
        """
        super().__init__(*args, **kwargs)
        self.__real_cookie_jar = aiohttp.DummyCookieJar(*args, **kwargs)
        # Remembered so that the real CookieJar can be built lazily with the same arguments.
        self.__init_args = args
        self.__init_kwargs = kwargs
        self.__is_dummy = True

    def update_cookies(self, *args, **kwargs):
        """
        Store cookies, switching to a real ``CookieJar`` on the first non-empty update.

        Accepts the same arguments as ``AbstractCookieJar.update_cookies`` (``cookies`` first, either positionally
        or by keyword). While still in the dummy state, an update with no/empty cookies does NOT trigger the
        switch: update_cookies may be invoked with an empty cookie container (e.g. for a response that set no
        cookie), and switching in that case would defeat the purpose of this class.
        """
        if self.__is_dummy:
            cookies = args[0] if args else kwargs.get('cookies')
            # An empty mapping/sequence/SimpleCookie is falsy; anything else (including a one-shot iterator,
            # whose emptiness cannot be checked without consuming it) conservatively triggers the switch.
            if cookies:
                self.__real_cookie_jar = aiohttp.CookieJar(*self.__init_args, **self.__init_kwargs)
                self.__is_dummy = False
        return self.__real_cookie_jar.update_cookies(*args, **kwargs)

    def __iter__(self):
        """Iterate over the cookies held by the currently active jar."""
        return self.__real_cookie_jar.__iter__()

    def __len__(self) -> int:
        """Return the number of cookies held by the currently active jar."""
        return self.__real_cookie_jar.__len__()

    def clear(self, *args, **kwargs):
        """Delegate ``clear`` to the currently active jar."""
        return self.__real_cookie_jar.clear(*args, **kwargs)

    def clear_domain(self, *args, **kwargs):
        """Delegate ``clear_domain`` to the currently active jar."""
        return self.__real_cookie_jar.clear_domain(*args, **kwargs)

    def filter_cookies(self, *args, **kwargs):
        """Delegate ``filter_cookies`` to the currently active jar."""
        return self.__real_cookie_jar.filter_cookies(*args, **kwargs)


class WebError(Exception):
def __init__(self, error_name: str, status: Union[int, str] = None, url: str = None,
base_error: Exception = None, hide_base_error: bool = False, log_level: int = log.DEBUG):
Expand Down

0 comments on commit 664da37

Please sign in to comment.