From 137281a1804fc21ff7a02ff040f63fc8f7011f56 Mon Sep 17 00:00:00 2001
From: Rongrong
Date: Tue, 5 Nov 2024 02:19:34 +0800
Subject: [PATCH] refactor: move calc_next_check_as_per_server_side_cache into
 web.WebFeed

Signed-off-by: Rongrong
---
Review note (ignored by `git am`): the moved method now keeps the
`expires` truthiness guard that the removed helper had, so a response
with a Cloudflare `cf-cache-status` but no `Expires` value no longer
raises TypeError on `None > now`. A docstring was added as well. The
blob hashes on the `index` lines are those of the original patch.

 src/monitor/_monitor.py | 31 +------------------------------
 src/web/utils.py        | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/src/monitor/_monitor.py b/src/monitor/_monitor.py
index 750bf1ff5f..e4ad81f3b6 100644
--- a/src/monitor/_monitor.py
+++ b/src/monitor/_monitor.py
@@ -313,7 +313,7 @@ async def _do_monitor_a_feed(self, feed: db.Feed, now: datetime):
             feed.etag = etag
             feed_updated_fields.add('etag')
 
-        new_next_check_time = _defer_next_check_as_per_server_side_cache(wf)
+        new_next_check_time = wf.calc_next_check_as_per_server_side_cache()
 
         if not rss_d.entries:  # empty
             logger.debug(f'Fetched (not updated, empty): {feed.link}')
@@ -362,32 +362,3 @@ async def _do_monitor_a_feed(self, feed: db.Feed, now: datetime):
         await Notifier(feed=feed, subs=subs, entries=updated_entries).notify_all()
         stat.updated()
         return
-
-
-def _defer_next_check_as_per_server_side_cache(wf: web.WebFeed) -> Optional[datetime]:
-    wr = wf.web_response
-    assert wr is not None
-    expires = wr.expires
-    now = wr.now
-
-    # defer next check as per Cloudflare cache
-    # https://developers.cloudflare.com/cache/concepts/cache-responses/
-    # https://developers.cloudflare.com/cache/how-to/edge-browser-cache-ttl/
-    if expires and wf.headers.get('cf-cache-status') in {'HIT', 'MISS', 'EXPIRED', 'REVALIDATED'} and expires > now:
-        return expires
-
-    # defer next check as per RSSHub TTL (or Cache-Control max-age)
-    # only apply when TTL > 5min,
-    # as it is the default value of RSSHub and disabling cache won't change it in some legacy versions
-    rss_d = wf.rss_d
-    if rss_d.feed.get('generator') == 'RSSHub' and (updated_str := rss_d.feed.get('updated')):
-        ttl_in_minute_str: str = rss_d.feed.get('ttl', '')
-        ttl_in_second = int(ttl_in_minute_str) * 60 if ttl_in_minute_str.isdecimal() else None
-        if ttl_in_second is None:
-            ttl_in_second = wr.max_age
-        if ttl_in_second and ttl_in_second > 300:
-            updated = web.utils.rfc_2822_8601_to_datetime(updated_str)
-            if updated and (next_check_time := updated + timedelta(seconds=ttl_in_second)) > now:
-                return next_check_time
-
-    return None
diff --git a/src/web/utils.py b/src/web/utils.py
index 4d7a0703ad..5814da00a2 100644
--- a/src/web/utils.py
+++ b/src/web/utils.py
@@ -221,6 +221,55 @@ class WebFeed:
 
     web_response: Optional[WebResponse] = None
 
+    def calc_next_check_as_per_server_side_cache(self) -> Optional[datetime]:
+        """
+        Work out until when the next check may be deferred, as per server-side cache hints.
+
+        :return: ``web_response.expires`` when the response went through Cloudflare cache
+            and has not expired yet, or RSSHub's ``updated`` + TTL when that is still in
+            the future; None if there is no web response or no usable hint.
+        """
+        wr = self.web_response
+        if wr is None:
+            return None
+        now = wr.now
+        expires = wr.expires
+
+        # defer next check as per Cloudflare cache
+        # https://developers.cloudflare.com/cache/concepts/cache-responses/
+        # https://developers.cloudflare.com/cache/how-to/edge-browser-cache-ttl/
+        # `expires` may be missing, so it must be checked before being compared with `now`
+        if (
+            expires
+            and
+            self.headers.get('cf-cache-status') in {'HIT', 'MISS', 'EXPIRED', 'REVALIDATED'}
+            and
+            expires > now
+        ):
+            return expires
+
+        # defer next check as per RSSHub TTL (or Cache-Control max-age)
+        # only apply when TTL > 5min,
+        # as it is the default value of RSSHub and disabling cache won't change it in some legacy versions
+        rss_d = self.rss_d
+        if (
+            rss_d.feed.get('generator') == 'RSSHub'
+            and
+            (updated_str := rss_d.feed.get('updated'))
+        ):
+            ttl_in_minute_str: str = rss_d.feed.get('ttl', '')
+            ttl_in_second = (
+                int(ttl_in_minute_str) * 60
+                if ttl_in_minute_str.isdecimal()
+                else wr.max_age
+            ) or -1
+            if ttl_in_second > 300:
+                updated = rfc_2822_8601_to_datetime(updated_str)
+                if updated and (next_check_time := updated + timedelta(seconds=ttl_in_second)) > now:
+                    return next_check_time
+
+        return None
+
 
 def proxy_filter(url: str, parse: bool = True) -> bool:
     if not (env.PROXY_BYPASS_PRIVATE or env.PROXY_BYPASS_DOMAINS):