From f61541de56399e6f429b737d30378a90d0e6fdc2 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Tue, 9 Jun 2026 11:47:05 +0200 Subject: [PATCH 1/2] fix(scrapy): make logging configuration idempotent --- src/apify/scrapy/_logging_config.py | 32 +++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/apify/scrapy/_logging_config.py b/src/apify/scrapy/_logging_config.py index fca3d4f7b..bcdb0e7ef 100644 --- a/src/apify/scrapy/_logging_config.py +++ b/src/apify/scrapy/_logging_config.py @@ -13,6 +13,12 @@ _SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted'] _ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS +# Mutable module state shared with the Scrapy logging monkey-patch installed by `initialize_logging`. +# `initialize_logging` refreshes `level`/`handler` on each call, and the patch (installed at most +# once) reads them so it always re-applies the latest configuration instead of values captured the +# first time it ran. Stored in a dict so the patch can read them without rebinding module globals. +_state: dict[str, Any] = {'level': 'INFO', 'handler': None, 'patched': False} + def _configure_logger(name: str | None, logging_level: str, handler: logging.Handler) -> None: """Clear and reconfigure the logger.""" @@ -23,26 +29,40 @@ def _configure_logger(name: str | None, logging_level: str, handler: logging.Han logger.propagate = False +def _configure_all_loggers() -> None: + """Apply the Apify handler and level to the root logger and all defined loggers.""" + handler = _state['handler'] + if handler is None: + return + for logger_name in [None, *_ALL_LOGGERS]: + _configure_logger(logger_name, _state['level'], handler) + + def initialize_logging() -> None: """Configure logging for Apify Actors and adjust Scrapy's logging settings.""" # Retrieve Scrapy project settings and determine the logging level. 
settings = get_project_settings() - logging_level = settings.get('LOG_LEVEL', 'INFO') # Default to INFO. + _state['level'] = settings.get('LOG_LEVEL', 'INFO') # Default to INFO. # Create a custom handler with the Apify log formatter. handler = logging.StreamHandler() handler.setFormatter(ActorLogFormatter(include_logger_name=True)) + _state['handler'] = handler # Configure the root logger and all other defined loggers. - for logger_name in [None, *_ALL_LOGGERS]: - _configure_logger(logger_name, logging_level, handler) + _configure_all_loggers() + + # Monkey-patch Scrapy's logging configuration to re-apply our settings whenever Scrapy + # reconfigures logging. Install the wrapper at most once; wrapping again on every call would + # nest wrappers on top of each other. + if _state['patched']: + return - # Monkey-patch Scrapy's logging configuration to re-apply our settings. original_configure_logging = scrapy_logging.configure_logging def new_configure_logging(*args: Any, **kwargs: Any) -> None: original_configure_logging(*args, **kwargs) - for logger_name in [None, *_ALL_LOGGERS]: - _configure_logger(logger_name, logging_level, handler) + _configure_all_loggers() scrapy_logging.configure_logging = new_configure_logging # ty: ignore[invalid-assignment] + _state['patched'] = True From 4c2c22c44a5f03a801844dc814f16dadb33a6f01 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Tue, 9 Jun 2026 12:55:48 +0200 Subject: [PATCH 2/2] style(scrapy): tighten comments and docstrings --- src/apify/scrapy/_logging_config.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/apify/scrapy/_logging_config.py b/src/apify/scrapy/_logging_config.py index bcdb0e7ef..2f51074a9 100644 --- a/src/apify/scrapy/_logging_config.py +++ b/src/apify/scrapy/_logging_config.py @@ -13,10 +13,9 @@ _SUPPLEMENTAL_LOGGERS = ['filelock', 'hpack', 'httpcore', 'protego', 'twisted'] _ALL_LOGGERS = _PRIMARY_LOGGERS + _SUPPLEMENTAL_LOGGERS -# Mutable module state shared with the 
Scrapy logging monkey-patch installed by `initialize_logging`. -# `initialize_logging` refreshes `level`/`handler` on each call, and the patch (installed at most -# once) reads them so it always re-applies the latest configuration instead of values captured the -# first time it ran. Stored in a dict so the patch can read them without rebinding module globals. +# Mutable state shared with the Scrapy monkey-patch below. `initialize_logging` refreshes +# `level`/`handler` on each call; the patch (installed once) reads them so it always applies the +# latest configuration rather than values captured when the patch was first installed. _state: dict[str, Any] = {'level': 'INFO', 'handler': None, 'patched': False} @@ -52,9 +51,8 @@ def initialize_logging() -> None: # Configure the root logger and all other defined loggers. _configure_all_loggers() - # Monkey-patch Scrapy's logging configuration to re-apply our settings whenever Scrapy - # reconfigures logging. Install the wrapper at most once; wrapping again on every call would - # nest wrappers on top of each other. + # Monkey-patch Scrapy's logging to re-apply our settings whenever it reconfigures logging. + # Install the wrapper at most once; otherwise repeated calls would nest wrappers. if _state['patched']: return