Repository Analysis

scrapy/scrapy

Scrapy, a fast high-level web crawling & scraping framework for Python.

28.4 — Moderate AI signal · View on GitHub
28.4
Adjusted Score
28.4
Raw Score
100%
Time Factor
2026-05-20
Last Push
61,980
Stars
Python
Language
79,694
Lines of Code
477
Files
1655
Pattern Hits
2026-05-31
Scan Date

Score History

Severity Breakdown

CRITICAL 10 · HIGH 91 · MEDIUM 83 · LOW 1471

Pattern Findings

1655 matches across 15 categories. Click a row to expand file-level details.

Hyper-Verbose Identifiers — 1085 hits · 1057 pts
Severity File Line Snippet
LOWtests/test_request_dict.py75 def test_callback_serialization(self):
LOWtests/test_request_dict.py83 def test_reference_callback_serialization(self):
LOWtests/test_request_dict.py94 def test_private_reference_callback_serialization(self):
LOWtests/test_request_dict.py105 def test_private_callback_serialization(self):
LOWtests/test_request_dict.py113 def test_mixin_private_callback_serialization(self):
LOWtests/test_request_dict.py121 def test_delegated_callback_serialization(self):
LOWtests/test_request_dict.py156 def test_callback_not_available(self):
LOWtests/test_utils_signal.py134 def test_error_logged_if_deferred_not_supported(self):
LOWtests/test_http_response_text.py147 def test_declared_encoding_invalid(self):
LOWtests/test_http_response_text.py178 def test_bom_is_removed_from_body(self):
LOWtests/test_http_response_text.py206 def test_replace_wrong_encoding(self):
LOWtests/test_http_response_text.py257 def test_selector_shortcuts_kwargs(self):
LOWtests/test_http_response_text.py278 def test_urljoin_with_base_url(self):
LOWtests/test_http_response_text.py336 def test_follow_selector_list(self):
LOWtests/test_http_response_text.py341 def test_follow_selector_invalid(self):
LOWtests/test_http_response_text.py346 def test_follow_selector_attribute(self):
LOWtests/test_http_response_text.py351 def test_follow_selector_no_href(self):
LOWtests/test_http_response_text.py359 def test_follow_whitespace_selector(self):
LOWtests/test_http_response_text.py425 def test_follow_all_css_skip_invalid(self):
LOWtests/test_http_response_text.py446 def test_follow_all_xpath_skip_invalid(self):
LOWtests/test_http_response_text.py463 def test_follow_all_too_many_arguments(self):
LOWtests/test_http_response_text.py589 def test_selector_shortcuts_kwargs(self):
LOWtests/test_contracts.py91 def returns_request_cb_kwargs(self, response, url):
LOWtests/test_contracts.py107 def returns_item_cb_kwargs_error_unexpected_keyword(self, response):
LOWtests/test_contracts.py115 def returns_item_cb_kwargs_error_missing_argument(self, response, arg):
LOWtests/test_contracts.py175 def scrapes_multiple_missing_fields(self, response):
LOWtests/test_contracts.py199 def invalid_regex_with_valid_contract(self, response):
LOWtests/test_contracts.py223 def returns_error_missing_meta(self, response):
LOWtests/test_contracts.py542 def test_custom_tagged_request_contract(self):
LOWtests/test_contracts.py577 def test_pre_hook_keyboard_interrupt(self):
LOWtests/test_contracts.py591 def test_post_hook_keyboard_interrupt(self):
LOWtests/test_command_parse.py115 def parse_request_with_cb_kwargs(self, response, foo=None, key=None):
LOWtests/test_command_parse.py121 def parse_request_without_meta(self, response):
LOWtests/test_command_parse.py232 def test_request_with_cb_kwargs(
LOWtests/test_command_parse.py254 def test_request_without_meta(
LOWtests/test_command_parse.py283 def test_async_def_asyncio_parse_items_list(
LOWtests/test_command_parse.py299 def test_async_def_asyncio_parse_items_single_element(
LOWtests/test_command_parse.py314 def test_async_def_asyncgen_parse_loop(
LOWtests/test_command_parse.py330 def test_async_def_asyncgen_parse_exc(
LOWtests/test_command_parse.py346 def test_async_def_asyncio_parse(
LOWtests/test_command_parse.py372 def test_parse_items_no_callback_passed(
LOWtests/test_command_parse.py384 def test_wrong_callback_passed(
LOWtests/test_command_parse.py399 def test_crawlspider_matching_rule_callback_set(
LOWtests/test_command_parse.py413 def test_crawlspider_matching_rule_default_callback(
LOWtests/test_command_parse.py427 def test_spider_with_no_rules_attribute(
LOWtests/test_command_parse.py442 def test_crawlspider_missing_callback(
LOWtests/test_command_parse.py455 def test_crawlspider_no_matching_rule(
LOWtests/test_command_parse.py470 def test_crawlspider_not_exists_with_not_matched_url(
LOWtests/test_logstats.py59 def test_stats_calculations_no_time(self):
LOWtests/test_logstats.py68 def test_stats_calculation_no_elapsed_time(self):
LOWtests/test_http_request.py231 def test_copy_inherited_classes(self):
LOWtests/test_http_request.py269 def test_immutable_attributes(self):
LOWtests/test_http_request.py276 def test_callback_and_errback(self):
LOWtests/test_http_request.py308 def test_callback_and_errback_type(self):
LOWtests/test_http_request.py336 def test_setter_mutable_lazy_loading(self):
LOWtests/test_http_request.py441 def test_from_curl_with_kwargs(self):
LOWtests/test_http_request.py448 def test_from_curl_ignore_unknown_options(self):
LOWtests/test_command_genspider.py66 def test_same_name_as_project(self, proj_path: Path) -> None:
LOWtests/test_command_genspider.py73 def test_same_filename_as_existing_spider(
LOWtests/test_command_genspider.py167 def test_generate_standalone_spider(self, tmp_path: Path) -> None:
1025 more matches not shown…
Cross-Language Confusion — 44 hits · 285 pts
Severity File Line Snippet
HIGHtests/test_squeues_request.py54 q.push(req)
HIGHtests/test_squeues_request.py88 q.push(req1)
HIGHtests/test_squeues_request.py89 q.push(req2)
HIGHtests/test_squeues_request.py90 q.push(req3)
HIGHtests/test_pqueues.py65 queue.push(req1)
HIGHtests/test_pqueues.py66 queue.push(req2)
HIGHtests/test_pqueues.py67 queue.push(req3)
HIGHtests/test_pqueues.py89 queue.push(req1)
HIGHtests/test_pqueues.py90 queue.push(req2)
HIGHtests/test_pqueues.py91 queue.push(req3)
HIGHtests/test_pqueues.py119 self.queue.push(req1)
HIGHtests/test_pqueues.py120 self.queue.push(req2)
HIGHtests/test_pqueues.py121 self.queue.push(req3)
HIGHtests/test_pqueues.py148 self.queue.push(req1)
HIGHtests/test_pqueues.py149 self.queue.push(req2)
HIGHtests/test_pqueues.py150 self.queue.push(req3)
HIGHtests/test_pqueues.py30 queue.push(req1)
HIGHtests/test_pqueues.py45 queue.push(Request("https://example.org"))
HIGHtests/test_pqueues.py134 self.queue.push(Request("https://example.org"))
HIGHtests/test_pqueues.py175 self.queue.push(request)
HIGHtests/test_pqueues.py199 self.queue.push(request)
HIGHtests/test_pqueues.py255 queue.push(request)
HIGHtests/test_squeues.py38 q.push(lambda x: x)
HIGHtests/test_squeues.py44 q.push(sel)
HIGHtests/test_squeues.py50 q.push("a")
HIGHtests/test_squeues.py51 q.push(123)
HIGHtests/test_squeues.py52 q.push({"a": "dict"})
HIGHtests/test_squeues.py92 q.push(i)
HIGHtests/test_squeues.py100 q.push(loader)
HIGHtests/test_squeues.py110 q.push(r)
HIGHtests/test_squeues.py122 q.push(lambda x: x)
HIGHtests/test_squeues.py131 q.push(sel)
HIGHtests/test_squeues.py155 q.push("a")
HIGHtests/test_squeues.py156 q.push(123)
HIGHtests/test_squeues.py157 q.push({"a": "dict"})
HIGHtests/test_squeues.py177 q.push(i)
HIGHtests/test_squeues.py185 q.push(loader)
HIGHtests/test_squeues.py195 q.push(r)
HIGHtests/test_settings/__init__.py898 {"FOO": '{"tests.test_settings.Component1": null}'},
HIGHscrapy/pqueues.py180 q.push(request) # this may fail (eg. serialization error)
HIGHscrapy/pqueues.py414 queue.push(request)
HIGHscrapy/core/scheduler.py418 self.dqs.push(request)
HIGHscrapy/core/scheduler.py439 self.mqs.push(request)
HIGHscrapy/spidermiddlewares/referer.py74 If url is null, return no referrer.
Unused Imports — 271 hits · 268 pts
Severity File Line Snippet
LOWconftest.py1
LOWconftest.py54
LOWtests/test_http_response_text.py1
LOWtests/test_command_parse.py1
LOWtests/test_command_genspider.py1
LOWtests/test_engine.py1
LOWtests/test_engine.py44
LOWtests/test_zz_resources.py3
LOWtests/test_spidermiddleware.py1
LOWtests/test_downloader_handler_twisted_http11.py3
LOWtests/test_stats.py1
LOWtests/test_linkextractors.py1
LOWtests/test_command_runspider.py1
LOWtests/test_loader.py1
LOWtests/test_middleware.py1
LOWtests/test_http_request_json.py1
LOWtests/test_http2_client_protocol.py1
LOWtests/test_utils_iterators.py1
LOWtests/test_utils_log.py1
LOWtests/test_spider_sitemap.py1
LOWtests/test_spidermiddleware_urllength.py1
LOWtests/test_downloader_handler_twisted_http2.py3
LOWtests/test_feedexport_postprocess.py1
LOWtests/test_feedexport_uri_params.py1
LOWtests/test_spidermiddleware_referer.py1
LOWtests/spiders.py5
LOWtests/test_downloadermiddleware_robotstxt.py1
LOWtests/test_crawler_subprocess.py1
LOWtests/test_robotstxt_interface.py16
LOWtests/test_engine_stop_download_bytes.py1
LOWtests/test_http_request_form.py1
LOWtests/test_core_downloader.py1
LOWtests/test_core_downloader.py40
LOWtests/test_scheduler_base.py1
LOWtests/test_downloadermiddleware.py1
LOWtests/test_spider.py1
LOWtests/test_squeues_request.py5
LOWtests/test_spidermiddleware_base.py1
LOWtests/test_scheduler.py1
LOWtests/test_downloader_handler_httpx.py3
LOWtests/test_extension_periodic_log.py1
LOWtests/test_utils_defer.py1
LOWtests/test_spider_crawl.py1
LOWtests/test_downloadermiddleware_httpcompression.py69
LOWtests/test_downloadermiddleware_httpcompression.py214
LOWtests/test_crawl.py1
LOWtests/test_downloadermiddleware_redirect_metarefresh.py1
LOWtests/test_downloader_handlers.py3
LOWtests/test_downloader_handlers.py195
LOWtests/test_downloader_handlers_http_base.py3
LOWtests/test_utils_python.py1
LOWtests/test_command_shell.py1
LOWtests/test_pipeline_images.py1
LOWtests/test_pipeline_crawl.py1
LOWtests/test_pipeline_crawl.py223
LOWtests/test_commands.py1
LOWtests/test_feedexport_batch.py1
LOWtests/test_spidermiddleware_depth.py1
LOWtests/test_pipeline_files.py706
LOWtests/test_feedexport_storages.py1
211 more matches not shown…
Cross-File Repetition — 47 hits · 235 pts
Severity File Line Snippet
HIGHtests/test_http_response_text.py0<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>
HIGHtests/test_http_response_text.py0<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>
HIGHtests/test_http_response_text.py0<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>
HIGHtests/test_contracts.py0method which returns item @url http://scrapy.org @returns items 1 1
HIGHtests/test_contracts.py0method which returns item @url http://scrapy.org @returns items 1 1
HIGHtests/test_contracts.py0method which returns item @url http://scrapy.org @returns items 1 1
HIGHtests/test_contracts.py0returns item with no name @url http://scrapy.org @returns items 1 1 @scrapes name url
HIGHtests/test_contracts.py0returns item with no name @url http://scrapy.org @returns items 1 1 @scrapes name url
HIGHtests/test_contracts.py0returns item with no name @url http://scrapy.org @returns items 1 1 @scrapes name url
HIGHtests/test_linkextractors.py0<div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div
HIGHtests/test_linkextractors.py0<div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div
HIGHtests/test_linkextractors.py0<div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div
HIGHtests/test_linkextractors.py0<div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div
HIGHtests/test_command_runspider.py0import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('feeds: {}'.form
HIGHtests/test_command_runspider.py0import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('feeds: {}'.form
HIGHtests/test_command_crawl.py0import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('feeds: {}'.form
HIGHtests/test_loader.py0values added after initialization should be appended
HIGHtests/test_loader.py0values added after initialization should be appended
HIGHtests/test_loader.py0values added after initialization should be appended
HIGHtests/test_loader.py0values added after initialization should be appended
HIGHtests/test_utils_iterators.py0<?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:g="http://base.google.com/ns/1.0"> <channel> <title>my d
HIGHtests/test_utils_iterators.py0<?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:g="http://base.google.com/ns/1.0"> <channel> <title>my d
HIGHtests/test_utils_iterators.py0<?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:g="http://base.google.com/ns/1.0"> <channel> <title>my d
HIGHtests/test_http_request_form.py0<form action="get.php" method="post"> <input type="hidden" name="one" value="1"> <input type="hidden" name="two" value="
HIGHtests/test_http_request_form.py0<form action="get.php" method="post"> <input type="hidden" name="one" value="1"> <input type="hidden" name="two" value="
HIGHtests/test_http_request_form.py0<form action="get.php" method="post"> <input type="hidden" name="one" value="1"> <input type="hidden" name="two" value="
HIGHtests/test_http_request_form.py0<form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name=
HIGHtests/test_http_request_form.py0<form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name=
HIGHtests/test_http_request_form.py0<form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name=
HIGHtests/test_http_request_form.py0<form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name=
HIGHtests/test_command_crawl.py0import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('it works!') ret
HIGHtests/test_command_crawl.py0import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('it works!') ret
HIGHtests/test_command_crawl.py0import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('it works!') ret
HIGHtests/test_feedexport.py0<?xml version="1.0" encoding="utf-8"?> <items> <item> <foo> <value>bar</value> </foo> </item> <item> <key>value</key> </
HIGHtests/test_feedexport.py0<?xml version="1.0" encoding="utf-8"?> <items> <item> <foo> <value>bar</value> </foo> </item> <item> <key>value</key> </
HIGHtests/test_feedexport.py0<?xml version="1.0" encoding="utf-8"?> <items> <item> <foo> <value>bar</value> </foo> </item> <item> <key>value</key> </
HIGHtests/test_settings/__init__.py0{ "tests.test_settings.component1": 0, "tests.test_settings.component1alias": 1, "tests.test_settings.component1subclass
HIGHtests/test_settings/__init__.py0{ "tests.test_settings.component1": 0, "tests.test_settings.component1alias": 1, "tests.test_settings.component1subclass
HIGHtests/test_settings/__init__.py0{ "tests.test_settings.component1": 0, "tests.test_settings.component1alias": 1, "tests.test_settings.component1subclass
HIGHscrapy/pqueues.py0returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement
HIGHscrapy/pqueues.py0returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement
HIGHscrapy/squeues.py0returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement
HIGHscrapy/squeues.py0returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement
HIGHscrapy/squeues.py0returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement
HIGHscrapy/http/response/__init__.py0shortcut method implemented only by responses whose content is text (subclasses of textresponse).
HIGHscrapy/http/response/__init__.py0shortcut method implemented only by responses whose content is text (subclasses of textresponse).
HIGHscrapy/http/response/__init__.py0shortcut method implemented only by responses whose content is text (subclasses of textresponse).
Hallucination Indicators — 10 hits · 125 pts
Severity File Line Snippet
CRITICALtests/test_engine_loop.py38 self.crawler.engine._slot.scheduler.pause()
CRITICALtests/test_engine_loop.py39 self.crawler.engine._slot.scheduler.enqueue_request(Request("data:,b"))
CRITICALtests/test_engine_loop.py45 self.crawler.engine._slot.scheduler.unpause()
CRITICALtests/test_engine_loop.py54 self.crawler.engine._slot.scheduler.pause()
CRITICALtests/test_engine_loop.py55 self.crawler.engine._slot.scheduler.enqueue_request(Request("data:,d"))
CRITICALtests/test_engine_loop.py203 spider.crawler.engine._slot.scheduler.enqueue_request(request)
CRITICALtests/test_engine_loop.py243 spider.crawler.engine._slot.scheduler.enqueue_request(request)
CRITICALtests/test_engine_loop.py291 spider.crawler.engine._slot.scheduler.enqueue_request(request)
CRITICALtests/test_engine_loop.py304 spider.crawler.engine._slot.scheduler.enqueue_request(request)
CRITICALscrapy/extensions/throttle.py102 return key, self.crawler.engine.downloader.slots.get(key)
Dead Code — 59 hits · 118 pts
Severity File Line Snippet
MEDIUMtests/test_engine.py628
MEDIUMtests/test_spidermiddleware.py77
MEDIUMtests/test_utils_python.py41
MEDIUMtests/test_crawler.py666
MEDIUMtests/test_crawler.py190
MEDIUMtests/test_crawler.py270
MEDIUMtests/test_crawler.py350
MEDIUMtests/test_crawler.py430
MEDIUMtests/test_crawler.py534
MEDIUMtests/test_engine_loop.py210
MEDIUMtests/test_engine_loop.py250
MEDIUMtests/test_engine_loop.py308
MEDIUMtests/AsyncCrawlerRunner/reactorless_simple.py15
MEDIUMtests/AsyncCrawlerRunner/custom_loop_same.py20
MEDIUMtests/AsyncCrawlerRunner/reactorless_custom_settings.py18
MEDIUMtests/AsyncCrawlerRunner/custom_loop_different.py20
MEDIUMtests/AsyncCrawlerRunner/multi_parallel.py15
MEDIUMtests/AsyncCrawlerRunner/reactorless_reactor.py14
MEDIUMtests/AsyncCrawlerRunner/no_reactor.py13
MEDIUMtests/AsyncCrawlerRunner/simple_default_reactor.py14
MEDIUMtests/AsyncCrawlerRunner/simple.py17
MEDIUMtests/AsyncCrawlerRunner/multi_seq.py15
MEDIUMtests/CrawlerProcess/asyncio_enabled_no_reactor.py17
MEDIUMtests/CrawlerProcess/args_settings.py19
MEDIUMtests/CrawlerProcess/reactor_select.py15
MEDIUMtests/CrawlerProcess/multi.py10
MEDIUMtests/CrawlerProcess/asyncio_enabled_reactor.py53
MEDIUM…ts/CrawlerProcess/asyncio_enabled_reactor_same_loop.py22
MEDIUM…awlerProcess/reactor_default_twisted_reactor_select.py13
MEDIUM…awlerProcess/asyncio_enabled_reactor_different_loop.py20
MEDIUM…cess/reactor_select_subclass_twisted_reactor_select.py22
MEDIUMtests/CrawlerProcess/reactor_default.py13
MEDIUMtests/CrawlerProcess/simple.py12
MEDIUM…rawlerProcess/reactor_select_twisted_reactor_select.py14
MEDIUMtests/CrawlerProcess/asyncio_custom_loop.py10
MEDIUMtests/CrawlerRunner/explicit_default_reactor.py17
MEDIUMtests/CrawlerRunner/custom_loop_same.py19
MEDIUMtests/CrawlerRunner/change_reactor.py15
MEDIUMtests/CrawlerRunner/custom_loop_different.py19
MEDIUMtests/CrawlerRunner/multi_parallel.py14
MEDIUMtests/CrawlerRunner/no_reactor.py13
MEDIUMtests/CrawlerRunner/simple.py16
MEDIUMtests/CrawlerRunner/multi_seq.py15
MEDIUMtests/AsyncCrawlerProcess/asyncio_enabled_no_reactor.py17
MEDIUMtests/AsyncCrawlerProcess/reactorless_simple.py12
MEDIUMtests/AsyncCrawlerProcess/args_settings.py19
MEDIUM…sts/AsyncCrawlerProcess/reactorless_custom_settings.py23
MEDIUMtests/AsyncCrawlerProcess/multi.py10
MEDIUMtests/AsyncCrawlerProcess/asyncio_enabled_reactor.py43
MEDIUM…yncCrawlerProcess/asyncio_enabled_reactor_same_loop.py21
MEDIUM…ncCrawlerProcess/reactorless_telnetconsole_disabled.py10
MEDIUM…ocess/asyncio_custom_loop_custom_settings_different.py13
MEDIUM…lerProcess/asyncio_custom_loop_custom_settings_same.py13
MEDIUM…awlerProcess/asyncio_enabled_reactor_different_loop.py19
MEDIUM…yncCrawlerProcess/reactorless_telnetconsole_enabled.py10
MEDIUMtests/AsyncCrawlerProcess/reactorless_import_hook.py12
MEDIUMtests/AsyncCrawlerProcess/reactor_default.py12
MEDIUMtests/AsyncCrawlerProcess/simple.py12
MEDIUMtests/AsyncCrawlerProcess/asyncio_custom_loop.py10
Excessive Try-Catch Wrapping — 61 hits · 69 pts
Severity File Line Snippet
LOWtests/test_downloadermiddleware_httpcache.py260 except Exception:
LOWtests/test_pipeline_media.py115 except Exception as exc:
MEDIUMtests/test_crawler.py234def from_crawler(cls, crawler):
MEDIUMtests/test_crawler.py314def from_crawler(cls, crawler):
MEDIUMtests/test_crawler.py394def from_crawler(cls, crawler):
MEDIUMtests/test_crawler.py474def from_crawler(cls, crawler):
LOWtests/test_crawler.py237 except Exception as e:
LOWtests/test_crawler.py317 except Exception as e:
LOWtests/test_crawler.py397 except Exception as e:
LOWtests/test_crawler.py477 except Exception as e:
LOW…awlerProcess/twisted_reactor_custom_settings_select.py23 except Exception:
LOW…sts/AsyncCrawlerProcess/reactorless_custom_settings.py29 except Exception:
LOWscrapy/crawler.py194 except Exception:
LOWscrapy/crawler.py224 except Exception:
LOWscrapy/crawler.py466 except Exception:
LOWscrapy/crawler.py577 except Exception:
LOWscrapy/downloadermiddlewares/robotstxt.py102 except Exception as e:
LOWscrapy/core/scraper.py226 except Exception:
LOWscrapy/core/scraper.py258 except Exception:
LOWscrapy/core/scraper.py267 except Exception as spider_exc:
LOWscrapy/core/scraper.py290 except Exception:
LOWscrapy/core/scraper.py513 except Exception as ex:
LOWscrapy/core/spidermw.py96 except Exception:
LOWscrapy/core/spidermw.py110 except Exception as ex:
LOWscrapy/core/spidermw.py226 except Exception as ex:
LOWscrapy/core/engine.py150 except Exception:
LOWscrapy/core/engine.py280 except Exception as exception:
LOWscrapy/core/engine.py320 except Exception:
LOWscrapy/core/engine.py414 except Exception:
LOWscrapy/core/engine.py618 except Exception:
LOWscrapy/core/engine.py623 except Exception:
LOWscrapy/core/engine.py630 except Exception:
LOWscrapy/core/engine.py639 except Exception:
LOWscrapy/core/engine.py650 except Exception:
LOWscrapy/core/engine.py671 except Exception:
LOWscrapy/core/engine.py685 except Exception:
LOWscrapy/core/downloader/__init__.py238 except Exception:
LOWscrapy/core/downloader/__init__.py259 except Exception:
LOWscrapy/core/downloader/middleware.py83 except Exception as ex:
LOWscrapy/core/downloader/handlers/_base_streaming.py222 except Exception as e:
LOWscrapy/core/downloader/handlers/__init__.py110 except Exception as ex:
LOWscrapy/contracts/__init__.py48 except Exception:
LOWscrapy/contracts/__init__.py78 except Exception:
LOWscrapy/contracts/__init__.py131 except Exception:
LOWscrapy/contracts/__init__.py187 except Exception:
LOWscrapy/pipelines/files.py416 except Exception:
LOWscrapy/pipelines/files.py667 except Exception as exc:
LOWscrapy/pipelines/media.py194 except Exception:
LOWscrapy/pipelines/media.py216 except Exception:
LOWscrapy/utils/deprecate.py128 except Exception as e:
LOWscrapy/utils/signal.py64 except Exception:
LOWscrapy/utils/signal.py202 except Exception as ex:
LOWscrapy/utils/asyncio.py206 except Exception:
LOWscrapy/utils/engine.py36 except Exception as e:
LOWscrapy/utils/defer.py154 except Exception:
LOWscrapy/utils/defer.py360 except Exception:
LOWscrapy/utils/defer.py379 except Exception:
LOWscrapy/extensions/feedexport.py563 except Exception:
LOWscrapy/extensions/httpcache.py418 except Exception:
MEDIUMscrapy/commands/startproject.py56 print(f"Error: Module {project_name!r} already exists")
1 more match not shown…
Over-Commented Block — 25 hits · 25 pts
Severity File Line Snippet
LOWtests/test_http_request.py141 # See https://datatracker.ietf.org/doc/html/rfc3987#section-3.2
LOWdocs/requirements.txt1# This file was autogenerated by uv via the following command:
LOWdocs/requirements.txt21constantly==23.10.4
LOWdocs/requirements.txt41h2==4.3.0
LOWdocs/requirements.txt61 # scrapy
LOWdocs/requirements.txt81 # scrapy-spider-metadata
LOWdocs/requirements.txt101 # scrapy-spider-metadata
LOWdocs/requirements.txt121scrapy==2.14.2
LOWdocs/requirements.txt141 # via sphinx-scrapy
LOWdocs/requirements.txt181 # via
LOWdocs/conf.py101
LOWdocs/conf.py121 # Base classes of downloader middlewares are implementation details that
LOWscrapy/shell.py41 from collections.abc import Callable
LOWscrapy/shell.py61# Thus the only thing Shell needs an event loop for is fetch(). More machinery
LOWscrapy/shell.py81#
LOWscrapy/cmdline.py241 # Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect() on exit:
LOWscrapy/crawler.py901 # 2. _run_loop() calls loop.run_until_complete(main_task)
LOWscrapy/crawler.py921 # 3. _shutdown_graceful_reactorless() calls stop()
LOWscrapy/core/http2/protocol.py121
LOWscrapy/pipelines/media.py241 # the Response status code is not 200 OK, is that the original
LOWscrapy/http/request/__init__.py161 if not (callable(errback) or errback is None):
LOWscrapy/http/request/__init__.py181 #: <topics-spider-middleware>`, :ref:`downloader middleware
LOWscrapy/http/request/__init__.py201 self.errback: Callable[[Failure], Any] | None = errback
LOWscrapy/http/request/__init__.py221 #: - :class:`~scrapy.downloadermiddlewares.offsite.OffsiteMiddleware`
LOWscrapy/templates/project/module/spiders/__init__.py1# This package will contain the spiders of your Scrapy project
Decorative Section Separators — 8 hits · 24 pts
Severity File Line Snippet
MEDIUMtests/test_spidermiddleware_output_chain.py25# ================================================================================
MEDIUMtests/test_spidermiddleware_output_chain.py65# ================================================================================
MEDIUMtests/test_spidermiddleware_output_chain.py104# ================================================================================
MEDIUMtests/test_spidermiddleware_output_chain.py130# ================================================================================
MEDIUMtests/test_spidermiddleware_output_chain.py141# ================================================================================
MEDIUMtests/test_spidermiddleware_output_chain.py158# ================================================================================
MEDIUMtests/test_spidermiddleware_output_chain.py169# ================================================================================
MEDIUMtests/test_spidermiddleware_output_chain.py240# ================================================================================
Deep Nesting — 21 hits · 21 pts
Severity File Line Snippet
LOWtests/test_engine.py323
LOWtests/test_downloadermiddleware_httpcache.py433
LOWdocs/utils/linkfix.py20
LOWscrapy/pqueues.py184
LOWscrapy/pqueues.py358
LOWscrapy/downloadermiddlewares/httpproxy.py81
LOWscrapy/core/http2/protocol.py376
LOWscrapy/core/http2/stream.py209
LOWscrapy/core/http2/stream.py397
LOWscrapy/spiders/feed.py74
LOWscrapy/pipelines/media.py129
LOWscrapy/utils/sitemap.py106
LOWscrapy/utils/sitemap.py62
LOWscrapy/utils/request.py35
LOWscrapy/utils/iterators.py23
LOWscrapy/utils/iterators.py81
LOWscrapy/utils/ssl.py99
LOWscrapy/http/request/form.py124
LOWscrapy/commands/check.py73
LOWscrapy/commands/settings.py49
LOWscrapy/commands/parse.py305
Self-Referential Comments — 8 hits · 16 pts
Severity File Line Snippet
MEDIUMscrapy/core/scraper.py1"""This module implements the Scraper component which parses responses and
MEDIUMscrapy/core/spidermw.py249 # This method is only needed until _async compatibility methods are removed.
MEDIUMscrapy/core/downloader/tls.py118 """This method is needed to override the verify callback."""
MEDIUMscrapy/core/http2/agent.py55 # Create a deferred which will fire with the H2ClientProtocol
MEDIUMscrapy/spiders/feed.py63 """This method is called for the nodes matching the provided tag name
MEDIUMscrapy/utils/trackref.py1"""This module provides some functions and classes to record and report
MEDIUMscrapy/commands/shell.py98 # Create the engine and run start_async() in the main thread
MEDIUMscrapy/commands/shell.py104 # Create the engine and run start_async() in the event loop thread
Redundant / Tautological Comments — 6 hits · 9 pts
Severity File Line Snippet
LOWtests/test_http2_client_protocol.py381 # Check if headers were sent successfully
LOWscrapy/core/http2/stream.py372 # Check if we exceed the allowed max data size which can be received
LOWscrapy/core/http2/agent.py61 # Check if we already have a connection to the remote
LOWscrapy/pipelines/media.py176 # Check if request is downloading right now to avoid doing it twice
LOWscrapy/utils/asyncio.py73 # Check if there is a running asyncio loop.
LOWscrapy/utils/asyncio.py84 # Check if there is an installed asyncio reactor (it doesn't need to be
AI Slop Vocabulary — 4 hits · 5 pts
Severity File Line Snippet
LOWtests/mockserver/http_resources.py368 # just set this to trigger a test failure if no valid accept-encoding header was set
MEDIUMscrapy/spiders/feed.py50 """You can override this function in order to make any changes you want
MEDIUMscrapy/utils/signal.py42 """Like ``pydispatcher.robust.sendRobust()`` but it also logs errors and returns
LOWscrapy/http/request/form.py247 # If we don't have clickdata, we just use the first clickable element
Fake / Example Data — 3 hits · 4 pts
Severity File Line Snippet
LOWtests/test_item.py57 i["name"] = "John Doe"
LOWtests/test_item.py61 assert itemrepr == "{'name': 'John Doe', 'number': 123}"
LOWtests/test_item.py64 assert i2["name"] == "John Doe"
Overly Generic Function Names — 3 hits · 2 pts
Severity File Line Snippet
LOW…ils_misc/test_return_with_argument_inside_generator.py74 def helper():
LOW…ils_misc/test_return_with_argument_inside_generator.py137 def helper():
LOW…ils_misc/test_return_with_argument_inside_generator.py216 def helper():