Scrapy, a fast high-level web crawling & scraping framework for Python.
1655 matches across 15 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_request_dict.py | 75 | def test_callback_serialization(self): |
| LOW | tests/test_request_dict.py | 83 | def test_reference_callback_serialization(self): |
| LOW | tests/test_request_dict.py | 94 | def test_private_reference_callback_serialization(self): |
| LOW | tests/test_request_dict.py | 105 | def test_private_callback_serialization(self): |
| LOW | tests/test_request_dict.py | 113 | def test_mixin_private_callback_serialization(self): |
| LOW | tests/test_request_dict.py | 121 | def test_delegated_callback_serialization(self): |
| LOW | tests/test_request_dict.py | 156 | def test_callback_not_available(self): |
| LOW | tests/test_utils_signal.py | 134 | def test_error_logged_if_deferred_not_supported(self): |
| LOW | tests/test_http_response_text.py | 147 | def test_declared_encoding_invalid(self): |
| LOW | tests/test_http_response_text.py | 178 | def test_bom_is_removed_from_body(self): |
| LOW | tests/test_http_response_text.py | 206 | def test_replace_wrong_encoding(self): |
| LOW | tests/test_http_response_text.py | 257 | def test_selector_shortcuts_kwargs(self): |
| LOW | tests/test_http_response_text.py | 278 | def test_urljoin_with_base_url(self): |
| LOW | tests/test_http_response_text.py | 336 | def test_follow_selector_list(self): |
| LOW | tests/test_http_response_text.py | 341 | def test_follow_selector_invalid(self): |
| LOW | tests/test_http_response_text.py | 346 | def test_follow_selector_attribute(self): |
| LOW | tests/test_http_response_text.py | 351 | def test_follow_selector_no_href(self): |
| LOW | tests/test_http_response_text.py | 359 | def test_follow_whitespace_selector(self): |
| LOW | tests/test_http_response_text.py | 425 | def test_follow_all_css_skip_invalid(self): |
| LOW | tests/test_http_response_text.py | 446 | def test_follow_all_xpath_skip_invalid(self): |
| LOW | tests/test_http_response_text.py | 463 | def test_follow_all_too_many_arguments(self): |
| LOW | tests/test_http_response_text.py | 589 | def test_selector_shortcuts_kwargs(self): |
| LOW | tests/test_contracts.py | 91 | def returns_request_cb_kwargs(self, response, url): |
| LOW | tests/test_contracts.py | 107 | def returns_item_cb_kwargs_error_unexpected_keyword(self, response): |
| LOW | tests/test_contracts.py | 115 | def returns_item_cb_kwargs_error_missing_argument(self, response, arg): |
| LOW | tests/test_contracts.py | 175 | def scrapes_multiple_missing_fields(self, response): |
| LOW | tests/test_contracts.py | 199 | def invalid_regex_with_valid_contract(self, response): |
| LOW | tests/test_contracts.py | 223 | def returns_error_missing_meta(self, response): |
| LOW | tests/test_contracts.py | 542 | def test_custom_tagged_request_contract(self): |
| LOW | tests/test_contracts.py | 577 | def test_pre_hook_keyboard_interrupt(self): |
| LOW | tests/test_contracts.py | 591 | def test_post_hook_keyboard_interrupt(self): |
| LOW | tests/test_command_parse.py | 115 | def parse_request_with_cb_kwargs(self, response, foo=None, key=None): |
| LOW | tests/test_command_parse.py | 121 | def parse_request_without_meta(self, response): |
| LOW | tests/test_command_parse.py | 232 | def test_request_with_cb_kwargs( |
| LOW | tests/test_command_parse.py | 254 | def test_request_without_meta( |
| LOW | tests/test_command_parse.py | 283 | def test_async_def_asyncio_parse_items_list( |
| LOW | tests/test_command_parse.py | 299 | def test_async_def_asyncio_parse_items_single_element( |
| LOW | tests/test_command_parse.py | 314 | def test_async_def_asyncgen_parse_loop( |
| LOW | tests/test_command_parse.py | 330 | def test_async_def_asyncgen_parse_exc( |
| LOW | tests/test_command_parse.py | 346 | def test_async_def_asyncio_parse( |
| LOW | tests/test_command_parse.py | 372 | def test_parse_items_no_callback_passed( |
| LOW | tests/test_command_parse.py | 384 | def test_wrong_callback_passed( |
| LOW | tests/test_command_parse.py | 399 | def test_crawlspider_matching_rule_callback_set( |
| LOW | tests/test_command_parse.py | 413 | def test_crawlspider_matching_rule_default_callback( |
| LOW | tests/test_command_parse.py | 427 | def test_spider_with_no_rules_attribute( |
| LOW | tests/test_command_parse.py | 442 | def test_crawlspider_missing_callback( |
| LOW | tests/test_command_parse.py | 455 | def test_crawlspider_no_matching_rule( |
| LOW | tests/test_command_parse.py | 470 | def test_crawlspider_not_exists_with_not_matched_url( |
| LOW | tests/test_logstats.py | 59 | def test_stats_calculations_no_time(self): |
| LOW | tests/test_logstats.py | 68 | def test_stats_calculation_no_elapsed_time(self): |
| LOW | tests/test_http_request.py | 231 | def test_copy_inherited_classes(self): |
| LOW | tests/test_http_request.py | 269 | def test_immutable_attributes(self): |
| LOW | tests/test_http_request.py | 276 | def test_callback_and_errback(self): |
| LOW | tests/test_http_request.py | 308 | def test_callback_and_errback_type(self): |
| LOW | tests/test_http_request.py | 336 | def test_setter_mutable_lazy_loading(self): |
| LOW | tests/test_http_request.py | 441 | def test_from_curl_with_kwargs(self): |
| LOW | tests/test_http_request.py | 448 | def test_from_curl_ignore_unknown_options(self): |
| LOW | tests/test_command_genspider.py | 66 | def test_same_name_as_project(self, proj_path: Path) -> None: |
| LOW | tests/test_command_genspider.py | 73 | def test_same_filename_as_existing_spider( |
| LOW | tests/test_command_genspider.py | 167 | def test_generate_standalone_spider(self, tmp_path: Path) -> None: |
| 1025 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_squeues_request.py | 54 | q.push(req) |
| HIGH | tests/test_squeues_request.py | 88 | q.push(req1) |
| HIGH | tests/test_squeues_request.py | 89 | q.push(req2) |
| HIGH | tests/test_squeues_request.py | 90 | q.push(req3) |
| HIGH | tests/test_pqueues.py | 65 | queue.push(req1) |
| HIGH | tests/test_pqueues.py | 66 | queue.push(req2) |
| HIGH | tests/test_pqueues.py | 67 | queue.push(req3) |
| HIGH | tests/test_pqueues.py | 89 | queue.push(req1) |
| HIGH | tests/test_pqueues.py | 90 | queue.push(req2) |
| HIGH | tests/test_pqueues.py | 91 | queue.push(req3) |
| HIGH | tests/test_pqueues.py | 119 | self.queue.push(req1) |
| HIGH | tests/test_pqueues.py | 120 | self.queue.push(req2) |
| HIGH | tests/test_pqueues.py | 121 | self.queue.push(req3) |
| HIGH | tests/test_pqueues.py | 148 | self.queue.push(req1) |
| HIGH | tests/test_pqueues.py | 149 | self.queue.push(req2) |
| HIGH | tests/test_pqueues.py | 150 | self.queue.push(req3) |
| HIGH | tests/test_pqueues.py | 30 | queue.push(req1) |
| HIGH | tests/test_pqueues.py | 45 | queue.push(Request("https://example.org")) |
| HIGH | tests/test_pqueues.py | 134 | self.queue.push(Request("https://example.org")) |
| HIGH | tests/test_pqueues.py | 175 | self.queue.push(request) |
| HIGH | tests/test_pqueues.py | 199 | self.queue.push(request) |
| HIGH | tests/test_pqueues.py | 255 | queue.push(request) |
| HIGH | tests/test_squeues.py | 38 | q.push(lambda x: x) |
| HIGH | tests/test_squeues.py | 44 | q.push(sel) |
| HIGH | tests/test_squeues.py | 50 | q.push("a") |
| HIGH | tests/test_squeues.py | 51 | q.push(123) |
| HIGH | tests/test_squeues.py | 52 | q.push({"a": "dict"}) |
| HIGH | tests/test_squeues.py | 92 | q.push(i) |
| HIGH | tests/test_squeues.py | 100 | q.push(loader) |
| HIGH | tests/test_squeues.py | 110 | q.push(r) |
| HIGH | tests/test_squeues.py | 122 | q.push(lambda x: x) |
| HIGH | tests/test_squeues.py | 131 | q.push(sel) |
| HIGH | tests/test_squeues.py | 155 | q.push("a") |
| HIGH | tests/test_squeues.py | 156 | q.push(123) |
| HIGH | tests/test_squeues.py | 157 | q.push({"a": "dict"}) |
| HIGH | tests/test_squeues.py | 177 | q.push(i) |
| HIGH | tests/test_squeues.py | 185 | q.push(loader) |
| HIGH | tests/test_squeues.py | 195 | q.push(r) |
| HIGH | tests/test_settings/__init__.py | 898 | {"FOO": '{"tests.test_settings.Component1": null}'}, |
| HIGH | scrapy/pqueues.py | 180 | q.push(request) # this may fail (eg. serialization error) |
| HIGH | scrapy/pqueues.py | 414 | queue.push(request) |
| HIGH | scrapy/core/scheduler.py | 418 | self.dqs.push(request) |
| HIGH | scrapy/core/scheduler.py | 439 | self.mqs.push(request) |
| HIGH | scrapy/spidermiddlewares/referer.py | 74 | If url is null, return no referrer. |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | conftest.py | 1 | |
| LOW | conftest.py | 54 | |
| LOW | tests/test_http_response_text.py | 1 | |
| LOW | tests/test_command_parse.py | 1 | |
| LOW | tests/test_command_genspider.py | 1 | |
| LOW | tests/test_engine.py | 1 | |
| LOW | tests/test_engine.py | 44 | |
| LOW | tests/test_zz_resources.py | 3 | |
| LOW | tests/test_spidermiddleware.py | 1 | |
| LOW | tests/test_downloader_handler_twisted_http11.py | 3 | |
| LOW | tests/test_stats.py | 1 | |
| LOW | tests/test_linkextractors.py | 1 | |
| LOW | tests/test_command_runspider.py | 1 | |
| LOW | tests/test_loader.py | 1 | |
| LOW | tests/test_middleware.py | 1 | |
| LOW | tests/test_http_request_json.py | 1 | |
| LOW | tests/test_http2_client_protocol.py | 1 | |
| LOW | tests/test_utils_iterators.py | 1 | |
| LOW | tests/test_utils_log.py | 1 | |
| LOW | tests/test_spider_sitemap.py | 1 | |
| LOW | tests/test_spidermiddleware_urllength.py | 1 | |
| LOW | tests/test_downloader_handler_twisted_http2.py | 3 | |
| LOW | tests/test_feedexport_postprocess.py | 1 | |
| LOW | tests/test_feedexport_uri_params.py | 1 | |
| LOW | tests/test_spidermiddleware_referer.py | 1 | |
| LOW | tests/spiders.py | 5 | |
| LOW | tests/test_downloadermiddleware_robotstxt.py | 1 | |
| LOW | tests/test_crawler_subprocess.py | 1 | |
| LOW | tests/test_robotstxt_interface.py | 16 | |
| LOW | tests/test_engine_stop_download_bytes.py | 1 | |
| LOW | tests/test_http_request_form.py | 1 | |
| LOW | tests/test_core_downloader.py | 1 | |
| LOW | tests/test_core_downloader.py | 40 | |
| LOW | tests/test_scheduler_base.py | 1 | |
| LOW | tests/test_downloadermiddleware.py | 1 | |
| LOW | tests/test_spider.py | 1 | |
| LOW | tests/test_squeues_request.py | 5 | |
| LOW | tests/test_spidermiddleware_base.py | 1 | |
| LOW | tests/test_scheduler.py | 1 | |
| LOW | tests/test_downloader_handler_httpx.py | 3 | |
| LOW | tests/test_extension_periodic_log.py | 1 | |
| LOW | tests/test_utils_defer.py | 1 | |
| LOW | tests/test_spider_crawl.py | 1 | |
| LOW | tests/test_downloadermiddleware_httpcompression.py | 69 | |
| LOW | tests/test_downloadermiddleware_httpcompression.py | 214 | |
| LOW | tests/test_crawl.py | 1 | |
| LOW | tests/test_downloadermiddleware_redirect_metarefresh.py | 1 | |
| LOW | tests/test_downloader_handlers.py | 3 | |
| LOW | tests/test_downloader_handlers.py | 195 | |
| LOW | tests/test_downloader_handlers_http_base.py | 3 | |
| LOW | tests/test_utils_python.py | 1 | |
| LOW | tests/test_command_shell.py | 1 | |
| LOW | tests/test_pipeline_images.py | 1 | |
| LOW | tests/test_pipeline_crawl.py | 1 | |
| LOW | tests/test_pipeline_crawl.py | 223 | |
| LOW | tests/test_commands.py | 1 | |
| LOW | tests/test_feedexport_batch.py | 1 | |
| LOW | tests/test_spidermiddleware_depth.py | 1 | |
| LOW | tests/test_pipeline_files.py | 706 | |
| LOW | tests/test_feedexport_storages.py | 1 | |
| 211 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | tests/test_http_response_text.py | 0 | <?xml version="1.0" encoding="iso-8859-1"?><xml></xml> |
| HIGH | tests/test_http_response_text.py | 0 | <?xml version="1.0" encoding="iso-8859-1"?><xml></xml> |
| HIGH | tests/test_http_response_text.py | 0 | <?xml version="1.0" encoding="iso-8859-1"?><xml></xml> |
| HIGH | tests/test_contracts.py | 0 | method which returns item @url http://scrapy.org @returns items 1 1 |
| HIGH | tests/test_contracts.py | 0 | method which returns item @url http://scrapy.org @returns items 1 1 |
| HIGH | tests/test_contracts.py | 0 | method which returns item @url http://scrapy.org @returns items 1 1 |
| HIGH | tests/test_contracts.py | 0 | returns item with no name @url http://scrapy.org @returns items 1 1 @scrapes name url |
| HIGH | tests/test_contracts.py | 0 | returns item with no name @url http://scrapy.org @returns items 1 1 @scrapes name url |
| HIGH | tests/test_contracts.py | 0 | returns item with no name @url http://scrapy.org @returns items 1 1 @scrapes name url |
| HIGH | tests/test_linkextractors.py | 0 | <div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div |
| HIGH | tests/test_linkextractors.py | 0 | <div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div |
| HIGH | tests/test_linkextractors.py | 0 | <div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div |
| HIGH | tests/test_linkextractors.py | 0 | <div> <a href="/a">a1</a> <a href="/b?a=1&b=2">b1</a> </div> <div> <a href="/a">a2</a> <a href="/b?b=2&a=1">b2</a> </div |
| HIGH | tests/test_command_runspider.py | 0 | import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('feeds: {}'.form |
| HIGH | tests/test_command_runspider.py | 0 | import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('feeds: {}'.form |
| HIGH | tests/test_command_crawl.py | 0 | import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('feeds: {}'.form |
| HIGH | tests/test_loader.py | 0 | values added after initialization should be appended |
| HIGH | tests/test_loader.py | 0 | values added after initialization should be appended |
| HIGH | tests/test_loader.py | 0 | values added after initialization should be appended |
| HIGH | tests/test_loader.py | 0 | values added after initialization should be appended |
| HIGH | tests/test_utils_iterators.py | 0 | <?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:g="http://base.google.com/ns/1.0"> <channel> <title>my d |
| HIGH | tests/test_utils_iterators.py | 0 | <?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:g="http://base.google.com/ns/1.0"> <channel> <title>my d |
| HIGH | tests/test_utils_iterators.py | 0 | <?xml version="1.0" encoding="utf-8"?> <rss version="2.0" xmlns:g="http://base.google.com/ns/1.0"> <channel> <title>my d |
| HIGH | tests/test_http_request_form.py | 0 | <form action="get.php" method="post"> <input type="hidden" name="one" value="1"> <input type="hidden" name="two" value=" |
| HIGH | tests/test_http_request_form.py | 0 | <form action="get.php" method="post"> <input type="hidden" name="one" value="1"> <input type="hidden" name="two" value=" |
| HIGH | tests/test_http_request_form.py | 0 | <form action="get.php" method="post"> <input type="hidden" name="one" value="1"> <input type="hidden" name="two" value=" |
| HIGH | tests/test_http_request_form.py | 0 | <form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name= |
| HIGH | tests/test_http_request_form.py | 0 | <form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name= |
| HIGH | tests/test_http_request_form.py | 0 | <form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name= |
| HIGH | tests/test_http_request_form.py | 0 | <form action="get.php" method="get"> <input type="submit" name="clickable1" value="clicked1"> <input type="hidden" name= |
| HIGH | tests/test_command_crawl.py | 0 | import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('it works!') ret |
| HIGH | tests/test_command_crawl.py | 0 | import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('it works!') ret |
| HIGH | tests/test_command_crawl.py | 0 | import scrapy class myspider(scrapy.spider): name = 'myspider' async def start(self): self.logger.debug('it works!') ret |
| HIGH | tests/test_feedexport.py | 0 | <?xml version="1.0" encoding="utf-8"?> <items> <item> <foo> <value>bar</value> </foo> </item> <item> <key>value</key> </ |
| HIGH | tests/test_feedexport.py | 0 | <?xml version="1.0" encoding="utf-8"?> <items> <item> <foo> <value>bar</value> </foo> </item> <item> <key>value</key> </ |
| HIGH | tests/test_feedexport.py | 0 | <?xml version="1.0" encoding="utf-8"?> <items> <item> <foo> <value>bar</value> </foo> </item> <item> <key>value</key> </ |
| HIGH | tests/test_settings/__init__.py | 0 | { "tests.test_settings.component1": 0, "tests.test_settings.component1alias": 1, "tests.test_settings.component1subclass |
| HIGH | tests/test_settings/__init__.py | 0 | { "tests.test_settings.component1": 0, "tests.test_settings.component1alias": 1, "tests.test_settings.component1subclass |
| HIGH | tests/test_settings/__init__.py | 0 | { "tests.test_settings.component1": 0, "tests.test_settings.component1alias": 1, "tests.test_settings.component1subclass |
| HIGH | scrapy/pqueues.py | 0 | returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement |
| HIGH | scrapy/pqueues.py | 0 | returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement |
| HIGH | scrapy/squeues.py | 0 | returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement |
| HIGH | scrapy/squeues.py | 0 | returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement |
| HIGH | scrapy/squeues.py | 0 | returns the next object to be returned by :meth:`pop`, but without removing it from the queue. raises :exc:`notimplement |
| HIGH | scrapy/http/response/__init__.py | 0 | shortcut method implemented only by responses whose content is text (subclasses of textresponse). |
| HIGH | scrapy/http/response/__init__.py | 0 | shortcut method implemented only by responses whose content is text (subclasses of textresponse). |
| HIGH | scrapy/http/response/__init__.py | 0 | shortcut method implemented only by responses whose content is text (subclasses of textresponse). |
| Severity | File | Line | Snippet |
|---|---|---|---|
| CRITICAL | tests/test_engine_loop.py | 38 | self.crawler.engine._slot.scheduler.pause() |
| CRITICAL | tests/test_engine_loop.py | 39 | self.crawler.engine._slot.scheduler.enqueue_request(Request("data:,b")) |
| CRITICAL | tests/test_engine_loop.py | 45 | self.crawler.engine._slot.scheduler.unpause() |
| CRITICAL | tests/test_engine_loop.py | 54 | self.crawler.engine._slot.scheduler.pause() |
| CRITICAL | tests/test_engine_loop.py | 55 | self.crawler.engine._slot.scheduler.enqueue_request(Request("data:,d")) |
| CRITICAL | tests/test_engine_loop.py | 203 | spider.crawler.engine._slot.scheduler.enqueue_request(request) |
| CRITICAL | tests/test_engine_loop.py | 243 | spider.crawler.engine._slot.scheduler.enqueue_request(request) |
| CRITICAL | tests/test_engine_loop.py | 291 | spider.crawler.engine._slot.scheduler.enqueue_request(request) |
| CRITICAL | tests/test_engine_loop.py | 304 | spider.crawler.engine._slot.scheduler.enqueue_request(request) |
| CRITICAL | scrapy/extensions/throttle.py | 102 | return key, self.crawler.engine.downloader.slots.get(key) |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_engine.py | 628 | |
| MEDIUM | tests/test_spidermiddleware.py | 77 | |
| MEDIUM | tests/test_utils_python.py | 41 | |
| MEDIUM | tests/test_crawler.py | 666 | |
| MEDIUM | tests/test_crawler.py | 190 | |
| MEDIUM | tests/test_crawler.py | 270 | |
| MEDIUM | tests/test_crawler.py | 350 | |
| MEDIUM | tests/test_crawler.py | 430 | |
| MEDIUM | tests/test_crawler.py | 534 | |
| MEDIUM | tests/test_engine_loop.py | 210 | |
| MEDIUM | tests/test_engine_loop.py | 250 | |
| MEDIUM | tests/test_engine_loop.py | 308 | |
| MEDIUM | tests/AsyncCrawlerRunner/reactorless_simple.py | 15 | |
| MEDIUM | tests/AsyncCrawlerRunner/custom_loop_same.py | 20 | |
| MEDIUM | tests/AsyncCrawlerRunner/reactorless_custom_settings.py | 18 | |
| MEDIUM | tests/AsyncCrawlerRunner/custom_loop_different.py | 20 | |
| MEDIUM | tests/AsyncCrawlerRunner/multi_parallel.py | 15 | |
| MEDIUM | tests/AsyncCrawlerRunner/reactorless_reactor.py | 14 | |
| MEDIUM | tests/AsyncCrawlerRunner/no_reactor.py | 13 | |
| MEDIUM | tests/AsyncCrawlerRunner/simple_default_reactor.py | 14 | |
| MEDIUM | tests/AsyncCrawlerRunner/simple.py | 17 | |
| MEDIUM | tests/AsyncCrawlerRunner/multi_seq.py | 15 | |
| MEDIUM | tests/CrawlerProcess/asyncio_enabled_no_reactor.py | 17 | |
| MEDIUM | tests/CrawlerProcess/args_settings.py | 19 | |
| MEDIUM | tests/CrawlerProcess/reactor_select.py | 15 | |
| MEDIUM | tests/CrawlerProcess/multi.py | 10 | |
| MEDIUM | tests/CrawlerProcess/asyncio_enabled_reactor.py | 53 | |
| MEDIUM | …ts/CrawlerProcess/asyncio_enabled_reactor_same_loop.py | 22 | |
| MEDIUM | …awlerProcess/reactor_default_twisted_reactor_select.py | 13 | |
| MEDIUM | …awlerProcess/asyncio_enabled_reactor_different_loop.py | 20 | |
| MEDIUM | …cess/reactor_select_subclass_twisted_reactor_select.py | 22 | |
| MEDIUM | tests/CrawlerProcess/reactor_default.py | 13 | |
| MEDIUM | tests/CrawlerProcess/simple.py | 12 | |
| MEDIUM | …rawlerProcess/reactor_select_twisted_reactor_select.py | 14 | |
| MEDIUM | tests/CrawlerProcess/asyncio_custom_loop.py | 10 | |
| MEDIUM | tests/CrawlerRunner/explicit_default_reactor.py | 17 | |
| MEDIUM | tests/CrawlerRunner/custom_loop_same.py | 19 | |
| MEDIUM | tests/CrawlerRunner/change_reactor.py | 15 | |
| MEDIUM | tests/CrawlerRunner/custom_loop_different.py | 19 | |
| MEDIUM | tests/CrawlerRunner/multi_parallel.py | 14 | |
| MEDIUM | tests/CrawlerRunner/no_reactor.py | 13 | |
| MEDIUM | tests/CrawlerRunner/simple.py | 16 | |
| MEDIUM | tests/CrawlerRunner/multi_seq.py | 15 | |
| MEDIUM | tests/AsyncCrawlerProcess/asyncio_enabled_no_reactor.py | 17 | |
| MEDIUM | tests/AsyncCrawlerProcess/reactorless_simple.py | 12 | |
| MEDIUM | tests/AsyncCrawlerProcess/args_settings.py | 19 | |
| MEDIUM | …sts/AsyncCrawlerProcess/reactorless_custom_settings.py | 23 | |
| MEDIUM | tests/AsyncCrawlerProcess/multi.py | 10 | |
| MEDIUM | tests/AsyncCrawlerProcess/asyncio_enabled_reactor.py | 43 | |
| MEDIUM | …yncCrawlerProcess/asyncio_enabled_reactor_same_loop.py | 21 | |
| MEDIUM | …ncCrawlerProcess/reactorless_telnetconsole_disabled.py | 10 | |
| MEDIUM | …ocess/asyncio_custom_loop_custom_settings_different.py | 13 | |
| MEDIUM | …lerProcess/asyncio_custom_loop_custom_settings_same.py | 13 | |
| MEDIUM | …awlerProcess/asyncio_enabled_reactor_different_loop.py | 19 | |
| MEDIUM | …yncCrawlerProcess/reactorless_telnetconsole_enabled.py | 10 | |
| MEDIUM | tests/AsyncCrawlerProcess/reactorless_import_hook.py | 12 | |
| MEDIUM | tests/AsyncCrawlerProcess/reactor_default.py | 12 | |
| MEDIUM | tests/AsyncCrawlerProcess/simple.py | 12 | |
| MEDIUM | tests/AsyncCrawlerProcess/asyncio_custom_loop.py | 10 | |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_downloadermiddleware_httpcache.py | 260 | except Exception: |
| LOW | tests/test_pipeline_media.py | 115 | except Exception as exc: |
| MEDIUM | tests/test_crawler.py | 234 | def from_crawler(cls, crawler): |
| MEDIUM | tests/test_crawler.py | 314 | def from_crawler(cls, crawler): |
| MEDIUM | tests/test_crawler.py | 394 | def from_crawler(cls, crawler): |
| MEDIUM | tests/test_crawler.py | 474 | def from_crawler(cls, crawler): |
| LOW | tests/test_crawler.py | 237 | except Exception as e: |
| LOW | tests/test_crawler.py | 317 | except Exception as e: |
| LOW | tests/test_crawler.py | 397 | except Exception as e: |
| LOW | tests/test_crawler.py | 477 | except Exception as e: |
| LOW | …awlerProcess/twisted_reactor_custom_settings_select.py | 23 | except Exception: |
| LOW | …sts/AsyncCrawlerProcess/reactorless_custom_settings.py | 29 | except Exception: |
| LOW | scrapy/crawler.py | 194 | except Exception: |
| LOW | scrapy/crawler.py | 224 | except Exception: |
| LOW | scrapy/crawler.py | 466 | except Exception: |
| LOW | scrapy/crawler.py | 577 | except Exception: |
| LOW | scrapy/downloadermiddlewares/robotstxt.py | 102 | except Exception as e: |
| LOW | scrapy/core/scraper.py | 226 | except Exception: |
| LOW | scrapy/core/scraper.py | 258 | except Exception: |
| LOW | scrapy/core/scraper.py | 267 | except Exception as spider_exc: |
| LOW | scrapy/core/scraper.py | 290 | except Exception: |
| LOW | scrapy/core/scraper.py | 513 | except Exception as ex: |
| LOW | scrapy/core/spidermw.py | 96 | except Exception: |
| LOW | scrapy/core/spidermw.py | 110 | except Exception as ex: |
| LOW | scrapy/core/spidermw.py | 226 | except Exception as ex: |
| LOW | scrapy/core/engine.py | 150 | except Exception: |
| LOW | scrapy/core/engine.py | 280 | except Exception as exception: |
| LOW | scrapy/core/engine.py | 320 | except Exception: |
| LOW | scrapy/core/engine.py | 414 | except Exception: |
| LOW | scrapy/core/engine.py | 618 | except Exception: |
| LOW | scrapy/core/engine.py | 623 | except Exception: |
| LOW | scrapy/core/engine.py | 630 | except Exception: |
| LOW | scrapy/core/engine.py | 639 | except Exception: |
| LOW | scrapy/core/engine.py | 650 | except Exception: |
| LOW | scrapy/core/engine.py | 671 | except Exception: |
| LOW | scrapy/core/engine.py | 685 | except Exception: |
| LOW | scrapy/core/downloader/__init__.py | 238 | except Exception: |
| LOW | scrapy/core/downloader/__init__.py | 259 | except Exception: |
| LOW | scrapy/core/downloader/middleware.py | 83 | except Exception as ex: |
| LOW | scrapy/core/downloader/handlers/_base_streaming.py | 222 | except Exception as e: |
| LOW | scrapy/core/downloader/handlers/__init__.py | 110 | except Exception as ex: |
| LOW | scrapy/contracts/__init__.py | 48 | except Exception: |
| LOW | scrapy/contracts/__init__.py | 78 | except Exception: |
| LOW | scrapy/contracts/__init__.py | 131 | except Exception: |
| LOW | scrapy/contracts/__init__.py | 187 | except Exception: |
| LOW | scrapy/pipelines/files.py | 416 | except Exception: |
| LOW | scrapy/pipelines/files.py | 667 | except Exception as exc: |
| LOW | scrapy/pipelines/media.py | 194 | except Exception: |
| LOW | scrapy/pipelines/media.py | 216 | except Exception: |
| LOW | scrapy/utils/deprecate.py | 128 | except Exception as e: |
| LOW | scrapy/utils/signal.py | 64 | except Exception: |
| LOW | scrapy/utils/signal.py | 202 | except Exception as ex: |
| LOW | scrapy/utils/asyncio.py | 206 | except Exception: |
| LOW | scrapy/utils/engine.py | 36 | except Exception as e: |
| LOW | scrapy/utils/defer.py | 154 | except Exception: |
| LOW | scrapy/utils/defer.py | 360 | except Exception: |
| LOW | scrapy/utils/defer.py | 379 | except Exception: |
| LOW | scrapy/extensions/feedexport.py | 563 | except Exception: |
| LOW | scrapy/extensions/httpcache.py | 418 | except Exception: |
| MEDIUM | scrapy/commands/startproject.py | 56 | print(f"Error: Module {project_name!r} already exists") |
| 1 more match not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_http_request.py | 141 | # See https://datatracker.ietf.org/doc/html/rfc3987#section-3.2 |
| LOW | docs/requirements.txt | 1 | # This file was autogenerated by uv via the following command: |
| LOW | docs/requirements.txt | 21 | constantly==23.10.4 |
| LOW | docs/requirements.txt | 41 | h2==4.3.0 |
| LOW | docs/requirements.txt | 61 | # scrapy |
| LOW | docs/requirements.txt | 81 | # scrapy-spider-metadata |
| LOW | docs/requirements.txt | 101 | # scrapy-spider-metadata |
| LOW | docs/requirements.txt | 121 | scrapy==2.14.2 |
| LOW | docs/requirements.txt | 141 | # via sphinx-scrapy |
| LOW | docs/requirements.txt | 181 | # via |
| LOW | docs/conf.py | 101 | |
| LOW | docs/conf.py | 121 | # Base classes of downloader middlewares are implementation details that |
| LOW | scrapy/shell.py | 41 | from collections.abc import Callable |
| LOW | scrapy/shell.py | 61 | # Thus the only thing Shell needs an event loop for is fetch(). More machinery |
| LOW | scrapy/shell.py | 81 | # |
| LOW | scrapy/cmdline.py | 241 | # Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect() on exit: |
| LOW | scrapy/crawler.py | 901 | # 2. _run_loop() calls loop.run_until_complete(main_task) |
| LOW | scrapy/crawler.py | 921 | # 3. _shutdown_graceful_reactorless() calls stop() |
| LOW | scrapy/core/http2/protocol.py | 121 | |
| LOW | scrapy/pipelines/media.py | 241 | # the Response status code is not 200 OK, is that the original |
| LOW | scrapy/http/request/__init__.py | 161 | if not (callable(errback) or errback is None): |
| LOW | scrapy/http/request/__init__.py | 181 | #: <topics-spider-middleware>`, :ref:`downloader middleware |
| LOW | scrapy/http/request/__init__.py | 201 | self.errback: Callable[[Failure], Any] | None = errback |
| LOW | scrapy/http/request/__init__.py | 221 | #: - :class:`~scrapy.downloadermiddlewares.offsite.OffsiteMiddleware` |
| LOW | scrapy/templates/project/module/spiders/__init__.py | 1 | # This package will contain the spiders of your Scrapy project |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 25 | # ================================================================================ |
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 65 | # ================================================================================ |
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 104 | # ================================================================================ |
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 130 | # ================================================================================ |
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 141 | # ================================================================================ |
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 158 | # ================================================================================ |
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 169 | # ================================================================================ |
| MEDIUM | tests/test_spidermiddleware_output_chain.py | 240 | # ================================================================================ |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_engine.py | 323 | |
| LOW | tests/test_downloadermiddleware_httpcache.py | 433 | |
| LOW | docs/utils/linkfix.py | 20 | |
| LOW | scrapy/pqueues.py | 184 | |
| LOW | scrapy/pqueues.py | 358 | |
| LOW | scrapy/downloadermiddlewares/httpproxy.py | 81 | |
| LOW | scrapy/core/http2/protocol.py | 376 | |
| LOW | scrapy/core/http2/stream.py | 209 | |
| LOW | scrapy/core/http2/stream.py | 397 | |
| LOW | scrapy/spiders/feed.py | 74 | |
| LOW | scrapy/pipelines/media.py | 129 | |
| LOW | scrapy/utils/sitemap.py | 106 | |
| LOW | scrapy/utils/sitemap.py | 62 | |
| LOW | scrapy/utils/request.py | 35 | |
| LOW | scrapy/utils/iterators.py | 23 | |
| LOW | scrapy/utils/iterators.py | 81 | |
| LOW | scrapy/utils/ssl.py | 99 | |
| LOW | scrapy/http/request/form.py | 124 | |
| LOW | scrapy/commands/check.py | 73 | |
| LOW | scrapy/commands/settings.py | 49 | |
| LOW | scrapy/commands/parse.py | 305 | |
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | scrapy/core/scraper.py | 1 | """This module implements the Scraper component which parses responses and |
| MEDIUM | scrapy/core/spidermw.py | 249 | # This method is only needed until _async compatibility methods are removed. |
| MEDIUM | scrapy/core/downloader/tls.py | 118 | """This method is needed to override the verify callback.""" |
| MEDIUM | scrapy/core/http2/agent.py | 55 | # Create a deferred which will fire with the H2ClientProtocol |
| MEDIUM | scrapy/spiders/feed.py | 63 | """This method is called for the nodes matching the provided tag name |
| MEDIUM | scrapy/utils/trackref.py | 1 | """This module provides some functions and classes to record and report |
| MEDIUM | scrapy/commands/shell.py | 98 | # Create the engine and run start_async() in the main thread |
| MEDIUM | scrapy/commands/shell.py | 104 | # Create the engine and run start_async() in the event loop thread |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_http2_client_protocol.py | 381 | # Check if headers were sent successfully |
| LOW | scrapy/core/http2/stream.py | 372 | # Check if we exceed the allowed max data size which can be received |
| LOW | scrapy/core/http2/agent.py | 61 | # Check if we already have a connection to the remote |
| LOW | scrapy/pipelines/media.py | 176 | # Check if request is downloading right now to avoid doing it twice |
| LOW | scrapy/utils/asyncio.py | 73 | # Check if there is a running asyncio loop. |
| LOW | scrapy/utils/asyncio.py | 84 | # Check if there is an installed asyncio reactor (it doesn't need to be |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/mockserver/http_resources.py | 368 | # just set this to trigger a test failure if no valid accept-encoding header was set |
| MEDIUM | scrapy/spiders/feed.py | 50 | """You can override this function in order to make any changes you want |
| MEDIUM | scrapy/utils/signal.py | 42 | """Like ``pydispatcher.robust.sendRobust()`` but it also logs errors and returns |
| LOW | scrapy/http/request/form.py | 247 | # If we don't have clickdata, we just use the first clickable element |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | tests/test_item.py | 57 | i["name"] = "John Doe" |
| LOW | tests/test_item.py | 61 | assert itemrepr == "{'name': 'John Doe', 'number': 123}" |
| LOW | tests/test_item.py | 64 | assert i2["name"] == "John Doe" |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …ils_misc/test_return_with_argument_inside_generator.py | 74 | def helper(): |
| LOW | …ils_misc/test_return_with_argument_inside_generator.py | 137 | def helper(): |
| LOW | …ils_misc/test_return_with_argument_inside_generator.py | 216 | def helper(): |