Repository Analysis

apify/crawlee

Crawlee—A web scraping and browser automation library for Node.js to build reliable crawlers. In JavaScript and TypeScript. Extract data for AI, LLMs, RAG, or GPTs. Download HTML, PDF, JPG, PNG, and other files from websites. Works with Puppeteer, Playwright, Cheerio, JSDOM, and raw HTTP. Both headful and headless mode. With proxy rotation.

0.8 — Likely human-written (View on GitHub)
0.8
Adjusted Score
0.8
Raw Score
100%
Time Factor
2026-05-29
Last Push
23,555
Stars
TypeScript
Language
141,450
Lines of Code
1634
Files
81
Pattern Hits
2026-05-31
Scan Date

Score History

Severity Breakdown

CRITICAL 1 · HIGH 3 · MEDIUM 3 · LOW 74

Pattern Findings

81 matches across 9 categories. Click a row to expand file-level details.

Hyper-Verbose Identifiers — 53 hits · 53 pts
Severity | File | Line | Snippet
LOWwebsite/blog/2025/07-14-scrape-youtube/index.md271 async def handle_transcript_request(route: PlaywrightRoute, request: PlaywrightRequest) -> None:
LOWwebsite/src/theme/Navbar/MobileSidebar/Layout/index.js5export default function NavbarMobileSidebarLayout({
LOWwebsite/src/theme/Navbar/MobileSidebar/Header/index.js29export default function NavbarMobileSidebarHeader() {
LOW…te/src/theme/BlogPostItem/Footer/ReadMoreLink/index.js15export default function BlogPostItemFooterReadMoreLink(props) {
LOWwebsite/src/theme/BlogPostItem/Header/Authors/index.js7export default function BlogPostItemHeaderAuthors({ className }) {
LOWwebsite/src/pages/index.js20function LanguageGetStartedSection() {
LOWpackages/core/src/cookie_utils.ts34export function getDefaultCookieExpirationDate(maxAgeSecs: number) {
LOWpackages/core/src/cookie_utils.ts44export function toughCookieToBrowserPoolCookie(toughCookie: Cookie): CookieObject {
LOWpackages/core/src/cookie_utils.ts66export function browserPoolCookieToToughCookie(cookieObject: CookieObject, maxAgeSecs: number) {
LOWpackages/core/src/cookie_utils.ts90export function cookieStringToToughCookie(cookieString: string) {
LOWpackages/core/src/autoscaling/event_loop_load_signal.ts19export function createEventLoopLoadSignal(options: EventLoopLoadSignalOptions = {}) {
LOWpackages/core/src/enqueue_links/shared.ts57export function updateEnqueueLinksPatternCache(
LOWpackages/core/src/enqueue_links/shared.ts73export function constructRegExpObjectsFromPseudoUrls(pseudoUrls: readonly PseudoUrlInput[]): RegExpObject[] {
LOWpackages/core/src/enqueue_links/shared.ts97export function constructGlobObjectsFromGlobs(globs: readonly GlobInput[]): GlobObject[] {
LOWpackages/core/src/enqueue_links/shared.ts148export function constructRegExpObjectsFromRegExps(regexps: readonly RegExpInput[]): RegExpObject[] {
LOWpackages/core/src/enqueue_links/shared.ts281function createPatternObjectMatcher(urlPatternObject: UrlPatternObject) {
LOWpackages/core/src/enqueue_links/enqueue_links.ts525export function resolveBaseUrlForEnqueueLinksFiltering({
LOW…kages/browser-crawler/src/internals/browser-crawler.ts824export async function browserCrawlerEnqueueLinks(
LOWpackages/http-crawler/src/internals/http-crawler.ts973function addResponsePropertiesToStream(stream: Readable, response: StreamingHttpResponse) {
LOWpackages/http-crawler/src/internals/http-crawler.ts1011function parseContentTypeFromResponse(response: unknown): { type: string; charset: BufferEncoding } {
LOWpackages/utils/src/internals/general.ts178 function replaceShadowDomsWithHtml(rootElement: any) {
LOWpackages/cli/src/commands/CreateProjectCommand.ts70async function downloadTemplateFilesToDisk(template: Template, destinationDirectory: string) {
LOWpackages/browser-pool/src/container-proxy-server.ts9export async function createProxyServerForContainers(fallbackProxyUrl?: string) {
LOWpackages/browser-pool/src/fingerprinting/hooks.ts14export function createFingerprintPreLaunchHook(browserPool: BrowserPool<any, any, any, any, any>) {
LOW…kages/cheerio-crawler/src/internals/cheerio-crawler.ts270export async function cheerioCrawlerEnqueueLinks(
LOWpackages/memory-storage/src/utils.ts236 function getOrCreate(): Promise<TPromise> {
LOWpackages/memory-storage/src/body-parser.ts55function isContentTypeStringifiable(contentType: string): boolean {
LOWpackages/memory-storage/src/cache-helpers.ts15export async function findOrCacheDatasetByPossibleId(client: MemoryStorage, entryNameOrId: string) {
LOWpackages/memory-storage/src/cache-helpers.ts118export async function findOrCacheKeyValueStoreByPossibleId(client: MemoryStorage, entryNameOrId: string) {
LOWpackages/memory-storage/src/cache-helpers.ts271export async function findRequestQueueByPossibleId(client: MemoryStorage, entryNameOrId: string) {
LOWpackages/memory-storage/src/fs/key-value-store/index.ts6export function createKeyValueStorageImplementation(
LOWpackages/memory-storage/src/fs/request-queue/index.ts4export function createRequestQueueStorageImplementation(options: CreateStorageImplementationOptions) {
LOW…/src/internals/utils/puppeteer_request_interception.ts160export async function addInterceptRequestHandler(page: Page, handler: InterceptHandler): Promise<void> {
LOW…/src/internals/utils/puppeteer_request_interception.ts203export async function removeInterceptRequestHandler(page: Page, handler: InterceptHandler): Promise<void> {
LOW…/src/internals/utils/puppeteer_request_interception.ts234async function disableRequestInterception(page: Page): Promise<void> {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts225export async function enqueueLinksByClickingElements(
LOW…-crawler/src/internals/enqueue-links/click-elements.ts329export async function clickElementsAndInterceptNavigationRequests(
LOW…-crawler/src/internals/enqueue-links/click-elements.ts363function createInterceptRequestHandler(page: Page, requests: Set<string>): (req: PuppeteerRequest) => Promise<void> {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts389function isTopFrameNavigationRequest(page: Page, req: PuppeteerRequest): boolean {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts396function createTargetCreatedHandler(page: Page, requests: Set<string>): (target: Target) => Promise<void> {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts424function createFrameNavigatedHandler(page: Page, requests: Set<string>): (frame: Frame) => void {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts509function updateElementCssToEnableMouseClick(el: Element, zIndex: number): void {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts584async function restoreHistoryNavigationAndSaveCapturedUrls(page: Page, requests: Set<string>): Promise<void> {
LOW…ges/linkedom-crawler/src/internals/linkedom-crawler.ts255export async function linkedomCrawlerEnqueueLinks(
LOW…wright-crawler/src/internals/utils/playwright-utils.ts732async function handleCloudflareChallenge(
LOW…-crawler/src/internals/enqueue-links/click-elements.ts225export async function enqueueLinksByClickingElements(
LOW…-crawler/src/internals/enqueue-links/click-elements.ts329export async function clickElementsAndInterceptNavigationRequests(
LOW…-crawler/src/internals/enqueue-links/click-elements.ts364function createInterceptRequestHandler(
LOW…-crawler/src/internals/enqueue-links/click-elements.ts389function createTargetCreatedHandler(requests: Set<string>): (popup: Page) => Promise<void> {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts407function isTopFrameNavigationRequest(page: Page, req: Request): boolean {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts414function createFrameNavigatedHandler(page: Page, requests: Set<string>): (frame: Frame) => void {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts462function updateElementCssToEnableMouseClick(el: Element, zIndex: number): void {
LOW…-crawler/src/internals/enqueue-links/click-elements.ts568async function restoreHistoryNavigationAndSaveCapturedUrls(page: Page, requests: Set<string>): Promise<void> {
Magic Placeholder Names — 3 hits · 18 pts
Severity | File | Line | Snippet
HIGHwebsite/blog/2026/02-06/index.md41 apiKey: 'your-api-key', // Your OpenAI API key (or use OPENAI_API_KEY env var)
HIGHpackages/stagehand-crawler/README.md19 apiKey: 'your-api-key', // LLM API key for LOCAL env
HIGH…s/stagehand-crawler/src/internals/stagehand-crawler.ts63 * apiKey: 'your-api-key',
Excessive Try-Catch Wrapping — 9 hits · 11 pts
Severity | File | Line | Snippet
LOW…e/blog/2025/03-20-scrape-bluesky-using-python/index.md344 except Exception:
LOW…e/blog/2025/03-20-scrape-bluesky-using-python/index.md582 except Exception as e:
LOWwebsite/blog/2024/12-02-scrape-google-search/index.md243 except Exception as e:
LOW…og/2024/12-13-scrape-google-maps-using-python/index.md304 except Exception as e:
LOW…og/2024/12-13-scrape-google-maps-using-python/index.md356 except Exception as e:
LOW…og/2024/12-13-scrape-google-maps-using-python/index.md402 except Exception as e:
MEDIUM…og/2024/12-13-scrape-google-maps-using-python/index.md403 print(f"Error in scraping: {str(e)}")
LOW…og/2024/12-13-scrape-google-maps-using-python/index.md428 except Exception as e:
MEDIUM…og/2024/12-13-scrape-google-maps-using-python/index.md429 print(f"Error running scraper: {str(e)}")
Hallucination Indicators — 1 hit · 10 pts
Severity | File | Line | Snippet
CRITICALpackages/basic-crawler/src/internals/basic-crawler.ts619 maxRequestsPerMinute: ow.optional.number.integerOrInfinite.positive.greaterThanOrEqual(1),
Fake / Example Data — 5 hits · 8 pts
Severity | File | Line | Snippet
LOWtest/shared/data/lipsum.txt1Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce aliquet, nibh sit amet volutpat rhoncus, nisl eros sagitt
LOWtest/shared/data/lipsum.txt1Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce aliquet, nibh sit amet volutpat rhoncus, nisl eros sagitt
LOWtest/shared/data/lipsum.txt7Cras sit amet sem sit amet mi pulvinar varius non sed dolor. Nulla eu libero ultricies, aliquam mi eget, elementum velit
LOWtest/shared/data/lipsum.txt9Sed elit arcu, aliquam et tempor vel, ultrices sit amet turpis. Nullam et lorem magna. Nulla at risus nec dui aliquet or
LOWtest/shared/data/lipsum.txt9Sed elit arcu, aliquam et tempor vel, ultrices sit amet turpis. Nullam et lorem magna. Nulla at risus nec dui aliquet or
Over-Commented Block — 7 hits · 7 pts
Severity | File | Line | Snippet
LOWtest/core/request_list.test.ts821 });
LOWtest/core/request_list.test.ts841 // process.env.APIFY_LOCAL_STORAGE_DIR = 'tmp';
LOWtest/core/crawlers/browser_crawler.test.ts881 // test('browser should launch with correct proxyUrl', async () => {
LOWtest/core/crawlers/browser_crawler.test.ts901 // useSessionPool: false,
LOWtest/core/crawlers/browser_crawler.test.ts921 // const status = { connected: true };
LOWtest/core/crawlers/browser_crawler.test.ts941 // requestList,
LOWpackages/core/src/storages/request_list.ts561 if (index >= state.nextIndex) {
Self-Referential Comments — 1 hit · 3 pts
Severity | File | Line | Snippet
MEDIUMwebsite/src/pages/home_page_example.py11 # Define the default request handler, which will be called for every request.
Example Usage Blocks — 1 hit · 2 pts
Severity | File | Line | Snippet
LOWpackages/http-crawler/src/internals/file-download.ts172 * ## Example usage
Overly Generic Function Names — 1 hit · 1 pt
Severity | File | Line | Snippet
LOW…/src/internals/utils/puppeteer_request_interception.ts66async function handleRequest(request: PuppeteerRequest, interceptRequestHandlers?: InterceptHandler[]): Promise<void> {