Repository Analysis

ArchiveBox/ArchiveBox

🗃 Open source self-hosted web archiving. Takes URLs/browser history/bookmarks/Pocket/Pinboard/etc., saves HTML, JS, PDFs, media, and more...

15.2 — Moderate AI signal (View on GitHub)
15.2
Adjusted Score
15.2
Raw Score
100%
Time Factor
2026-05-30
Last Push
27,572
Stars
Python
Language
169,800
Lines of Code
563
Files
1852
Pattern Hits
2026-05-31
Scan Date

Score History

Severity Breakdown

CRITICAL 6 · HIGH 74 · MEDIUM 212 · LOW 1560

Pattern Findings

1852 matches across 17 categories. Click a row to expand file-level details.

Hyper-Verbose Identifiers — 1067 hits · 999 pts
Severity | File | Line | Snippet
LOWCLAUDE.md139def test_migration_preserves_snapshots(self):
LOWold/TODO_hook_statemachine_cleanup.md127 def has_running_background_hooks(self) -> bool:
LOWold/TODO_fs_migrations.md305def reconcile_with_index_json(self):
LOWold/TODO_fs_migrations.md465def move_directory_to_invalid(snapshot_dir: Path):
LOWold/TODO_fs_migrations.md113def get_storage_path_for_version(self, version: str) -> Path:
LOWold/TODO_fs_migrations.md284def _detect_fs_version_from_index(data: dict) -> str:
LOWold/TODO_fs_migrations.md368def _merge_archive_results_from_index(self, index_data: dict):
LOWold/TODO_fs_migrations.md387def _create_archive_result_if_missing(self, result_data: dict, existing: dict):
LOWold/TODO_fs_migrations.md489def find_and_merge_duplicates(cls) -> int:
LOWold/TODO_fs_migrations.md662def _cleanup_old_migration_dir(self, old_dir: Path):
LOWold/TODO_fs_migrations.md787def import_orphans_from_archive(resume_from: str = None, batch_size: int = 100) -> dict:
LOWold/TODO_fs_migrations.md891def process_filtered_snapshots(
LOWold/TODO_fs_migrations.md1135def fix_invalid_folder_locations(out_dir: Path = DATA_DIR) -> Tuple[List[str], List[str]]:
LOWold/TODO_process_tracking.md1108def has_running_background_hooks(self) -> bool:
LOWold/TODO_process_tracking.md1126def has_running_background_hooks(self) -> bool:
LOWold/TODO_process_tracking.md1828def stop_existing_supervisord_process():
LOWold/TODO_process_tracking.md1840def start_new_supervisord_process():
LOWold/TODO_process_tracking.md1852def stop_existing_supervisord_process():
LOWold/TODO_archivebox_jsonl_cli.md591def assert_jsonl_contains_type(stdout: str, record_type: str, min_count: int = 1):
LOWold/TODO_archivebox_jsonl_cli.md600def assert_jsonl_pass_through(stdout: str, input_records: List[Dict[str, Any]]):
LOWold/TODO_archivebox_jsonl_cli.md662def create_test_snapshot_json(url: str = None, **kwargs) -> Dict[str, Any]:
LOWold/archivebox.ts6051// async function setupScreenrecordingWithAudio(page, wss) {
LOWold/archivebox.ts1039async function getChromeExtensionsFromPersona({CHROME_EXTENSIONS, CHROME_EXTENSIONS_DIR}) {
LOWold/archivebox.ts1061async function getChromeExtensionsFromCache({browser, extensions=CHROME_EXTENSIONS, extensions_dir=CHROME_EXTENSIONS_DIR
LOWold/archivebox.ts1715async function collectSnapshotDirVersionFiles(snap_dir) {
LOWold/archivebox.ts1825async function symlinkBestSnapshotResults(snap_dir) {
LOWold/archivebox.ts3380async function saveAIQualityAssuranceResult(page, {original_url, version}) {
LOWold/TODO_hook_architecture.md1427def check_background_hook_completed(archiveresult: 'ArchiveResult') -> bool:
LOWold/TODO_hook_architecture.md1583def test_background_hook_detection():
LOWold/TODO_hook_architecture.md1588def test_find_binary_by_abspath():
LOWold/TODO_hook_architecture.md1627def test_foreground_hook_execution(snapshot):
LOWold/TODO_hook_architecture.md1646def test_background_hook_execution(snapshot):
LOWold/TODO_hook_architecture.md1667def test_background_hook_finalization(snapshot):
LOWarchivebox/hooks.py122def is_finite_background_hook(hook_name: str) -> bool:
LOWarchivebox/hooks.py142def normalize_hook_event_name(event_name: str) -> str | None:
LOWarchivebox/hooks.py162def _model_output_dir_from_child_path(path: Path, marker: str) -> Path | None:
LOWarchivebox/hooks.py528def extract_records_from_process(process: "Process") -> list[dict[str, Any]]:
LOWarchivebox/hooks.py558def collect_urls_from_plugins(snapshot_dir: Path) -> list[dict[str, Any]]:
LOWarchivebox/hooks.py663 def normalize_enabled_plugins(value: Any) -> list[str]:
LOWarchivebox/hooks.py702def discover_plugins_that_provide_interface(
LOWarchivebox/hooks.py860def get_plugin_special_config(plugin_name: str, config: ConfigLookup, _visited: set[str] | None = None) -> PluginSpecial
LOWarchivebox/dead/hooks.py18def get_config_defaults_from_plugins() -> dict[str, Any]:
LOWarchivebox/dead/supervision_service.py2def ensure_single_orchestrator(*, data_dir: str | Path, takeover: bool, reason: str = ""):
LOWarchivebox/dead/supervision_service.py38def wait_until_replaced_or_signal(
LOWarchivebox/dead/folders.py22def fix_invalid_folder_locations(out_dir: Path = DATA_DIR, config=None, **config_kwargs) -> tuple[list[str], list[str]]:
LOWarchivebox/dead/process_plugin.py270 async def _maybe_dispatch_json_event(self, line: str, parent_event_id: str | None) -> None:
LOWarchivebox/misc/util.py216def split_comma_separated_urls(url: str):
LOWarchivebox/misc/checks.py166def check_not_inside_source_dir():
LOWarchivebox/misc/checks.py177def check_data_dir_permissions(config=None, **config_kwargs):
LOWarchivebox/misc/logging_util.py215def log_indexing_process_started(num_links: int):
LOWarchivebox/misc/logging_util.py227def log_indexing_process_finished():
LOWarchivebox/misc/logging_util.py324def log_snapshot_archiving_started(snapshot: "Snapshot", out_dir: str, is_new: bool):
LOWarchivebox/misc/logging_util.py347def log_snapshot_archiving_finished(snapshot: "Snapshot", out_dir: str, is_new: bool, stats: dict, start_ts: datetime):
LOWarchivebox/misc/logging_util.py373def log_archive_method_started(method: str):
LOWarchivebox/misc/logging_util.py377def log_archive_method_finished(result: dict):
LOWarchivebox/misc/logging_util.py734def printable_dependency_version(name: str, dependency: dict) -> str:
LOWarchivebox/misc/serve_static.py99def _render_mhtml_preview_document(filename: str, output_path: str) -> str:
LOWarchivebox/misc/serve_static.py118def _format_direntry_timestamp(stat_result: os.stat_result) -> str:
LOWarchivebox/misc/serve_static.py166def _build_directory_zip_response(
LOWarchivebox/misc/serve_static.py256async def _stream_ranged_file_async(ranged_file: "RangedFileReader"):
1007 more matches not shown…
Decorative Section Separators — 179 hits · 540 pts
Severity | File | Line | Snippet
MEDIUMold/TODO_fs_migrations.md301# =========================================================================
MEDIUMold/TODO_fs_migrations.md303# =========================================================================
MEDIUMold/TODO_fs_migrations.md460# =========================================================================
MEDIUMold/TODO_fs_migrations.md462# =========================================================================
MEDIUMold/TODO_fs_migrations.md79# =========================================================================
MEDIUMold/TODO_fs_migrations.md81# =========================================================================
MEDIUMold/TODO_fs_migrations.md145# =========================================================================
MEDIUMold/TODO_fs_migrations.md147# =========================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md467# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md469# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md532# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md534# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md574# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md576# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md619# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md621# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md636# =============================================================================
MEDIUMold/TODO_archivebox_jsonl_cli.md638# =============================================================================
MEDIUMarchivebox/hooks.py97# =============================================================================
MEDIUMarchivebox/hooks.py99# =============================================================================
MEDIUMarchivebox/hooks.py976# =============================================================================
MEDIUMarchivebox/hooks.py978# =============================================================================
MEDIUMarchivebox/hooks.py1073# =============================================================================
MEDIUMarchivebox/hooks.py1075# =============================================================================
MEDIUMarchivebox/core/models.py995 # =========================================================================
MEDIUMarchivebox/core/models.py997 # =========================================================================
MEDIUMarchivebox/core/models.py1794 # =========================================================================
MEDIUMarchivebox/core/models.py1796 # =========================================================================
MEDIUMarchivebox/core/models.py3121 # =========================================================================
MEDIUMarchivebox/core/models.py3123 # =========================================================================
MEDIUMarchivebox/core/models.py246 # =========================================================================
MEDIUMarchivebox/core/models.py248 # =========================================================================
MEDIUMarchivebox/core/models.py296 # =========================================================================
MEDIUMarchivebox/core/models.py298 # =========================================================================
MEDIUMarchivebox/core/models.py390 # =========================================================================
MEDIUMarchivebox/core/models.py392 # =========================================================================
MEDIUMarchivebox/core/models.py794 # =========================================================================
MEDIUMarchivebox/core/models.py796 # =========================================================================
MEDIUMarchivebox/core/models.py1055 # =========================================================================
MEDIUMarchivebox/core/models.py1057 # =========================================================================
MEDIUMarchivebox/core/models.py1339 # =========================================================================
MEDIUMarchivebox/core/models.py1341 # =========================================================================
MEDIUMarchivebox/core/models.py1896 # =========================================================================
MEDIUMarchivebox/core/models.py1898 # =========================================================================
MEDIUMarchivebox/core/models.py2683 # =========================================================================
MEDIUMarchivebox/core/models.py2685 # =========================================================================
MEDIUMarchivebox/core/models.py2744 # =========================================================================
MEDIUMarchivebox/core/models.py2746 # =========================================================================
MEDIUMarchivebox/core/models.py2796 # =========================================================================
MEDIUMarchivebox/core/models.py2798 # =========================================================================
MEDIUMarchivebox/core/models.py2992 # =========================================================================
MEDIUMarchivebox/core/models.py2994 # =========================================================================
MEDIUMarchivebox/core/models.py3195# =============================================================================
MEDIUMarchivebox/core/models.py3197# =============================================================================
MEDIUMarchivebox/core/models.py4348# =============================================================================
MEDIUMarchivebox/core/models.py4350# =============================================================================
MEDIUMarchivebox/tests/conftest.py63# =============================================================================
MEDIUMarchivebox/tests/conftest.py65# =============================================================================
MEDIUMarchivebox/tests/conftest.py129# =============================================================================
MEDIUMarchivebox/tests/conftest.py131# =============================================================================
119 more matches not shown…
Cross-Language Confusion — 69 hits · 415 pts
Severity | File | Line | Snippet
HIGHarchivebox/misc/checks.py74 print(" docker compose pull && docker compose up -d", file=sys.stderr)
HIGHarchivebox/misc/hashing.py250# "num_subpaths": null,
HIGHarchivebox/core/widgets.py516 return window.updateHiddenField_id_config || null;
HIGHarchivebox/core/widgets.py516 return window.updateHiddenField_id_config || null;
HIGHarchivebox/core/widgets.py522 return null;
HIGHarchivebox/core/widgets.py526 return keyInput && keyInput.value.trim() === key;
HIGHarchivebox/core/widgets.py528 return matches.length ? matches[0] : null;
HIGHarchivebox/core/widgets.py535 return rows ? rows.lastElementChild : null;
HIGHarchivebox/core/widgets.py537 return null;
HIGHarchivebox/core/widgets.py602 if (!domain || seen[domain]) {{
HIGHarchivebox/core/widgets.py607 domains.push(domain);
HIGHarchivebox/core/widgets.py611 var hostAndPort = String(parsed.host || parsed.hostname || '').toLowerCase();
HIGHarchivebox/core/widgets.py613 if (!hostAndPort || seen[pathKey]) {{
HIGHarchivebox/core/widgets.py617 paths.push({{ host: hostAndPort, path: pathname }});
HIGHarchivebox/core/widgets.py118 onkeypress="if(event.key==='Enter' || event.keyCode===13 || event.key===' ' || event.code==='Space' |
HIGHarchivebox/core/widgets.py128 var autocompleteTimeout_{widget_id} = null;
HIGHarchivebox/core/widgets.py165 if (!(options && options.skipHiddenUpdate)) {{
HIGHarchivebox/core/widgets.py309 if (isEnter || isSpace || isComma) {{
HIGHarchivebox/core/widgets.py319 }} else if (event.key === 'Backspace' && !value && currentTags_{widget_id}.length > 0) {{
HIGHarchivebox/core/widgets.py333 if (!query || query.length < 1) {{
HIGHarchivebox/core/widgets.py455 if (!trimmed || trimmed.charAt(0) === '#') {{
HIGHarchivebox/core/widgets.py497 if (!pathname || pathname === '/') {{
HIGHarchivebox/core/widgets.py591 var seen = Object.create(null);
HIGHarchivebox/core/admin_snapshots.py1167 "this.dataset.fallbacks && this.dataset.fallbacks.length ? "
HIGHarchivebox/core/admin_snapshots.py1192 "this.dataset.fallbacks && this.dataset.fallbacks.length ? "
HIGHarchivebox/tests/test_server_security_browser.py32 finalUrl: null,
HIGHarchivebox/tests/test_server_security_browser.py33 status: null,
HIGHarchivebox/tests/test_server_security_browser.py34 error: null,
HIGHarchivebox/tests/test_server_security_browser.py43 result.status = response ? response.status() : null;
HIGHarchivebox/tests/test_server_security_browser.py89 consoleMessages.push({type: message.type(), text: message.text()});
HIGHarchivebox/tests/test_server_security_browser.py92 consoleMessages.push({type: "pageerror", text: String(error)});
HIGHarchivebox/tests/test_server_security_browser.py95 requestFailures.push({
HIGHarchivebox/tests/test_server_security_browser.py107 () => window.__dangerousScriptRan !== true || window.__probeResults !== undefined,
HIGHarchivebox/tests/test_server_security_browser.py107 () => window.__dangerousScriptRan !== true || window.__probeResults !== undefined,
HIGHarchivebox/tests/test_server_security_browser.py114 probeResults: window.__probeResults || null,
HIGHarchivebox/tests/test_server_security_browser.py114 probeResults: window.__probeResults || null,
HIGHarchivebox/tests/test_server_security_browser.py122 status: response ? response.status() : null,
HIGHarchivebox/tests/test_server_security_browser.py124 contentSecurityPolicy: response ? response.headers()["content-security-policy"] || null : null,
HIGHarchivebox/tests/test_server_security_browser.py125 archiveboxSecurityMode: response ? response.headers()["x-archivebox-security-mode"] || null : null,
HIGHarchivebox/tests/test_hooks.py9 sudo -u testuser bash -c 'source .venv/bin/activate && python -m pytest archivebox/tests/test_hooks.py -v'
HIGHarchivebox/crawls/admin.py317 var cookieValue = null;
HIGHarchivebox/base_models/admin.py154 if (!meta || meta.type === undefined || meta.type === null) {{
HIGHarchivebox/base_models/admin.py154 if (!meta || meta.type === undefined || meta.type === null) {{
HIGHarchivebox/base_models/admin.py162 return null;
HIGHarchivebox/base_models/admin.py208 if (meta.minimum !== undefined || meta.maximum !== undefined) {{
HIGHarchivebox/base_models/admin.py208 if (meta.minimum !== undefined || meta.maximum !== undefined) {{
HIGHarchivebox/base_models/admin.py210 if (meta.minimum !== undefined) bounds.push('min ' + meta.minimum);
HIGHarchivebox/base_models/admin.py210 if (meta.minimum !== undefined) bounds.push('min ' + meta.minimum);
HIGHarchivebox/base_models/admin.py211 if (meta.maximum !== undefined) bounds.push('max ' + meta.maximum);
HIGHarchivebox/base_models/admin.py211 if (meta.maximum !== undefined) bounds.push('max ' + meta.maximum);
HIGHarchivebox/base_models/admin.py341 return left === right || JSON.stringify(left) === JSON.stringify(right);
HIGHarchivebox/base_models/admin.py348 if (typeName && meta && meta.key && isRegexConfigKey_{widget_id}(meta.key)) {{
HIGHarchivebox/base_models/admin.py352 if (typeName === 'string' && meta && meta.key && meta.key.endsWith('_BINARY')) {{
HIGHarchivebox/base_models/admin.py190 return null;
HIGHarchivebox/base_models/admin.py252 if (!pattern || isSimpleFilterPattern_{widget_id}(pattern)) {{
HIGHarchivebox/base_models/admin.py259 return error && error.message ? error.message : 'Invalid regex';
HIGHarchivebox/base_models/admin.py272 if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {{
HIGHarchivebox/base_models/admin.py327 if (raw === 'null') return null;
HIGHarchivebox/base_models/admin.py372 if (meta.minimum !== undefined && numberValue < meta.minimum) {{
HIGHarchivebox/base_models/admin.py375 if (meta.maximum !== undefined && numberValue > meta.maximum) {{
9 more matches not shown…
Excessive Try-Catch Wrapping — 202 hits · 214 pts
Severity | File | Line | Snippet
LOWbin/test_plugins.sh71except Exception:
MEDIUMold/TODO_fs_migrations.md252def is_valid_timestamp(ts):
LOWold/TODO_fs_migrations.md110 except Exception:
LOWold/TODO_fs_migrations.md673 except Exception as e:
LOWold/TODO_fs_migrations.md1153 except Exception:
LOWold/TODO_process_tracking.md1405 except Exception as e:
LOWold/TODO_hook_architecture.md145 except Exception:
LOWold/TODO_hook_architecture.md232 except Exception as e:
LOWold/TODO_hook_architecture.md1102 except Exception as e:
LOWdocs/conf.py48except Exception:
LOWarchivebox/hooks.py345 except Exception:
LOWarchivebox/hooks.py421 except Exception:
LOWarchivebox/hooks.py512 except Exception as e:
LOWarchivebox/hooks.py596 except Exception:
LOWarchivebox/hooks.py778 except Exception:
LOWarchivebox/hooks.py1148 except Exception as e:
LOWarchivebox/dead/util.py104 except Exception:
LOWarchivebox/dead/supervision_service.py24 except Exception:
LOWarchivebox/dead/archivebox_persona.py92 except Exception:
LOWarchivebox/dead/archivebox_persona.py124 except Exception as e:
LOWarchivebox/dead/folders.py46 except Exception:
LOWarchivebox/dead/process_plugin.py22except Exception as exc: # pragma: no cover - optional dependency
LOWarchivebox/dead/process_plugin.py27except Exception: # pragma: no cover - optional dependency
LOWarchivebox/dead/process_plugin.py285 except Exception:
LOWarchivebox/dead/process_plugin.py312 except Exception:
LOWarchivebox/dead/process_plugin.py315 except Exception:
LOWarchivebox/misc/db.py141 except Exception as err:
LOWarchivebox/misc/system.py82 except Exception:
LOWarchivebox/misc/util.py504 except Exception:
LOWarchivebox/misc/util.py509 except Exception:
LOWarchivebox/misc/util.py514 except Exception:
LOWarchivebox/misc/checks.py229 except Exception:
LOWarchivebox/misc/checks.py234 except Exception:
LOWarchivebox/misc/checks.py271 except Exception as e:
LOWarchivebox/misc/checks.py324 except Exception as e:
LOWarchivebox/misc/paginators.py75 except Exception:
LOWarchivebox/misc/logging_util.py94 except Exception:
LOWarchivebox/misc/logging_util.py362 except Exception:
LOWarchivebox/misc/toml_util.py32 except Exception:
LOWarchivebox/misc/toml_util.py37 except Exception:
LOWarchivebox/misc/toml_util.py101 except Exception:
LOWarchivebox/misc/serve_static.py49 except Exception:
LOWarchivebox/misc/serve_static.py490 except Exception:
LOWarchivebox/misc/serve_static.py668 except Exception:
LOWarchivebox/misc/serve_static.py831 except Exception:
LOWarchivebox/misc/serve_static.py859 except Exception:
LOWarchivebox/misc/serve_static.py892 except Exception:
LOWarchivebox/misc/serve_static.py945 except Exception:
LOWarchivebox/core/admin_archiveresults.py127 except Exception:
LOWarchivebox/core/models.py990 except Exception as e:
LOWarchivebox/core/models.py195 except Exception:
LOWarchivebox/core/models.py976 except Exception as e:
LOWarchivebox/core/models.py1026 except Exception:
LOWarchivebox/core/models.py1819 except Exception:
LOWarchivebox/core/models.py1844 except Exception:
LOWarchivebox/core/models.py1880 except Exception:
LOWarchivebox/core/models.py2066 except Exception:
LOWarchivebox/core/models.py2223 except Exception:
LOWarchivebox/core/models.py2240 except Exception:
LOWarchivebox/core/models.py4209 except Exception:
142 more matches not shown…
Deep Nesting — 123 hits · 96 pts
Severity | File | Line | Snippet
LOWarchivebox/hooks.py282
LOWarchivebox/hooks.py558
LOWarchivebox/hooks.py702
LOWarchivebox/hooks.py860
LOWarchivebox/hooks.py1078
LOWarchivebox/dead/legacy.py62
LOWarchivebox/dead/util.py72
LOWarchivebox/dead/supervisord_util.py17
LOWarchivebox/dead/archivebox_persona.py44
LOWarchivebox/dead/folders.py22
LOWarchivebox/misc/db.py103
LOWarchivebox/misc/system.py20
LOWarchivebox/misc/system.py89
LOWarchivebox/misc/system.py127
LOWarchivebox/misc/util.py477
LOWarchivebox/misc/logging_util.py377
LOWarchivebox/misc/logging_util.py556
LOWarchivebox/misc/jsonl.py139
LOWarchivebox/misc/hashing.py153
LOWarchivebox/misc/serve_static.py482
LOWarchivebox/misc/serve_static.py711
LOWarchivebox/core/admin_archiveresults.py80
LOWarchivebox/core/models.py151
LOWarchivebox/core/models.py1000
LOWarchivebox/core/models.py1060
LOWarchivebox/core/models.py1160
LOWarchivebox/core/models.py1343
LOWarchivebox/core/models.py1441
LOWarchivebox/core/models.py1647
LOWarchivebox/core/models.py1688
LOWarchivebox/core/models.py1850
LOWarchivebox/core/models.py2059
LOWarchivebox/core/models.py2117
LOWarchivebox/core/models.py2361
LOWarchivebox/core/models.py2817
LOWarchivebox/core/models.py4014
LOWarchivebox/core/tag_utils.py58
LOWarchivebox/core/widgets.py51
LOWarchivebox/core/widgets.py670
LOWarchivebox/core/forms.py351
LOWarchivebox/core/admin_site.py75
LOWarchivebox/core/middleware.py69
LOWarchivebox/core/middleware.py143
LOWarchivebox/core/middleware.py73
LOWarchivebox/core/middleware.py148
LOWarchivebox/core/views.py1407
LOWarchivebox/core/views.py336
LOWarchivebox/core/views.py528
LOWarchivebox/core/views.py1446
LOWarchivebox/core/views.py1483
LOWarchivebox/core/templatetags/core_tags.py114
LOWarchivebox/core/sqlite_backend/base.py31
LOWarchivebox/config/django.py34
LOWarchivebox/config/paths.py42
LOWarchivebox/config/common.py403
LOWarchivebox/config/collection.py10
LOWarchivebox/config/views.py311
LOWarchivebox/config/views.py374
LOWarchivebox/tests/conftest.py271
LOWarchivebox/tests/test_search_backends_e2e.py9
63 more matches not shown…
Unused Imports — 64 hits · 63 pts
Severity | File | Line | Snippet
LOWarchivebox/__main__.py6
LOWarchivebox/dead/live_ui.py2
LOWarchivebox/misc/shell_welcome_message.py6
LOWarchivebox/misc/shell_welcome_message.py7
LOWarchivebox/misc/shell_welcome_message.py8
LOWarchivebox/misc/shell_welcome_message.py9
LOWarchivebox/misc/shell_welcome_message.py10
LOWarchivebox/misc/shell_welcome_message.py11
LOWarchivebox/misc/shell_welcome_message.py12
LOWarchivebox/misc/shell_welcome_message.py13
LOWarchivebox/misc/shell_welcome_message.py14
LOWarchivebox/misc/shell_welcome_message.py16
LOWarchivebox/misc/shell_welcome_message.py17
LOWarchivebox/misc/shell_welcome_message.py17
LOWarchivebox/misc/shell_welcome_message.py19
LOWarchivebox/misc/shell_welcome_message.py20
LOWarchivebox/misc/shell_welcome_message.py22
LOWarchivebox/core/host_utils.py1
LOWarchivebox/core/models.py62
LOWarchivebox/core/tag_utils.py1
LOWarchivebox/core/apps.py26
LOWarchivebox/core/shutdown_util.py1
LOWarchivebox/core/admin_site.py17
LOWarchivebox/core/admin_site.py18
LOWarchivebox/core/admin_site.py19
LOWarchivebox/core/admin_site.py19
LOWarchivebox/core/admin_site.py21
LOWarchivebox/core/settings.py537
LOWarchivebox/core/settings.py571
LOWarchivebox/core/permissions.py1
LOWarchivebox/core/recovery_util.py1
LOWarchivebox/core/wsgi.py10
LOWarchivebox/core/sqlite_backend/base.py1
LOWarchivebox/config/django.py47
LOWarchivebox/config/paths.py17
LOWarchivebox/tests/test_server_security_browser.py4
LOWarchivebox/crawls/schedule_utils.py1
LOWarchivebox/crawls/apps.py15
LOWarchivebox/cli/archivebox_update.py2
LOWarchivebox/cli/archivebox_add.py2
LOWarchivebox/search/sonic_daemon.py1
LOWarchivebox/personas/importers.py8
LOWarchivebox/machine/models.py1
LOWarchivebox/machine/apps.py19
LOWarchivebox/services/binary_service.py1
LOWarchivebox/services/runner.py1
LOWarchivebox/services/process_service.py1
LOWarchivebox/services/crawl_service.py1
LOWarchivebox/services/tag_service.py1
LOWarchivebox/services/__init__.py1
LOWarchivebox/services/__init__.py2
LOWarchivebox/services/__init__.py3
LOWarchivebox/services/__init__.py4
LOWarchivebox/services/__init__.py5
LOWarchivebox/services/__init__.py6
LOWarchivebox/services/__init__.py6
LOWarchivebox/services/__init__.py6
LOWarchivebox/services/__init__.py6
LOWarchivebox/services/__init__.py7
LOWarchivebox/services/__init__.py8
4 more matches not shown…
Over-Commented Block — 69 hits · 60 pts
Severity | File | Line | Snippet
LOWdocker-compose.yml1# Usage:
LOWdocker-compose.yml21 # - ADMIN_USERNAME=admin # creates an admin user on first run with the given user/pass combo
LOWdocker-compose.yml41 # - dns
LOWdocker-compose.yml61 ### This optional container runs xvfb+noVNC so you can watch the ArchiveBox browser as it archives things,
LOWdocker-compose.yml81 # You can also any other ingress provider for SSL like Apache, Caddy, Traefik, Cloudflare Tunnels, etc.
LOWdocker-compose.yml101 # environment:
LOWdocker-compose.yml121 # cap_add:
LOWdocker-compose.yml141 ### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
LOWdocker-compose.yml161# - subnet: 172.20.0.0/24
LOWdocker-compose.yml181# remote: 'examplegdrive:archivebox'
LOWbin/setup.sh1#!/usr/bin/env bash
LOWbin/export_browser_history.sh1#!/usr/bin/env bash
LOWbin/docker_entrypoint.sh1#!/bin/bash
LOWbin/docker_entrypoint.sh281 # e.g. "docker run archivebox archivebox init:
LOWbin/test_plugins.sh1#!/bin/bash
LOWold/archivebox.ts81 // 'https://browserleaks.com/canvas',
LOWold/archivebox.ts101 // 'https://docker-compose.archivebox.io',
LOWold/archivebox.ts121 // 'https://facebook.com/815781663692514/?comment_id=924451748966499',
LOWold/archivebox.ts221 isLandscape: false,
LOWold/archivebox.ts261 // {
LOWold/archivebox.ts321// if (snap_id.startsWith('.')) continue
LOWold/archivebox.ts341// if (fs.existsSync(dest_path)) {
LOWold/archivebox.ts361
LOWold/archivebox.ts481 {webstore_id: 'ifibfemgeogfhoebkmokieepdoobkbpo', name: 'twocaptcha'}, // https://2captcha.com/blog/
LOWold/archivebox.ts661 '--autoplay-policy=no-user-gesture-required', // auto-start videos so they trigger network requests + show up i
LOWold/archivebox.ts1761 if (snapshot_entry.sha256 == version_entry.sha256) {
LOWold/archivebox.ts1781 // mv ./data/archive/<snap_id>/example.txt -> ./data/archive/<snap_id>/versions/<version_id>/example.txt
LOWold/archivebox.ts2101 // ffmpeg_Path: '<path of ffmpeg_path>' || null,
LOWold/archivebox.ts2421 // await page.setCookie(...cookies)
LOWold/archivebox.ts3121 const recorder = page_state.recorder
LOWold/archivebox.ts3241 // const screenshot_jpg = SCREENSHOT_JPG_PATH(page)
LOWold/archivebox.ts3261 // cropped_bytes[idx + 1] = this.data[idx + 1];
LOWold/archivebox.ts3501// spawn(
LOWold/archivebox.ts4541
LOWold/archivebox.ts5341
LOWold/archivebox.ts5361 // mimeType: 'inode/directory'
LOWold/archivebox.ts5461// {
LOWold/archivebox.ts5501 // example: detectFilename({url: 'https://example.com/favicon.png', extension: 'ico'}) outputs 'favicon.ico'
LOWold/archivebox.ts6041
LOWold/archivebox.ts6061// video: true,
LOWdocs/.readthedocs.yaml1# Read the Docs configuration file for ArchiveBox docs
LOW.github/workflows/codeql.yml1# For most projects, this workflow file will not need changing; you simply need
LOW.github/workflows/codeql.yml61 # Initializes the CodeQL tools for scanning.
LOWarchivebox/hooks.py981# Templates are discovered by filename convention inside each plugin's templates/ dir:
LOWarchivebox/dead/django.py1# ruff: noqa
LOWarchivebox/dead/views.py1# ruff: noqa
LOWarchivebox/misc/paginators.py81
LOWarchivebox/misc/hashing.py241# "num_bytes": 214677,
LOWarchivebox/core/models.py781 # log_worker_event(
LOWarchivebox/core/models.py3601 if self.delete_at is not None and update_fields is not None:
LOWarchivebox/core/asgi.py21# from channels.security.websocket import AllowedHostsOriginValidator
LOWarchivebox/core/admin_snapshots.py941 html = Template("""{{bookmarked_date}} (<code>{{timestamp}}</code>)""")
LOWarchivebox/core/settings.py181 # str(plugin_dir / 'static')
LOWarchivebox/core/settings.py261 "default": {
LOWarchivebox/core/settings.py281
LOWarchivebox/core/settings.py301# return 'default'
LOWarchivebox/core/settings.py341 # "OPTIONS": {
LOWarchivebox/core/urls.py101# path('/accounts', django.contrib.auth.urls)
LOWarchivebox/core/urls.py121# future, just an idea:
LOWarchivebox/config/version.py81# # when its most relevant, e.g. when the user runs a long-running command
9 more matches not shown…
Hallucination Indicators — 6 hits · 60 pts
Severity | File | Line | Snippet
CRITICALarchivebox/core/admin_archiveresults.py17from django.core.exceptions import ValidationError
CRITICALarchivebox/crawls/models.py18from django.core.exceptions import ValidationError
CRITICALarchivebox/api/v1_core.py17from django.core.exceptions import ValidationError
CRITICALarchivebox/templates/static/jquery-3.7.1.slim.min.js2!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):f
CRITICALarchivebox/templates/static/jquery.min.js2!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):f
CRITICALarchivebox/services/snapshot_service.py7from django.core.exceptions import ValidationError
Redundant / Tautological Comments — 31 hits · 42 pts
Severity | File | Line | Snippet
LOWbin/kill_chrome.sh50 # Check if still alive
LOWbin/kill_chrome.sh59 # Check if it's a zombie/uninterruptible process BEFORE trying to kill
LOWbin/build_docker.sh93# Check if docker is ready for cross-plaform builds, if not, recreate builder
LOWbin/docker_entrypoint.sh100# Check if user attempted to run it in the root of their home folder or hard drive (common mistake)
LOWbin/test_plugins.sh264 # Check if there are any Python test files
LOWarchivebox/hooks.py263 # Check if this is a plugin directory (not the root plugins dir)
LOWarchivebox/hooks.py270 # Check if plugin is enabled
LOWarchivebox/hooks.py897 # Check if PLUGINS whitelist is specified (e.g., --plugins=wget,favicon)
LOWarchivebox/hooks.py1118 # Check if discovered snapshot exceeds crawl max_depth
LOWarchivebox/misc/jsonl.py147 # Check if it's a file path
LOWarchivebox/core/models.py2593 # Check if any ARs are still pending/started
LOWarchivebox/core/models.py2634 # Check if sealed
LOWarchivebox/core/views.py2411 # Check if it's from archivebox.config.file
LOWarchivebox/core/views.py2170 # Check if crawl can start (for debugging stuck crawls)
LOWarchivebox/core/views.py2195 # Check if retry_at is in the future (would prevent worker from claiming)
LOWarchivebox/mcp/server.py162 # Check if this is a positional Argument (not an Option)
LOWarchivebox/crawls/models.py440 # Check if crawl already exists by ID
LOWarchivebox/crawls/models.py834 # Check if already in urls (parse existing JSONL entries)
LOWarchivebox/crawls/models.py1390 # Check if any snapshots exist for this crawl
LOWarchivebox/crawls/models.py1553 # Set retry_at to near future so tick() can poll and check is_finished()
LOWarchivebox/cli/archivebox_update.py579 # Check if needs migration (0.8.x → 0.9.x)
LOWarchivebox/cli/archivebox_extract.py336 # Output results as JSONL (when piped) or human-readable (when TTY)
LOWarchivebox/cli/archivebox_extract.py393 # Check if input looks like existing ArchiveResult IDs to process
LOWarchivebox/cli/archivebox_version.py109 # Check if LDAP is enabled (simple config lookup)
LOWarchivebox/cli/archivebox_persona.py265 # Check if resolved_path is a child of personas_dir
LOWarchivebox/machine/models.py827 # Check if it's already pointing to the right place
LOWarchivebox/machine/models.py1607 # Check if too old (PID definitely reused)
LOWarchivebox/machine/models.py1611 # Check if OS process still exists with matching start time
LOWarchivebox/machine/models.py2315 # Check if process exited
LOWarchivebox/machine/models.py2608 # Check if installation succeeded by looking at updated status
LOWarchivebox/workers/supervisord_util.py1047 # Check if the monitored process has exited
Self-Referential Comments · 14 hits · 40 pts
SeverityFileLineSnippet
MEDIUMbin/build_brew.sh21# Create a temporary virtualenv for generating the formula
MEDIUMpkg/debian/install.sh31# Create the virtualenv if it doesn't exist
MEDIUMarchivebox/hooks.py350 # Create a failed Process record for hooks that don't exist
MEDIUMarchivebox/hooks.py513 # Create a failed Process record for exceptions
MEDIUMarchivebox/misc/logging_util.py650 # Create a Rich Text object for proper formatting
MEDIUMarchivebox/core/templatetags/core_tags.py416 # Create a mini template and render it with context
MEDIUMarchivebox/tests/test_cli_status.py133 # Create an orphaned directory
MEDIUMarchivebox/tests/test_cli_crawl.py192 # Create a crawl
MEDIUMarchivebox/tests/test_cli_add.py174 # Create a file with URLs
MEDIUMarchivebox/tests/migrations_helpers.py572 # Create a user
MEDIUMarchivebox/tests/migrations_helpers.py739 # Create a user
MEDIUMarchivebox/tests/test_cli_archiveresult.py34 # Create a snapshot first
MEDIUMarchivebox/cli/archivebox_update.py567 # Create a new crawl (created_by will default to system user)
MEDIUMarchivebox/cli/archivebox_persona.py15 # Create a new persona
Docstring Block Structure · 5 hits · 25 pts
SeverityFileLineSnippet
HIGHold/TODO_process_tracking.md711 Wait for process to exit, polling periodically. Args: timeout: Max seconds to wait (None =
HIGHarchivebox/hooks.py103 Check if a hook is a background hook (doesn't block foreground progression). Background hooks have '.bg.' in t
HIGHarchivebox/hooks.py187 Find all hook scripts for an event family. Searches both built-in and user plugin directories. Filters out
HIGHarchivebox/hooks.py861 Extract special config keys for a plugin following naming conventions. ArchiveBox recognizes 3 special config
HIGHarchivebox/machine/models.py2165 Wait for process to exit, polling periodically. Args: timeout: Max seconds to wait (None =
AI Slop Vocabulary · 6 hits · 10 pts
SeverityFileLineSnippet
MEDIUMbin/fuzz_test.sh4# This is intentionally not a test harness: it does not assert success/failure.
LOWarchivebox/misc/util.py428 # if response is non-test (e.g. image or other binary files), just return the filename instead
MEDIUMarchivebox/core/models.py4082 # these properties provide seamless access to Process data through ArchiveResult
LOWarchivebox/core/views.py1151 # We can't just call request.build_absolute_uri in the template, because it would include query parameters
LOWarchivebox/cli/archivebox_archiveresult.py115 # If no id, we could create it, but for now just pass through
LOWarchivebox/cli/archivebox_crawl.py93 # Handle existing Crawl records (just pass through with id)
Slop Phrases · 5 hits · 8 pts
SeverityFileLineSnippet
LOWpyproject.toml94 # to use sonic make sure you have a sonic server running in docker (archivebox/sonic) or locally:
MEDIUM.github/workflows/codeql.yml55 # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
LOWarchivebox/core/admin_snapshots.py1645 f"Successfully deleted {total} Snapshots ({deleted_count} total objects including related records). Don'
LOWarchivebox/config/collection.py22 # If you modify this file manually, make sure to update your archive after by running:
LOWarchivebox/cli/archivebox_install.py78 stderr("\n[+] Don't forget to create a new admin user for the Web UI...", color="green")
Example Usage Blocks · 4 hits · 6 pts
SeverityFileLineSnippet
LOWdocker-compose.yml1# Usage:
LOWbin/kill_chrome.sh5# Usage:
LOWbin/export_browser_history.sh4# Usage:
LOWarchivebox/machine/detect.py282# Example usage
Fake / Example Data · 4 hits · 4 pts
SeverityFileLineSnippet
LOWarchivebox/tests/test_server_security_browser.py211 defaults={"email": "admin@example.com", "is_staff": True, "is_superuser": True},
LOWarchivebox/tests/migrations_helpers.py577 'admin@example.com', 1, 1, datetime('now'))
LOWarchivebox/tests/migrations_helpers.py744 'admin@example.com', 1, 1, datetime('now'))
LOWarchivebox/tests/test_urls.py84 defaults={"email": "admin@example.com", "is_staff": True, "is_superuser": True},
Verbosity Indicators · 3 hits · 2 pts
SeverityFileLineSnippet
LOWarchivebox/machine/models.py2222 # Step 1: Send SIGTERM for graceful shutdown
LOWarchivebox/machine/models.py2225 # Step 2: Wait for graceful exit
LOWarchivebox/machine/models.py2238 # Step 3: Force kill with SIGKILL
Overly Generic Function Names · 1 hit · 1 pt
SeverityFileLineSnippet
LOWarchivebox/mcp/server.py311 def handle_request(self, request: dict) -> dict: