Apache Spark - A unified analytics engine for large-scale data processing
8688 matches across 17 categories. Click a row to expand file-level details.
| Severity | File | Line | Snippet |
|---|---|---|---|
| CRITICAL | …rg/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala | 115 | case e: Throwable if org.apache.commons.lang3.exception.ExceptionUtils.indexOfThrowable( |
| CRITICAL | …org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala | 49 | assert(sparkPod.pod.getSpec.getContainers.asScala.toList.map(_.getName) == List("first")) |
| CRITICAL | …org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala | 56 | assert(sparkPod.pod.getSpec.getContainers.asScala.toList.map(_.getName) == List("second")) |
| CRITICAL | …org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala | 63 | assert(sparkPod.pod.getSpec.getContainers.asScala.toList.map(_.getName) == List("second")) |
| CRITICAL | …k/scheduler/cluster/k8s/DeploymentAllocatorSuite.scala | 134 | assert(deployment.getSpec.getTemplate.getSpec.getContainers.asScala.exists( |
| CRITICAL | …heduler/cluster/k8s/ExecutorPVCResizePluginSuite.scala | 187 | captor.getValue.getSpec.getResources.getRequests.get("storage")).longValue() |
| CRITICAL | …k/scheduler/cluster/k8s/StatefulSetPodsAllocator.scala | 171 | val statefulSet = new io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder() |
| CRITICAL | …rverExpectations/stage_with_summaries_expectation.json | 5 | "details" : "org.apache.spark.sql.Dataset.foreach(Dataset.scala:2862)\n$line19.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.< |
| CRITICAL | …ectations/stage_with_accumulable_json_expectation.json | 9 | "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$ |
| CRITICAL | …ions/stage_list_with_accumulable_json_expectation.json | 9 | "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$ |
| CRITICAL | …oryServerExpectations/stage_list_json_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$l |
| CRITICAL | …oryServerExpectations/stage_list_json_expectation.json | 87 | "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)\n$l |
| CRITICAL | …oryServerExpectations/stage_list_json_expectation.json | 93 | "failureReason" : "Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost tas |
| CRITICAL | …oryServerExpectations/stage_list_json_expectation.json | 170 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$lin |
| CRITICAL | …oryServerExpectations/stage_list_json_expectation.json | 252 | "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$li |
| CRITICAL | …ctations/stage_list_with_peak_metrics_expectation.json | 5 | "details" : "org.apache.spark.sql.Dataset.foreach(Dataset.scala:2862)\n$line19.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.< |
| CRITICAL | …erExpectations/one_stage_attempt_json_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$lin |
| CRITICAL | …attempt_json_details_with_failed_task_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$lin |
| CRITICAL | …toryServerExpectations/one_stage_json_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$lin |
| CRITICAL | …Expectations/complete_stage_list_json_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:17)\n$l |
| CRITICAL | …Expectations/complete_stage_list_json_expectation.json | 87 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$lin |
| CRITICAL | …Expectations/complete_stage_list_json_expectation.json | 169 | "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15)\n$li |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 5 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 71 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 137 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 203 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 269 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 335 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 401 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 467 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 533 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …xpectations/stage_task_list_w__status_expectation.json | 599 | "errorMessage" : "java.lang.RuntimeException: bad exec\n\tat $line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$anonfun$1. |
| CRITICAL | …pectations/excludeOnFailure_for_stage_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:370)\n$line17.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<consol |
| CRITICAL | …pectations/excludeOnFailure_for_stage_expectation.json | 176 | "errorMessage" : "java.lang.RuntimeException: Bad executor\n\tat $line17.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$an |
| CRITICAL | …pectations/excludeOnFailure_for_stage_expectation.json | 441 | "errorMessage" : "java.lang.RuntimeException: Bad executor\n\tat $line17.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$an |
| CRITICAL | …ations/stage_with_speculation_summary_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.collect(RDD.scala:1029)\n$line17.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<c |
| CRITICAL | …rExpectations/stage_with_peak_metrics_expectation.json | 5 | "details" : "org.apache.spark.sql.Dataset.foreach(Dataset.scala:2862)\n$line19.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.< |
| CRITICAL | …ectations/one_stage_json_with_details_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:14)\n$lin |
| CRITICAL | …tions/one_stage_json_with_partitionId_expectation.json | 5 | "details" : "org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<in |
| CRITICAL | …erExpectations/failed_stage_list_json_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)\n$l |
| CRITICAL | …erExpectations/failed_stage_list_json_expectation.json | 11 | "failureReason" : "Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost tas |
| CRITICAL | …tions/excludeOnFailure_node_for_stage_expectation.json | 5 | "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:370)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<consol |
| CRITICAL | …tions/excludeOnFailure_node_for_stage_expectation.json | 371 | "errorMessage" : "java.lang.RuntimeException: Bad executor\n\tat $line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$an |
| CRITICAL | …tions/excludeOnFailure_node_for_stage_expectation.json | 834 | "errorMessage" : "java.lang.RuntimeException: Bad executor\n\tat $line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$an |
| CRITICAL | …tions/excludeOnFailure_node_for_stage_expectation.json | 901 | "errorMessage" : "java.lang.RuntimeException: Bad executor\n\tat $line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$an |
| CRITICAL | …tions/excludeOnFailure_node_for_stage_expectation.json | 968 | "errorMessage" : "java.lang.RuntimeException: Bad executor\n\tat $line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$an |
| CRITICAL | …/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala | 220 | val e1 = "Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 i |
| CRITICAL | …he/spark/shuffle/sort/ShuffleExternalSorterSuite.scala | 108 | // at org.apache.spark.memory.TaskMemoryManager.getPage(TaskMemoryManager.java:384) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 721 | val rootLogger = org.apache.logging.log4j.LogManager.getRootLogger() |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1749 | // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala:1529) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1754 | // ----> at org.apache.spark.util.UtilsSuite.callGetTryFromNested(UtilsSuite.scala:1626) <---- STITCHED. |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1762 | // at org.apache.spark.util.UtilsSuite.callDoTryNested(UtilsSuite.scala:1630) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1766 | // at org.apache.spark.util.UtilsSuite.callDoTryNestedNested(UtilsSuite.scala:1654) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1799 | // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala:1529) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1803 | // at org.apache.spark.util.UtilsSuite.callDoTry(UtilsSuite.scala:1534) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1808 | // ----> at org.apache.spark.util.UtilsSuite.callGetTryFromNestedNested(UtilsSuite.scala:1650) <---- STITCHED. |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1816 | // at org.apache.spark.util.UtilsSuite.callDoTryNestedNested(UtilsSuite.scala:1654) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1844 | // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala:1529) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1848 | // at org.apache.spark.util.UtilsSuite.callDoTry(UtilsSuite.scala:1534) |
| CRITICAL | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1852 | // at org.apache.spark.util.UtilsSuite.callDoTryNested(UtilsSuite.scala:1630) |
| 364 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | python/pyspark/mllib/clustering.py | 0 | get the cluster centers, represented as a list of numpy arrays. |
| HIGH | python/pyspark/mllib/clustering.py | 0 | get the cluster centers, represented as a list of numpy arrays. |
| HIGH | python/pyspark/ml/clustering.py | 0 | get the cluster centers, represented as a list of numpy arrays. |
| HIGH | python/pyspark/ml/clustering.py | 0 | get the cluster centers, represented as a list of numpy arrays. |
| HIGH | python/pyspark/tests/test_rdd.py | 0 | executes a job with the group ``job_group``. each job waits for 3 seconds and then exits. |
| HIGH | python/pyspark/tests/test_pin_thread.py | 0 | executes a job with the group ``job_group``. each job waits for 3 seconds and then exits. |
| HIGH | python/pyspark/sql/tests/test_job_cancellation.py | 0 | executes a job with the group ``job_group``. each job waits for 3 seconds and then exits. |
| HIGH | python/pyspark/tests/test_appsubmit.py | 0 | |from pyspark import sparkcontext |from mylib import myfunc | |sc = sparkcontext() |print(sc.parallelize([1, 2, 3]).map( |
| HIGH | python/pyspark/tests/test_appsubmit.py | 0 | |from pyspark import sparkcontext |from mylib import myfunc | |sc = sparkcontext() |print(sc.parallelize([1, 2, 3]).map( |
| HIGH | python/pyspark/tests/test_appsubmit.py | 0 | |from pyspark import sparkcontext |from mylib import myfunc | |sc = sparkcontext() |print(sc.parallelize([1, 2, 3]).map( |
| HIGH | python/pyspark/tests/test_appsubmit.py | 0 | |from pyspark import sparkcontext |from mylib import myfunc | |sc = sparkcontext() |print(sc.parallelize([1, 2, 3]).map( |
| HIGH | python/pyspark/pipelines/tests/test_cli.py | 0 | { "catalog": "test_catalog", "configuration": {}, "libraries": [] } |
| HIGH | python/pyspark/pipelines/tests/test_cli.py | 0 | { "catalog": "test_catalog", "configuration": {}, "libraries": [] } |
| HIGH | python/pyspark/pipelines/tests/test_cli.py | 0 | { "catalog": "test_catalog", "configuration": {}, "libraries": [] } |
| HIGH | python/pyspark/ml/tree.py | 0 | trees in this ensemble. warning: these have null parent estimators. |
| HIGH | python/pyspark/ml/regression.py | 0 | trees in this ensemble. warning: these have null parent estimators. |
| HIGH | python/pyspark/ml/regression.py | 0 | trees in this ensemble. warning: these have null parent estimators. |
| HIGH | python/pyspark/ml/classification.py | 0 | trees in this ensemble. warning: these have null parent estimators. |
| HIGH | python/pyspark/ml/classification.py | 0 | trees in this ensemble. warning: these have null parent estimators. |
| HIGH | python/pyspark/ml/wrapper.py | 0 | returns the number of features the model was trained on. if unknown, returns -1 |
| HIGH | python/pyspark/ml/regression.py | 0 | returns the number of features the model was trained on. if unknown, returns -1 |
| HIGH | python/pyspark/ml/base.py | 0 | returns the number of features the model was trained on. if unknown, returns -1 |
| HIGH | python/pyspark/ml/connect/base.py | 0 | returns the number of features the model was trained on. if unknown, returns -1 |
| HIGH | python/pyspark/ml/regression.py | 0 | sets the value of :py:attr:`minweightfractionpernode`. |
| HIGH | python/pyspark/ml/regression.py | 0 | sets the value of :py:attr:`minweightfractionpernode`. |
| HIGH | python/pyspark/ml/regression.py | 0 | sets the value of :py:attr:`minweightfractionpernode`. |
| HIGH | python/pyspark/ml/classification.py | 0 | sets the value of :py:attr:`minweightfractionpernode`. |
| HIGH | python/pyspark/ml/classification.py | 0 | sets the value of :py:attr:`minweightfractionpernode`. |
| HIGH | python/pyspark/ml/classification.py | 0 | sets the value of :py:attr:`minweightfractionpernode`. |
| HIGH | python/pyspark/ml/regression.py | 0 | sets the value of :py:attr:`featuresubsetstrategy`. |
| HIGH | python/pyspark/ml/regression.py | 0 | sets the value of :py:attr:`featuresubsetstrategy`. |
| HIGH | python/pyspark/ml/classification.py | 0 | sets the value of :py:attr:`featuresubsetstrategy`. |
| HIGH | python/pyspark/ml/classification.py | 0 | sets the value of :py:attr:`featuresubsetstrategy`. |
| HIGH | python/pyspark/ml/clustering.py | 0 | number of features, i.e., length of vectors which this transforms. |
| HIGH | python/pyspark/ml/clustering.py | 0 | number of features, i.e., length of vectors which this transforms. |
| HIGH | python/pyspark/ml/clustering.py | 0 | number of features, i.e., length of vectors which this transforms. |
| HIGH | python/pyspark/ml/feature.py | 0 | number of features, i.e., length of vectors which this transforms. |
| HIGH | python/pyspark/ml/classification.py | 0 | gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. an e |
| HIGH | python/pyspark/ml/classification.py | 0 | gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. an e |
| HIGH | python/pyspark/ml/classification.py | 0 | gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. an e |
| HIGH | python/pyspark/ml/classification.py | 0 | evaluates the model on a test dataset. .. versionadded:: 3.1.0 parameters ---------- dataset : :py:class:`pyspark.sql.da |
| HIGH | python/pyspark/ml/classification.py | 0 | evaluates the model on a test dataset. .. versionadded:: 3.1.0 parameters ---------- dataset : :py:class:`pyspark.sql.da |
| HIGH | python/pyspark/ml/classification.py | 0 | evaluates the model on a test dataset. .. versionadded:: 3.1.0 parameters ---------- dataset : :py:class:`pyspark.sql.da |
| HIGH | python/pyspark/ml/classification.py | 0 | evaluates the model on a test dataset. .. versionadded:: 3.1.0 parameters ---------- dataset : :py:class:`pyspark.sql.da |
| HIGH | …thon/pyspark/ml/tests/connect/test_connect_function.py | 0 | these test cases exercise the interface to the proto plan generation but do not call spark. |
| HIGH | …hon/pyspark/sql/tests/connect/test_connect_function.py | 0 | these test cases exercise the interface to the proto plan generation but do not call spark. |
| HIGH | python/pyspark/sql/tests/connect/test_connect_plan.py | 0 | these test cases exercise the interface to the proto plan generation but do not call spark. |
| HIGH | python/pyspark/pandas/window.py | 0 | wraps a function that handles spark column in order to support it in both pandas-on-spark series and dataframe. note tha |
| HIGH | python/pyspark/pandas/window.py | 0 | wraps a function that handles spark column in order to support it in both pandas-on-spark series and dataframe. note tha |
| HIGH | python/pyspark/pandas/window.py | 0 | wraps a function that handles spark column in order to support it in both pandas-on-spark series and dataframe. note tha |
| HIGH | python/pyspark/pandas/series.py | 0 | same as `to_pandas()`, without issuing the advice log for internal usage. |
| HIGH | python/pyspark/pandas/frame.py | 0 | same as `to_pandas()`, without issuing the advice log for internal usage. |
| HIGH | python/pyspark/pandas/indexes/multi.py | 0 | same as `to_pandas()`, without issuing the advice log for internal usage. |
| HIGH | python/pyspark/pandas/indexes/base.py | 0 | same as `to_pandas()`, without issuing the advice log for internal usage. |
| HIGH | …pyspark/pandas/tests/data_type_ops/test_num_reverse.py | 0 | unit tests for arithmetic operations of numeric data types. a few test cases are disabled because pandas-on-spark return |
| HIGH | …hon/pyspark/pandas/tests/data_type_ops/test_num_ops.py | 0 | unit tests for arithmetic operations of numeric data types. a few test cases are disabled because pandas-on-spark return |
| HIGH | …park/pandas/tests/data_type_ops/test_num_arithmetic.py | 0 | unit tests for arithmetic operations of numeric data types. a few test cases are disabled because pandas-on-spark return |
| HIGH | python/pyspark/pandas/spark/accessors.py | 0 | spark related features. usually, the features here are missing in pandas but spark has it. |
| HIGH | python/pyspark/pandas/spark/accessors.py | 0 | spark related features. usually, the features here are missing in pandas but spark has it. |
| HIGH | python/pyspark/pandas/spark/accessors.py | 0 | spark related features. usually, the features here are missing in pandas but spark has it. |
| 602 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …la/org/apache/spark/deploy/yarn/YarnClusterSuite.scala | 67 | private def getOrCreatePyConnectDepChecker( |
| LOW | …apache/spark/network/shuffle/ShuffleTestAccessor.scala | 136 | def getOrCreateAppShufflePartitionInfo( |
| LOW | …scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 277 | private def getOrUpdateAllocatedHostToContainersMapForRPId( |
| LOW | …scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 283 | private def getOrUpdateRunningExecutorForRPId(rpId: Int): mutable.Set[String] = synchronized { |
| LOW | …scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 287 | private def getOrUpdateNumExecutorsStartingForRPId(rpId: Int): AtomicInteger = synchronized { |
| LOW | …scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 291 | private def getOrUpdateTargetNumExecutorsForRPId(rpId: Int): Int = synchronized { |
| LOW | …a/org/apache/spark/storage/DiskBlockManagerSuite.scala | 144 | private def getAndSetUmask(posix: POSIX, mask: String): String = { |
| LOW | …/resources/org/apache/spark/ui/static/executorspage.js | 329 | function reselectCheckboxesBasedOnTaskTableState() { |
| LOW | …resources/org/apache/spark/ui/static/streaming-page.js | 62 | function getMaxMarginLeftForTimeline() { |
| LOW | …resources/org/apache/spark/ui/static/streaming-page.js | 69 | function getOnClickTimelineFunction() { |
| LOW | …/resources/org/apache/spark/ui/static/timeline-view.js | 171 | function getStageIdAndAttemptForStageEntry(baseElem) { |
| LOW | …/resources/org/apache/spark/ui/static/timeline-view.js | 239 | function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, maxFinishTime, offset) { |
| LOW | …main/resources/org/apache/spark/ui/static/stagepage.js | 120 | function getColumnNameForTaskMetricSummary(columnKey) { |
| LOW | …main/resources/org/apache/spark/ui/static/stagepage.js | 175 | function displayRowsForSummaryMetricsTable(row, type, columnIndex) { |
| LOW | …main/resources/org/apache/spark/ui/static/stagepage.js | 218 | function createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTable) { |
| LOW | …main/resources/org/apache/spark/ui/static/stagepage.js | 277 | function createRowMetadataForColumn(colKey, data, checkboxId) { |
| LOW | …main/resources/org/apache/spark/ui/static/stagepage.js | 287 | function reselectCheckboxesBasedOnTaskTableState() { |
| LOW | …esources/org/apache/spark/ui/static/environmentpage.js | 47 | function createRESTEndPointForEnvironmentPage(appId) { |
| LOW | …/resources/org/apache/spark/ui/static/spark-dag-viz.js | 232 | function getMaxChildWidthAndPaddingTop(g, v, svg) { |
| LOW | …src/main/resources/org/apache/spark/ui/static/table.js | 52 | function expandAllThreadStackTrace(toggleButton) { |
| LOW | …src/main/resources/org/apache/spark/ui/static/table.js | 66 | function collapseAllThreadStackTrace(toggleButton) { |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 1081 | */function injectEdgeLabelProxies(g){_.forEach(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 1325 | */function findSmallestWidthAlignment(g,xss){return _.minBy(_.values(xss),function(xs){var max=Number.NEGATIVE_INFINITY |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 360 | function cartesianNormalizeInPlace(d){var l=sqrt(d[0]*d[0]+d[1]*d[1]+d[2]*d[2]);d[0]/=l,d[1]/=l,d[2]/=l}var lambda0$1,ph |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 439 | }}}function clipAntimeridianIntersect(lambda0,phi0,lambda1,phi1){var cosPhi0,cosPhi1,sinLambda0Lambda1=sin(lambda0-lambd |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 891 | percentRe=/^%/,requoteRe=/[\\^$*+?|[\]().{}]/g;function pad(value,fill,width){var sign=value<0?"-":"",string=(sign?-valu |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 1298 | scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.forEach(layer,function(v,i){var w=findOtherInnerSegm |
| LOW | …src/main/resources/org/apache/spark/ui/static/utils.js | 188 | function createRESTEndPointForExecutorsPage(appId) { |
| LOW | …src/main/resources/org/apache/spark/ui/static/utils.js | 211 | function createRESTEndPointForMiscellaneousProcess(appId) { |
| LOW | core/src/main/scala/org/apache/spark/SparkContext.scala | 3070 | def getOrCreate(config: SparkConf): SparkContext = { |
| LOW | core/src/main/scala/org/apache/spark/SparkContext.scala | 3094 | def getOrCreate(): SparkContext = { |
| LOW | core/src/main/scala/org/apache/spark/util/Utils.scala | 775 | private[spark] def getOrCreateLocalRootDirs(conf: ReadOnlySparkConf): Array[String] = { |
| LOW | core/src/main/scala/org/apache/spark/util/Utils.scala | 810 | private def getOrCreateLocalRootDirsImpl(conf: ReadOnlySparkConf): Array[String] = { |
| LOW | …cala/org/apache/spark/util/UninterruptibleThread.scala | 61 | def getAndSetUninterruptible(value: Boolean): Boolean = synchronized { |
| LOW | …c/main/scala/org/apache/spark/util/AccumulatorV2.scala | 486 | private def getOrCreate = { |
| LOW | …a/org/apache/spark/deploy/master/ApplicationInfo.scala | 82 | private[deploy] def getOrUpdateExecutorsForRPId(rpId: Int): mutable.Set[Int] = { |
| LOW | …in/scala/org/apache/spark/scheduler/DAGScheduler.scala | 528 | private def getOrCreateShuffleMapStage( |
| LOW | …in/scala/org/apache/spark/scheduler/DAGScheduler.scala | 728 | private def getOrCreateParentStages(shuffleDeps: HashSet[ShuffleDependency[_, _, _]], |
| LOW | …/scala/org/apache/spark/status/AppStatusListener.scala | 1144 | private def getOrCreateExecutor(executorId: String, addTime: Long): LiveExecutor = { |
| LOW | …/scala/org/apache/spark/status/AppStatusListener.scala | 1151 | private def getOrCreateOtherProcess(processId: String, |
| LOW | …/scala/org/apache/spark/status/AppStatusListener.scala | 1211 | private def getOrCreateStage(info: StageInfo): LiveStage = { |
| LOW | core/src/main/scala/org/apache/spark/rdd/RDD.scala | 369 | private[spark] def computeOrReadCheckpoint(split: Partition, context: TaskContext): Iterator[T] = |
| LOW | core/src/main/scala/org/apache/spark/rdd/RDD.scala | 381 | private[spark] def getOrCompute(partition: Partition, context: TaskContext): Iterator[T] = { |
| LOW | …main/scala/org/apache/spark/storage/BlockManager.scala | 1409 | def getOrElseUpdateRDDBlock[T]( |
| LOW | …main/scala/org/apache/spark/storage/BlockManager.scala | 1432 | private def getOrElseUpdate[T]( |
| LOW | …g/apache/spark/api/python/PythonWorkerLogCapture.scala | 97 | private def getOrCreateLogWriter(workerId: String): (RollingLogWriter, AtomicLong) = { |
| LOW | …in/scala/org/apache/spark/resource/ResourceUtils.scala | 323 | def getOrDiscoverAllResources( |
| LOW | …in/scala/org/apache/spark/resource/ResourceUtils.scala | 356 | def getOrDiscoverAllResourcesForResourceProfile( |
| LOW | …/scala/org/apache/spark/resource/ResourceProfile.scala | 376 | private[spark] def getOrCreateDefaultProfile(conf: SparkConf): ResourceProfile = { |
| LOW | python/run-tests.py | 236 | def run_individual_python_test(target_dir, test_name, pyspark_python, keep_test_output): |
| LOW | python/run-tests.py | 398 | def get_default_python_executables(): |
| LOW | python/pyspark/worker.py | 145 | def use_legacy_pandas_udf_conversion(self) -> bool: |
| LOW | python/pyspark/worker.py | 152 | def use_legacy_pandas_udtf_conversion(self) -> bool: |
| LOW | python/pyspark/worker.py | 167 | def int_to_decimal_coercion_enabled(self) -> bool: |
| LOW | python/pyspark/worker.py | 185 | def arrow_max_records_per_batch(self) -> int: |
| LOW | python/pyspark/worker.py | 189 | def arrow_max_bytes_per_batch(self) -> int: |
| LOW | python/pyspark/worker.py | 349 | def verify_iterator_exhausted(iterator: Iterator, error_class: str) -> None: |
| LOW | python/pyspark/worker.py | 405 | def wrap_pandas_batch_iter_udf(f, return_type, runner_conf): |
| LOW | python/pyspark/worker.py | 497 | def wrap_cogrouped_map_pandas_udf(f, return_type, argspec, runner_conf): |
| LOW | python/pyspark/worker.py | 568 | def wrap_grouped_transform_with_state_pandas_udf(f, return_type, runner_conf): |
| 2865 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | .asf.yaml | 1 | # Licensed to the Apache Software Foundation (ASF) under one or more |
| LOW | .pre-commit-config.yaml | 1 | # |
| LOW | pyproject.toml | 1 | # |
| LOW | …rg/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala | 121 | // There's a race in MiniYARNCluster in which start() may return before the RM has updated |
| LOW | …scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 321 | ResourceProfile.getResourcesForClusterManager(rp.id, rp.executorResources, |
| LOW | …c/main/scala/org/apache/spark/deploy/yarn/Client.scala | 801 | // conf archive will be handled by the AM differently so that we avoid having to send |
| LOW | …/kubernetes/docker/src/main/dockerfiles/spark/decom.sh | 1 | #!/usr/bin/env bash |
| LOW | …rnetes/docker/src/main/dockerfiles/spark/entrypoint.sh | 1 | #!/usr/bin/env bash |
| LOW | …s/core/src/test/resources/driver-podgroup-template.yml | 1 | # |
| LOW | …er/cluster/k8s/KubernetesClusterSchedulerBackend.scala | 301 | running.delete() |
| LOW | …/kubernetes/integration-tests/tests/pyfiles_connect.py | 1 | # |
| LOW | …/kubernetes/integration-tests/tests/decommissioning.py | 1 | # |
| LOW | …managers/kubernetes/integration-tests/tests/pyfiles.py | 1 | # |
| LOW | …ernetes/integration-tests/tests/worker_memory_check.py | 1 | # |
| LOW | …ernetes/integration-tests/tests/py_container_checks.py | 1 | # |
| LOW | …tes/integration-tests/tests/decommissioning_cleanup.py | 1 | # |
| LOW | …tes/integration-tests/tests/python_executable_check.py | 1 | # |
| LOW | …nagers/kubernetes/integration-tests/tests/autoscale.py | 1 | # |
| LOW | …ntegration-tests/scripts/setup-integration-test-env.sh | 1 | #!/usr/bin/env bash |
| LOW | …agers/kubernetes/integration-tests/dev/spark-rbac.yaml | 1 | # |
| LOW | …tes/integration-tests/dev/dev-run-integration-tests.sh | 1 | #!/usr/bin/env bash |
| LOW | …tegration-tests/src/test/resources/driver-template.yml | 1 | # |
| LOW | …gration-tests/src/test/resources/executor-template.yml | 1 | # |
| LOW | …-tests/src/test/resources/driver-schedule-template.yml | 1 | # |
| LOW | …st/resources/volcano/high-priority-driver-template.yml | 1 | # |
| LOW | …rces/volcano/low-priority-driver-podgroup-template.yml | 1 | # |
| LOW | …sources/volcano/driver-podgroup-template-memory-3g.yml | 1 | # |
| LOW | …/resources/volcano/queue0-driver-podgroup-template.yml | 1 | # |
| LOW | …n-tests/src/test/resources/volcano/priorityClasses.yml | 1 | # |
| LOW | …/resources/volcano/queue1-driver-podgroup-template.yml | 1 | # |
| LOW | …/resources/volcano/medium-priority-driver-template.yml | 1 | # |
| LOW | …t/resources/volcano/queue-driver-podgroup-template.yml | 1 | # |
| LOW | …s/volcano/medium-priority-driver-podgroup-template.yml | 1 | # |
| LOW | …est/resources/volcano/low-priority-driver-template.yml | 1 | # |
| LOW | …ces/volcano/high-priority-driver-podgroup-template.yml | 1 | # |
| LOW | …/org/apache/spark/launcher/AbstractCommandBuilder.java | 201 | if (isBeeLine && "1".equals(getenv("SPARK_CONNECT_BEELINE")) && |
| LOW | …/org/apache/spark/launcher/AbstractCommandBuilder.java | 321 | return scala; |
| LOW | …he/spark/shuffle/sort/ShuffleExternalSorterSuite.scala | 101 | // may happen. Here are some examples we have seen: |
| LOW | …t/scala/org/apache/spark/util/SizeEstimatorSuite.scala | 301 | // objectSize=8, fields=12 => shellSize=20, aligned to 24 |
| LOW | …t/scala/org/apache/spark/util/SizeEstimatorSuite.scala | 361 | // DummyString has: pointer(arr,8) + Int(hashCode,4) + Int(hash32,4) = 16 bytes of fields |
| LOW | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1661 | // java.lang.Exception: test |
| LOW | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1681 | val e2 = intercept[Exception] { |
| LOW | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1701 | assert(!st2.exists(_.getMethodName == "callDoTry")) |
| LOW | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1741 | |
| LOW | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1801 | // at scala.util.Try$.apply(Try.scala:217) |
| LOW | …/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 1841 | // |
| LOW | …la/org/apache/spark/scheduler/HealthTrackerSuite.scala | 361 | // This ensures that we don't trigger spurious excluding for long tasksets, when the taskset |
| LOW | …rg/apache/spark/scheduler/TaskSchedulerImplSuite.scala | 1161 | // We should be checking our node excludelist, but it should be within the bound we defined |
| LOW | …rg/apache/spark/scheduler/TaskSchedulerImplSuite.scala | 2541 | |
| LOW | …/test/scala/org/apache/spark/scheduler/PoolSuite.scala | 121 | scheduleTaskAndVerifyId(0, rootPool, 0) |
| LOW | …st/scala/org/apache/spark/executor/ExecutorSuite.scala | 101 | } |
| LOW | …sources/org/apache/spark/ui/static/graphlib-dot.min.js | 141 | // Label for the graph itself |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 201 | h=s?Math.atan2(k,bl)*rad2deg-120:NaN;return new Cubehelix(h<0?h+360:h,s,l,o.opacity)}function cubehelix(h,s,l,opacity){r |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 281 | // Limit forces for very close nodes; randomize direction if coincident. |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 301 | function formatTrim(s){out:for(var n=s.length,i=1,i0=-1,i1;i<n;++i){switch(s[i]){case".":i0=i1=i;break;case"0":if(i0===0 |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 321 | // Perform the initial formatting. |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 341 | (function(global,factory){typeof exports==="object"&&typeof module!=="undefined"?factory(exports,require("d3-array")):ty |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 401 | // along the clip edge. |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 461 | // Rejoin first and last segments if there were intersections and the first |
| LOW | …n/resources/org/apache/spark/ui/static/dagre-d3.min.js | 541 | throw new Error}function enclosesNot(a,b){var dr=a.r-b.r,dx=b.x-a.x,dy=b.y-a.y;return dr<0||dr*dr<dx*dx+dy*dy}function e |
| 2790 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | python/pyspark/core/rdd.py | 242 | return self._jrdd.toString() |
| HIGH | python/pyspark/mllib/tree.py | 90 | return self._java_model.toString() |
| HIGH | python/pyspark/mllib/tree.py | 150 | return self._java_model.toString() |
| HIGH | python/pyspark/mllib/stat/test.py | 64 | return self._java_model.toString() |
| HIGH | python/pyspark/tests/test_util.py | 73 | # This attempts java.lang.String(null) which throws an NPE. |
| HIGH | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 80 | decimal, date, timestamp, duration, time, null, and nested types. |
| HIGH | …park/tests/upstream/pyarrow/test_pyarrow_array_cast.py | 31 | - Success: [0, 1, null]@int16 - element values via scalar.as_py() and Arrow type after cast |
| HIGH | …park/tests/upstream/pyarrow/test_pyarrow_array_cast.py | 111 | as "[val1, val2, null]@arrow_type" using each scalar's as_py() value. |
| HIGH | …park/tests/upstream/pyarrow/test_pyarrow_array_cast.py | 126 | On success: "[val1, val2, null]@arrow_type" |
| HIGH | …park/tests/upstream/pyarrow/test_pyarrow_array_cast.py | 127 | e.g. "[0, 1, -1, 127, -128, null]@int16" |
| HIGH | python/pyspark/testing/utils.py | 371 | script = "$(test $(tput colors)) && $(test $(tput colors) -ge 8) && echo true || echo false" |
| HIGH | python/pyspark/ml/tests/test_wrapper.py | 54 | self.assertIn("LinearRegression_", model._java_obj.toString()) |
| HIGH | python/pyspark/ml/tests/test_wrapper.py | 55 | self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString()) |
| HIGH | python/pyspark/ml/tests/test_wrapper.py | 61 | model._java_obj.toString() |
| HIGH | python/pyspark/ml/tests/test_wrapper.py | 62 | self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString()) |
| HIGH | python/pyspark/ml/tests/test_wrapper.py | 74 | model._java_obj.toString() |
| HIGH | python/pyspark/ml/tests/test_wrapper.py | 76 | summary._java_obj.toString() |
| HIGH | python/pyspark/ml/tests/test_functions.py | 253 | self.assertTrue(df1.equals(df2)) |
| HIGH | python/pyspark/ml/tests/test_functions.py | 259 | self.assertFalse(df1.equals(df3)) |
| HIGH | python/pyspark/ml/tests/test_param.py | 262 | "inputCol: input column name. (undefined)", |
| HIGH | python/pyspark/errors/exceptions/captured.py | 240 | desc=e.toString(), |
| HIGH | python/pyspark/resource/requests.py | 321 | that the cluster manager doesn't support the result is undefined, it may error or may just |
| HIGH | python/pyspark/pandas/series.py | 6759 | if get_option("compute.eager_check") and not self.index.equals(other.index): |
| HIGH | python/pyspark/pandas/utils.py | 996 | return left._jc.equals(right._jc) |
| HIGH | python/pyspark/pandas/frame.py | 1714 | # | 2|[{0, null}, {1, n...| |
| HIGH | python/pyspark/pandas/indexing.py | 556 | cast(ClassicColumn, col)._jc.toString() for col in data_spark_columns |
| HIGH | python/pyspark/pandas/groupby.py | 1305 | Flag to ignore NA(nan/null) values during truth testing. |
| HIGH | python/pyspark/pandas/base.py | 1464 | # If even one StructField is null, that row should be dropped. |
| HIGH | python/pyspark/pandas/tests/computation/test_combine.py | 682 | # Only update where new value > 150 (and old is null) |
| HIGH | …hon/pyspark/pandas/tests/diff_frames_ops/test_error.py | 198 | psidx1.equals(psidx2) |
| HIGH | python/pyspark/pandas/indexes/base.py | 387 | and self.equals(other) |
| HIGH | python/pyspark/pandas/indexes/base.py | 411 | >>> idx.equals(idx) |
| HIGH | python/pyspark/pandas/indexes/base.py | 414 | ... idx.equals(ps.Index(['a', 'b', 'c'])) |
| HIGH | python/pyspark/pandas/indexes/base.py | 417 | ... idx.equals(ps.Index(['b', 'b', 'a'])) |
| HIGH | python/pyspark/pandas/indexes/base.py | 419 | >>> idx.equals(midx) |
| HIGH | python/pyspark/pandas/indexes/base.py | 424 | >>> midx.equals(midx) |
| HIGH | python/pyspark/pandas/indexes/base.py | 427 | ... midx.equals(ps.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])) |
| HIGH | python/pyspark/pandas/indexes/base.py | 430 | ... midx.equals(ps.MultiIndex.from_tuples([('c', 'z'), ('b', 'y'), ('a', 'x')])) |
| HIGH | python/pyspark/pandas/indexes/base.py | 432 | >>> midx.equals(idx) |
| HIGH | python/pyspark/sql/conversion.py | 178 | if batch.schema.equals(arrow_schema, check_metadata=False): |
| HIGH | python/pyspark/sql/types.py | 1845 | return stringConcat.toString() |
| HIGH | python/pyspark/sql/types.py | 262 | null, UDTs, arrays, structs, and maps.""" |
| HIGH | python/pyspark/sql/context.py | 808 | '{"field1" : null, "field2": "row3", "field3":{"field4":33, "field5": []}}', |
| HIGH | python/pyspark/sql/group.py | 76 | jvm_string = self._jgd.toString() |
| HIGH | python/pyspark/sql/tvf.py | 427 | Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced. |
| HIGH | python/pyspark/sql/tvf.py | 570 | null, and any other variant values. |
| HIGH | python/pyspark/sql/tvf.py | 635 | SQL NULL, variant null, and any other variant values, then NULL is produced. |
| HIGH | python/pyspark/sql/classic/column.py | 661 | return "Column<'%s'>" % self._jc.toString() |
| HIGH | python/pyspark/sql/tests/test_session.py | 79 | self.assertTrue(jsession.equals(spark._jvm.SparkSession.getDefaultSession().get())) |
| HIGH | python/pyspark/sql/tests/test_udtf.py | 413 | df = self.spark.sql("SELECT * FROM testUDTF(null)") |
| HIGH | python/pyspark/sql/tests/test_collection.py | 416 | pdf.equals( |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 59 | "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 121 | "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 173 | inline(array(named_struct('a', 1, 'b', 2), null, named_struct('a', 3, 'b', 4))) |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 226 | inline_outer(array(named_struct('a', 1, 'b', 2), null, named_struct('a', 3, 'b', 4))) |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 277 | ('5', '{"f1": null, "f5": ""}'), |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 355 | "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 415 | "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " |
| HIGH | python/pyspark/sql/tests/test_tvf.py | 454 | "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " |
| HIGH | python/pyspark/sql/tests/test_datasources.py | 350 | ["""{"a":null, "b":1, "c":3.0}"""], |
| 64 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | python/run-tests.py | 45 | |
| LOW | python/packaging/connect/pyspark_connect/__init__.py | 22 | |
| LOW | python/pyspark/worker.py | 51 | |
| LOW | python/pyspark/util.py | 61 | |
| LOW | python/pyspark/util.py | 63 | |
| LOW | python/pyspark/util.py | 64 | |
| LOW | python/pyspark/util.py | 66 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 67 | |
| LOW | python/pyspark/util.py | 92 | |
| LOW | python/pyspark/util.py | 92 | |
| LOW | python/pyspark/util.py | 92 | |
| LOW | python/pyspark/util.py | 92 | |
| LOW | python/pyspark/util.py | 92 | |
| LOW | python/pyspark/util.py | 99 | |
| LOW | python/pyspark/util.py | 923 | |
| LOW | python/pyspark/util.py | 943 | |
| LOW | python/pyspark/conf.py | 27 | |
| LOW | python/pyspark/shell.py | 33 | |
| LOW | python/pyspark/__init__.py | 68 | |
| LOW | python/pyspark/__init__.py | 69 | |
| LOW | python/pyspark/__init__.py | 69 | |
| LOW | python/pyspark/__init__.py | 70 | |
| LOW | python/pyspark/__init__.py | 71 | |
| LOW | python/pyspark/__init__.py | 71 | |
| LOW | python/pyspark/__init__.py | 72 | |
| LOW | python/pyspark/__init__.py | 72 | |
| LOW | python/pyspark/__init__.py | 73 | |
| LOW | python/pyspark/__init__.py | 73 | |
| LOW | python/pyspark/__init__.py | 73 | |
| LOW | python/pyspark/__init__.py | 74 | |
| LOW | python/pyspark/__init__.py | 74 | |
| LOW | python/pyspark/__init__.py | 75 | |
| LOW | python/pyspark/__init__.py | 76 | |
| LOW | python/pyspark/__init__.py | 131 | |
| LOW | python/pyspark/__init__.py | 56 | |
| LOW | python/pyspark/__init__.py | 56 | |
| LOW | python/pyspark/__init__.py | 57 | |
| LOW | python/pyspark/__init__.py | 58 | |
| 543 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | bin/docker-image-tool.sh | 80 | # Create a smaller build context for docker in dev builds to make the build faster. Docker |
| MEDIUM | python/run-tests.py | 268 | # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is |
| MEDIUM | python/run-tests.py | 544 | # Create the target directory before starting tasks to avoid races. |
| MEDIUM | python/pyspark/java_gateway.py | 77 | # Create a temporary directory where the gateway server should write the connection |
| MEDIUM | python/pyspark/statcounter.py | 18 | # This file is ported from spark/util/StatCounter.scala |
| MEDIUM | python/pyspark/daemon.py | 114 | # Create a new process group to corral our children |
| MEDIUM | python/pyspark/daemon.py | 120 | # Create a listening socket on the loopback interface |
| MEDIUM | python/pyspark/core/rdd.py | 245 | # This method is called when attempting to pickle an RDD, which is always an error: |
| MEDIUM | python/pyspark/core/rdd.py | 2827 | ... # Create the conf for writing |
| MEDIUM | python/pyspark/core/rdd.py | 2839 | ... # Create the conf for reading |
| MEDIUM | python/pyspark/core/rdd.py | 2986 | ... # Create the conf for writing |
| MEDIUM | python/pyspark/core/rdd.py | 2998 | ... # Create the conf for reading |
| MEDIUM | python/pyspark/core/context.py | 299 | # Create the Java SparkContext through Py4J |
| MEDIUM | python/pyspark/core/context.py | 304 | # Create a single Accumulator in Java that we'll send all our updates through; |
| MEDIUM | python/pyspark/core/context.py | 382 | # Create a temporary directory inside spark.local.dir: |
| MEDIUM | python/pyspark/core/context.py | 492 | # This method is called when attempting to pickle SparkContext, which is always an error: |
| MEDIUM | python/pyspark/core/context.py | 1493 | ... # Create the conf for writing |
| MEDIUM | python/pyspark/core/context.py | 1505 | ... # Create the conf for reading |
| MEDIUM | python/pyspark/core/context.py | 1690 | ... # Create the conf for writing |
| MEDIUM | python/pyspark/core/context.py | 1702 | ... # Create the conf for reading |
| MEDIUM | python/pyspark/mllib/tests/test_linalg.py | 534 | # Create a CSC matrix with non-sorted indices |
| MEDIUM | python/pyspark/mllib/tests/test_streaming_algorithms.py | 101 | # Create a toy dataset by setting a tiny offset for each point. |
| MEDIUM | python/pyspark/mllib/tests/test_streaming_algorithms.py | 396 | # Create a model with initial Weights equal to coefs |
| MEDIUM | python/pyspark/tests/test_rdd.py | 706 | # Create a DataFrame with many columns, call a Python function on each row, and take only |
| MEDIUM | python/pyspark/pipelines/init_cli.py | 52 | # Create the storage directory |
| MEDIUM | python/pyspark/pipelines/init_cli.py | 65 | # Create the transformations directory |
| MEDIUM | python/pyspark/pipelines/init_cli.py | 69 | # Create the Python example file |
| MEDIUM | python/pyspark/pipelines/init_cli.py | 74 | # Create the SQL example file |
| MEDIUM | python/pyspark/pipelines/tests/test_cli.py | 376 | # Create a minimal pipeline spec |
| MEDIUM | python/pyspark/pipelines/tests/test_cli.py | 400 | # Create a minimal pipeline spec |
| MEDIUM | python/pyspark/pipelines/tests/test_cli.py | 425 | # Create a minimal pipeline spec |
| MEDIUM | python/pyspark/ml/pipeline.py | 188 | # Create a new instance of this stage. |
| MEDIUM | python/pyspark/ml/pipeline.py | 346 | # Create a new instance of this stage. |
| MEDIUM | python/pyspark/ml/tuning.py | 981 | # Create a new instance of this stage. |
| MEDIUM | python/pyspark/ml/tuning.py | 1559 | # Create a new instance of this stage. |
| MEDIUM | python/pyspark/ml/tuning.py | 1684 | # Create a new instance of this stage. |
| MEDIUM | python/pyspark/ml/tests/test_feature.py | 362 | # Create a DataFrame |
| MEDIUM | python/pyspark/pandas/tests/io/test_io.py | 34 | # This file contains test cases for 'Serialization / IO / Conversion' |
| MEDIUM | python/pyspark/pandas/tests/frame/test_time_series.py | 26 | # This file contains test cases for 'Time series-related' |
| MEDIUM | python/pyspark/pandas/tests/frame/test_spark.py | 34 | # This file contains test cases for 'Spark-related' |
| MEDIUM | python/pyspark/pandas/tests/frame/test_attrs.py | 26 | # This file contains test cases for 'Attributes and underlying data' |
| MEDIUM | python/pyspark/pandas/tests/frame/test_constructor.py | 34 | # This file contains test cases for 'Constructor' |
| MEDIUM | python/pyspark/pandas/tests/frame/test_conversion.py | 25 | # This file contains test cases for 'Conversion' |
| MEDIUM | python/pyspark/pandas/tests/frame/test_reindexing.py | 31 | # This file contains test cases for 'Reindexing / Selection / Label manipulation' |
| MEDIUM | python/pyspark/pandas/tests/frame/test_reshaping.py | 27 | # This file contains test cases for 'Reshaping, sorting, transposing' |
| MEDIUM | python/pyspark/pandas/tests/computation/test_combine.py | 25 | # This file contains test cases for 'Combining / joining / merging' |
| MEDIUM | …on/pyspark/pandas/tests/computation/test_apply_func.py | 29 | # This file contains test cases for 'Function application, GroupBy & Window' |
| MEDIUM | …/pyspark/pandas/tests/computation/test_missing_data.py | 27 | # This file contains test cases for 'Missing data handling' |
| MEDIUM | …on/pyspark/pandas/tests/computation/test_binary_ops.py | 26 | # This file contains test cases for 'Binary operator functions' |
| MEDIUM | python/pyspark/pandas/tests/computation/test_compute.py | 26 | # This file contains test cases for 'Computations / Descriptive Stats' |
| MEDIUM | …thon/pyspark/pandas/tests/indexes/test_indexing_adv.py | 56 | # Create the equivalent of pdf.loc[3] as a Koalas Series |
| MEDIUM | …thon/pyspark/pandas/tests/indexes/test_indexing_adv.py | 142 | # Create the equivalent of pdf.loc[3] as a Koalas Series |
| MEDIUM | python/pyspark/pandas/tests/indexes/test_indexing.py | 26 | # This file contains test cases for 'Indexing, Iteration' |
| MEDIUM | python/pyspark/pandas/indexes/base.py | 263 | # This method is used via `DataFrame.info` API internally. |
| MEDIUM | python/pyspark/sql/dataframe.py | 563 | ... # Create a table with Rate source. |
| MEDIUM | python/pyspark/sql/dataframe.py | 6788 | >>> # Create a simple UDTF that processes table data |
| MEDIUM | python/pyspark/sql/dataframe.py | 6794 | >>> # Create a DataFrame |
| MEDIUM | python/pyspark/sql/session.py | 624 | # Create a new SparkSession in the JVM |
| MEDIUM | python/pyspark/sql/session.py | 1647 | # Create a DataFrame from pandas DataFrame. |
| MEDIUM | python/pyspark/sql/session.py | 1652 | # Create a DataFrame from PyArrow Table. |
| 83 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | python/pyspark/cloudpickle/cloudpickle.py | 662 | # ------------------------------------------------- |
| MEDIUM | python/pyspark/cloudpickle/cloudpickle.py | 698 | # ------------------------------------ |
| MEDIUM | python/pyspark/cloudpickle/cloudpickle.py | 704 | # ----------------------------------- |
| MEDIUM | python/pyspark/cloudpickle/cloudpickle.py | 816 | # ------------------------------- |
| MEDIUM | python/pyspark/cloudpickle/cloudpickle.py | 1125 | # ------------------------------------ |
| MEDIUM | python/pyspark/cloudpickle/cloudpickle.py | 1207 | # --------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 92 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 94 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 96 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 287 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 289 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 291 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 356 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 358 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 397 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 399 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 557 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 559 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 50 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 52 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 67 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 69 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 208 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 210 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 238 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 240 | # ------------------------------------------------------------------------- |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 435 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 437 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 525 | # ========================================================================= |
| MEDIUM | …/upstream/pyarrow/test_pyarrow_array_type_inference.py | 527 | # ========================================================================= |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 189 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 191 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 196 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 198 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 206 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 208 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 216 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 218 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 228 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 230 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 240 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 242 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 258 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 260 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 268 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 270 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 286 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 288 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 292 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 294 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 150 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 152 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 176 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 178 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 367 | # ===================================================================== |
| MEDIUM | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 369 | # ===================================================================== |
| MEDIUM | …k/tests/upstream/pyarrow/test_pyarrow_type_coercion.py | 56 | # ========================================================================= |
| MEDIUM | …k/tests/upstream/pyarrow/test_pyarrow_type_coercion.py | 58 | # ========================================================================= |
| MEDIUM | …k/tests/upstream/pyarrow/test_pyarrow_type_coercion.py | 85 | # ========================================================================= |
| MEDIUM | …k/tests/upstream/pyarrow/test_pyarrow_type_coercion.py | 87 | # ========================================================================= |
| 43 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | python/run-tests.py | 236 | |
| LOW | python/run-tests.py | 474 | |
| LOW | python/pyspark/worker.py | 853 | |
| LOW | python/pyspark/worker.py | 885 | |
| LOW | python/pyspark/worker.py | 929 | |
| LOW | python/pyspark/worker.py | 1038 | |
| LOW | python/pyspark/worker.py | 2184 | |
| LOW | python/pyspark/worker.py | 1565 | |
| LOW | python/pyspark/worker.py | 1141 | |
| LOW | python/pyspark/worker.py | 1303 | |
| LOW | python/pyspark/worker.py | 1579 | |
| LOW | python/pyspark/worker.py | 1659 | |
| LOW | python/pyspark/worker.py | 1677 | |
| LOW | python/pyspark/worker.py | 2607 | |
| LOW | python/pyspark/worker.py | 1726 | |
| LOW | python/pyspark/worker.py | 1799 | |
| LOW | python/pyspark/worker.py | 1587 | |
| LOW | python/pyspark/worker.py | 1855 | |
| LOW | python/pyspark/worker.py | 1605 | |
| LOW | python/pyspark/worker.py | 1615 | |
| LOW | python/pyspark/worker.py | 1637 | |
| LOW | python/pyspark/worker_message.py | 135 | |
| LOW | python/pyspark/util.py | 572 | |
| LOW | python/pyspark/conf.py | 180 | |
| LOW | python/pyspark/shuffle.py | 62 | |
| LOW | python/pyspark/shuffle.py | 779 | |
| LOW | python/pyspark/statcounter.py | 60 | |
| LOW | python/pyspark/install.py | 120 | |
| LOW | python/pyspark/accumulators.py | 263 | |
| LOW | python/pyspark/accumulators.py | 268 | |
| LOW | python/pyspark/profiler.py | 189 | |
| LOW | python/pyspark/daemon.py | 46 | |
| LOW | python/pyspark/daemon.py | 113 | |
| LOW | python/pyspark/core/rdd.py | 2210 | |
| LOW | python/pyspark/core/rdd.py | 3672 | |
| LOW | python/pyspark/core/rdd.py | 3724 | |
| LOW | python/pyspark/core/context.py | 226 | |
| LOW | python/pyspark/core/context.py | 1817 | |
| LOW | python/pyspark/logger/worker_io.py | 214 | |
| LOW | python/pyspark/cloudpickle/cloudpickle.py | 313 | |
| LOW | python/pyspark/cloudpickle/cloudpickle.py | 338 | |
| LOW | python/pyspark/cloudpickle/cloudpickle.py | 1069 | |
| LOW | python/pyspark/cloudpickle/cloudpickle.py | 1441 | |
| LOW | python/pyspark/mllib/classification.py | 236 | |
| LOW | python/pyspark/mllib/common.py | 75 | |
| LOW | python/pyspark/mllib/common.py | 96 | |
| LOW | python/pyspark/mllib/common.py | 160 | |
| LOW | python/pyspark/mllib/linalg/__init__.py | 96 | |
| LOW | python/pyspark/mllib/linalg/__init__.py | 114 | |
| LOW | python/pyspark/mllib/linalg/__init__.py | 415 | |
| LOW | python/pyspark/mllib/linalg/__init__.py | 824 | |
| LOW | python/pyspark/tests/test_serializers.py | 213 | |
| LOW | python/pyspark/tests/test_worker.py | 39 | |
| LOW | python/pyspark/tests/test_shuffle.py | 66 | |
| LOW | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 84 | |
| LOW | …park/tests/upstream/pyarrow/test_pyarrow_array_cast.py | 137 | |
| LOW | python/pyspark/pipelines/cli.py | 121 | |
| LOW | python/pyspark/pipelines/cli.py | 221 | |
| LOW | …yspark/pipelines/tests/test_block_session_mutations.py | 38 | |
| LOW | python/pyspark/testing/sqlutils.py | 289 | |
| 315 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | python/conf_vscode/sitecustomize.py | 38 | except Exception: |
| MEDIUM | python/pyspark/worker.py | 2123 | def evaluate(*a) -> tuple: |
| LOW | python/pyspark/worker.py | 1541 | except Exception as e: |
| LOW | python/pyspark/worker.py | 1720 | except Exception as e: |
| LOW | python/pyspark/worker.py | 1849 | except Exception as e: |
| LOW | python/pyspark/worker.py | 1909 | except Exception as e: |
| LOW | python/pyspark/worker.py | 2004 | except Exception as e: |
| LOW | python/pyspark/worker.py | 2128 | except Exception as e: |
| LOW | python/pyspark/worker.py | 3660 | except Exception: |
| LOW | python/pyspark/threaddump.py | 45 | except Exception as e: |
| MEDIUM | python/pyspark/threaddump.py | 46 | print(f"Error getting children of process {args.pid}: {e}") |
| LOW | python/pyspark/threaddump.py | 54 | except Exception: |
| MEDIUM | python/pyspark/threaddump.py | 28 | def main() -> int: |
| LOW | python/pyspark/util.py | 782 | except Exception: |
| LOW | python/pyspark/serializers.py | 446 | except Exception as e: |
| LOW | python/pyspark/serializers.py | 495 | except Exception: |
| LOW | python/pyspark/shell.py | 73 | except Exception: |
| LOW | python/pyspark/shell.py | 90 | except Exception: |
| LOW | python/pyspark/memory_profiler_ext.py | 32 | except Exception: |
| LOW | python/pyspark/memory_profiler_ext.py | 69 | except Exception: |
| LOW | python/pyspark/install.py | 169 | except Exception: |
| LOW | python/pyspark/install.py | 189 | except Exception: |
| LOW | python/pyspark/install.py | 221 | except Exception as e: |
| LOW | python/pyspark/instrumentation_utils.py | 49 | except Exception as ex: |
| LOW | python/pyspark/instrumentation_utils.py | 73 | except Exception as ex: |
| LOW | python/pyspark/daemon.py | 96 | except Exception: |
| LOW | python/pyspark/daemon.py | 270 | except Exception: |
| LOW | python/pyspark/core/context.py | 375 | except Exception: |
| LOW | python/pyspark/core/broadcast.py | 181 | except Exception as e: |
| LOW | python/pyspark/logger/worker_io.py | 252 | except Exception: |
| LOW | python/pyspark/cloudpickle/cloudpickle.py | 232 | except Exception: |
| LOW | python/pyspark/tests/test_rdd.py | 354 | except Exception: |
| LOW | python/pyspark/tests/test_rdd.py | 889 | except Exception: |
| LOW | python/pyspark/tests/test_taskcontext.py | 206 | except Exception: |
| LOW | python/pyspark/tests/test_taskcontext.py | 277 | except Exception: |
| MEDIUM | python/pyspark/tests/test_taskcontext.py | 203 | def f(iterator): |
| LOW | python/pyspark/tests/test_util.py | 180 | except Exception as e: |
| LOW | python/pyspark/tests/test_pin_thread.py | 68 | except Exception as e: |
| LOW | python/pyspark/tests/test_pin_thread.py | 123 | except Exception: |
| LOW | python/pyspark/tests/test_worker.py | 56 | except Exception: |
| LOW | python/pyspark/tests/test_worker.py | 156 | except Exception: |
| MEDIUM | python/pyspark/tests/test_worker.py | 53 | def run(): |
| MEDIUM | python/pyspark/tests/test_worker.py | 153 | def count(): |
| LOW | python/pyspark/tests/test_context.py | 237 | except Exception: |
| LOW | python/pyspark/tests/test_install_spark.py | 50 | except Exception: |
| LOW | …stream/pyarrow/test_pyarrow_arrow_to_pandas_default.py | 399 | except Exception as e: |
| LOW | …park/tests/upstream/pyarrow/test_pyarrow_array_cast.py | 134 | except Exception as e: |
| LOW | python/pyspark/testing/sqlutils.py | 109 | except Exception: |
| LOW | python/pyspark/testing/sqlutils.py | 167 | except Exception as e: |
| LOW | python/pyspark/testing/goldenutils.py | 189 | except Exception as e: |
| LOW | python/pyspark/testing/utils.py | 128 | except Exception as e: |
| LOW | python/pyspark/testing/utils.py | 140 | except Exception as e: |
| LOW | python/pyspark/testing/utils.py | 373 | except Exception: |
| MEDIUM | python/pyspark/testing/utils.py | 368 | def _terminal_color_support(): |
| LOW | python/pyspark/ml/functions.py | 848 | except Exception as e: |
| LOW | python/pyspark/ml/wrapper.py | 66 | except Exception: |
| MEDIUM | python/pyspark/ml/wrapper.py | 58 | def __del__(self) -> None: |
| LOW | python/pyspark/ml/util.py | 360 | except Exception: |
| LOW | python/pyspark/ml/util.py | 372 | except Exception: |
| LOW | python/pyspark/ml/torch/distributor.py | 57 | except Exception: |
| 137 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| MEDIUM | …/org/apache/spark/launcher/AbstractCommandBuilder.java | 244 | // Place slf4j-api-* jar first to be robust |
| MEDIUM | …c/test/scala/org/apache/spark/ui/UISeleniumSuite.scala | 414 | // Essentially, we want to check that none of the stage rows show |
| MEDIUM | …c/test/scala/org/apache/spark/ui/UISeleniumSuite.scala | 470 | // Essentially, we want to check that none of the stage rows show |
| MEDIUM | …ala/org/apache/spark/scheduler/DAGSchedulerSuite.scala | 2376 | // For a robust test assertion, limit number of job tasks to 1; that is, |
| MEDIUM | …apache/spark/scheduler/SchedulerIntegrationSuite.scala | 431 | // it really can only be "best-effort" in any case, and the scheduler should be robust to that. |
| MEDIUM | …rg/apache/spark/scheduler/TaskSchedulerImplSuite.scala | 492 | // Even though we launched a local task above, we still utilize non-local exec2. |
| MEDIUM | …he/spark/scheduler/HealthTrackerIntegrationSuite.scala | 80 | // robust to one bad node. |
| MEDIUM | …main/scala/org/apache/spark/storage/BlockManager.scala | 1224 | // BlockTransferService, which will leverage it to spill the block; if not, then passed-in |
| MEDIUM | python/packaging/classic/setup.py | 164 | # TODO(SPARK-32837) leverage pip's custom options |
| LOW | …spark/messages/socket/spark_socket_message_receiver.py | 49 | # For socket communication, we just pass along the underlying socket |
| LOW | python/pyspark/tests/test_install_spark.py | 57 | # we just use a hard-coded version. |
| LOW | python/pyspark/ml/tests/test_functions.py | 208 | # just return the batch size as the "prediction" |
| MEDIUM | python/pyspark/errors/utils.py | 378 | # Excluding Python magic methods that do not utilize JVM functions. |
| LOW | python/pyspark/pandas/resample.py | 359 | # here just use Pandas' resample on a 1-length series to get it. |
| LOW | python/pyspark/pandas/generic.py | 3101 | # If Series has only a single value, just return it as a scalar. |
| MEDIUM | python/pyspark/pandas/series.py | 6093 | # If `where` has duplicate items, leverage the pandas directly |
| LOW | python/pyspark/pandas/utils.py | 794 | # '+' is meaningless for writing methods, but pandas just pass it as 'w'. |
| LOW | python/pyspark/pandas/utils.py | 798 | # '+' is meaningless for writing methods, but pandas just pass it as 'a'. |
| LOW | python/pyspark/pandas/frame.py | 10126 | # In this case, we can simply use `summary` to calculate the stats. |
| MEDIUM | python/pyspark/pandas/tests/groupby/test_stat.py | 30 | # TODO: All statistical functions should leverage this utility |
| MEDIUM | python/pyspark/sql/session.py | 601 | # used in conjunction with Spark Connect mode. |
| MEDIUM | python/pyspark/sql/tests/test_functions.py | 3042 | """Test tuple_sketch_agg + operations + estimate comprehensive test - double""" |
| MEDIUM | python/pyspark/sql/tests/test_functions.py | 3097 | """Test tuple_sketch_agg + operations + estimate comprehensive test - integer""" |
| MEDIUM | …ing/test_pandas_transform_with_state_state_variable.py | 354 | # TODO SPARK-50908 holistic fix for TTL suite |
| MEDIUM | python/pyspark/sql/connect/client/core.py | 389 | # Rewrite the URL to use http as the scheme so that we can leverage |
| LOW | R/pkg/R/sparkR.R | 664 | #' To remove/unset property simply set `value` to NULL e.g. setLocalProperty("key", NULL) |
| MEDIUM | R/pkg/R/column.R | 296 | #' Can be used in conjunction with \code{when} to specify a default value for expressions. |
| MEDIUM | …apache/spark/streaming/ReceivedBlockTrackerSuite.scala | 320 | // deletion more robust rather than a parallelized operation where we fire and forget |
| MEDIUM | …cala/org/apache/spark/streaming/ui/StreamingPage.scala | 163 | // We leverage timeFormat as the value would be same as timeFormat. This means it is |
| MEDIUM | …rg/apache/spark/network/crypto/CtrTransportCipher.java | 229 | // to utilize two helper ByteArrayWritableChannel for streaming. One is used to receive raw data |
| MEDIUM | …network/shuffle/streaming/StreamingShuffleMessage.java | 68 | // Essentially, other message types from reader to writer won't have a valid sequence number. |
| MEDIUM | …scala/org/apache/spark/examples/mllib/LDAExample.scala | 139 | // add (1.0 / actualCorpusSize) to MiniBatchFraction be more robust on tiny datasets. |
| MEDIUM | …a/org/apache/spark/sql/StatisticsCollectionSuite.scala | 934 | // We can't leverage LogicalRDD.fromDataset here, since it triggers physical planning and |
| MEDIUM | …c/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 1637 | // We can't leverage LogicalRDD.fromDataset here, since it triggers physical planning and |
| MEDIUM | …apache/spark/sql/streaming/FileStreamSourceSuite.scala | 2342 | // file stream source will not leverage unread files - next batch will also trigger |
| MEDIUM | …org/apache/spark/sql/execution/UnionCodegenSuite.scala | 533 | // Explicit cap so the assertion is robust to future default changes. |
| MEDIUM | …ion/datasources/v2/state/StateDataSourceTestBase.scala | 103 | // check with more data - leverage full partitions |
| MEDIUM | …park/sql/catalyst/analysis/ResolveSessionCatalog.scala | 227 | // resolution was skipped) so the rewrite stays robust across analyzer ordering changes. |
| MEDIUM | …in/scala/org/apache/spark/sql/jdbc/OracleDialect.scala | 144 | // Not sure if there is a more robust way to identify the field as a float (or other |
| MEDIUM | …icpruning/RowLevelOperationRuntimeGroupFiltering.scala | 78 | // in order to leverage a regular batch scan in the group filter query |
| MEDIUM | …on/python/streaming/ApplyInPandasWithStateWriter.scala | 107 | // from the entire data part of Arrow RecordBatch. We leverage the state metadata to also |
| MEDIUM | …ors/stateful/join/StreamingSymmetricHashJoinExec.scala | 1098 | // to let users leverage both sides of event time column for output of join, so the watermark |
| MEDIUM | …/execution/streaming/runtime/FileStreamSourceLog.scala | 130 | // be started. We leverage the fact to skip calculation if possible. |
| MEDIUM | …sql/execution/streaming/runtime/ProgressReporter.scala | 572 | // by itself, so leverage it. |
| MEDIUM | …ark/sql/catalyst/expressions/CodeGenerationSuite.scala | 603 | | // to make the test more robust, in case the compiler can eliminate the else branch. |
| MEDIUM | …e/spark/sql/catalyst/analysis/RelationResolution.scala | 397 | // To utilize this code path to execute V1 commands, e.g. INSERT, |
| MEDIUM | …ql/catalyst/expressions/SubExprEvaluationRuntime.scala | 100 | // We leverage `IdentityHashMap` so we compare expression keys by reference here. |
| MEDIUM | …k/sql/catalyst/expressions/codegen/CodeFormatter.scala | 119 | // examines the number of parenthesis and braces in that line. This isn't the most robust |
| MEDIUM | …/spark/sql/hive/execution/HiveCompatibilitySuite.scala | 287 | // The isolated classloader seemed to make some of our test reset mechanisms less robust. |
| MEDIUM | …n/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 931 | // TODO: hard-coding a list here is not very robust. A better idea is to have some kind of query |
| MEDIUM | …/main/java/org/apache/spark/sql/streaming/Trigger.java | 98 | * @deprecated This is deprecated as of Spark 3.4.0. Use {@link #AvailableNow()} to leverage |
| MEDIUM | …e/spark/sql/hive/thriftserver/SharedThriftServer.scala | 134 | // It's much more robust than set a random port generated by ourselves ahead |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …/util/collection/unsafe/sort/UnsafeExternalSorter.java | 474 | // Step 1: |
| LOW | …/util/collection/unsafe/sort/UnsafeExternalSorter.java | 477 | // Step 2: |
| LOW | …/util/collection/unsafe/sort/UnsafeExternalSorter.java | 480 | // Step 3: |
| LOW | …/scala/org/apache/spark/storage/BlockInfoManager.scala | 433 | // reader counts. We need to check if the readLocksByTask per tasks are present, if they |
| LOW | python/pyspark/sql/conversion.py | 183 | # Step 1: pick source columns from batch to align with target schema |
| LOW | python/pyspark/sql/conversion.py | 212 | # Step 2: check types / cast, collect all mismatches |
| LOW | …/streaming/test_streaming_offline_state_repartition.py | 109 | # Step 1: Write initial data and run streaming query |
| LOW | …/streaming/test_streaming_offline_state_repartition.py | 116 | # Step 2: Repartition to more partitions |
| LOW | …/streaming/test_streaming_offline_state_repartition.py | 121 | # Step 3: Add more data and restart query |
| LOW | …/streaming/test_streaming_offline_state_repartition.py | 129 | # Step 4: Repartition to fewer partitions |
| LOW | …/streaming/test_streaming_offline_state_repartition.py | 134 | # Step 5: Add more data and restart query |
| LOW | …rk/sql/streaming/transform_with_state_driver_worker.py | 72 | # and the following code block should be only run once for each query run |
| LOW | R/pkg/inst/worker/daemon.R | 98 | # Forking succeeded and we need to check if they finished their jobs every second. |
| LOW | R/pkg/inst/worker/worker.R | 247 | # Step 1: hash the data to an environment |
| LOW | R/pkg/inst/worker/worker.R | 264 | # Step 2: write out all of the environment as key-value pairs. |
| LOW | …ming/FlatMapGroupsWithStateWithInitialStateSuite.scala | 57 | // We need to check if not explicitly calling update will still save the init state or not |
| LOW | …ming/FlatMapGroupsWithStateWithInitialStateSuite.scala | 124 | // We need to check if not explicitly calling update will still save the state or not |
| LOW | …on/datasources/v2/state/StateDataSourceReadSuite.scala | 1718 | // Step 1: Run the stateful query to create the full checkpoint structure |
| LOW | …on/datasources/v2/state/StateDataSourceReadSuite.scala | 1721 | // Step 2: Delete the state directory |
| LOW | …on/datasources/v2/state/StateDataSourceReadSuite.scala | 1727 | // Step 3: Attempt to read state - expected to fail since state is deleted |
| LOW | …on/datasources/v2/state/StateDataSourceReadSuite.scala | 1733 | // Step 4: Verify the state directory was NOT recreated by the reader |
| LOW | …execution/streaming/state/RocksDBStateStoreSuite.scala | 1866 | // Step 1: Write data with correct schema and commit |
| LOW | …execution/streaming/state/RocksDBStateStoreSuite.scala | 1877 | // Step 2: Reopen with a wrong valueSchema (StringType instead of IntegerType) |
| LOW | …execution/streaming/state/RocksDBStateStoreSuite.scala | 1906 | // Step 1: Write data with correct schema and commit |
| LOW | …execution/streaming/state/RocksDBStateStoreSuite.scala | 1918 | // Step 2: Reopen with a wrong valueSchema (StringType instead of IntegerType) |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 228 | // Step 1: Create state by running a streaming aggregation |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 270 | // Step 1: Create state by running a composite key streaming aggregation |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 304 | // Step 1: Create state by running stream-stream join |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 316 | // Step 2: Test all 4 state stores created by stream-stream join |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 343 | // Step 1: Create state by running flatMapGroupsWithState |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 813 | // Step 1: Create state by running dropDuplicatesWithinWatermark |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 838 | // Step 1: Create state by running dropDuplicates with column |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 863 | // Step 1: Create state by running session window aggregation |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 892 | // Step 1: Create state by running a streaming aggregation |
| LOW | …state/StatePartitionAllColumnFamiliesWriterSuite.scala | 965 | // Step 1: Create state by running a streaming aggregation |
| LOW | …ng/state/OfflineStateRepartitionIntegrationSuite.scala | 128 | // Step 1: Run initial query to create state |
| LOW | …ng/state/OfflineStateRepartitionIntegrationSuite.scala | 131 | // Step 2: Read state data before repartition |
| LOW | …ng/state/OfflineStateRepartitionIntegrationSuite.scala | 150 | // Step 3: Run repartition |
| LOW | …ng/state/OfflineStateRepartitionIntegrationSuite.scala | 157 | // Step 4: Verify offset and commit logs |
| LOW | …ng/state/OfflineStateRepartitionIntegrationSuite.scala | 162 | // Step 5: Validate state for each store and column family after repartition |
| LOW | …ng/state/OfflineStateRepartitionIntegrationSuite.scala | 190 | // Step 6: Resume query with new input and verify |
| LOW | …g/apache/spark/sql/classic/StreamingQueryManager.scala | 310 | // The following code block checks if a stream with the same name or id is running. Then it |
| LOW | …la/org/apache/spark/sql/execution/SparkSqlParser.scala | 119 | // Step 1: Apply variable substitution to expand any variable references. |
| LOW | …la/org/apache/spark/sql/execution/SparkSqlParser.scala | 122 | // Step 2: Apply parameter substitution if a parameter context is provided. |
| LOW | …la/org/apache/spark/sql/execution/SparkSqlParser.scala | 147 | // Step 3: Set up the origin with SQL text and position mapper to enable |
| LOW | …ql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala | 133 | // Also, we need to check if join is done on 2 tables from 2 different databases within same |
| LOW | …xecution/datasources/parquet/ParquetRowConverter.scala | 831 | // in case of schema evolution), we need to check if the repeated type matches one of the |
| LOW | …/execution/aggregate/TungstenAggregationIterator.scala | 268 | // Step 5: Get the sorted iterator from the externalSorter. |
| LOW | …/execution/aggregate/TungstenAggregationIterator.scala | 271 | // Step 6: Pre-load the first key-value pair from the sorted iterator to make |
| LOW | …/execution/aggregate/TungstenAggregationIterator.scala | 284 | // Step 7: set sortBased to true. |
| LOW | …t/analysis/SequentialStreamingUnionAnalysisSuite.scala | 227 | // Step 1: Flatten the nested unions |
| LOW | …t/analysis/SequentialStreamingUnionAnalysisSuite.scala | 236 | // Step 2: Validate the flattened plan |
| LOW | …/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala | 762 | // Step 1: subquery1 (cp) and subquery2 (np) merge: |
| LOW | …/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala | 769 | // Step 2: subquery3 (np) merges with merged(1,2) (cp). The cp Filter is tagged, so only a |
| LOW | …/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala | 818 | // Step 1: subquery1 (cp) and subquery2 (np) merge as usual: |
| LOW | …/spark/sql/catalyst/optimizer/MergeSubplansSuite.scala | 824 | // Step 2: subquery3 (np, condition a > 1) merges with merged(1,2) (cp). The cp Filter is |
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | python/run-tests.py | 506 | # Check if the python executable has coverage installed when 'COVERAGE_PROCESS_START' |
| LOW | python/pyspark/worker.py | 1358 | # Check if this is a continuation of the previous batch's partition |
| LOW | python/pyspark/worker.py | 1467 | # Check if any partition column changed from previous row |
| LOW | python/pyspark/shell.py | 56 | # Check if th eprogress bar needs to be disabled. |
| LOW | python/pyspark/pipelines/cli.py | 74 | # Check if it's a simple file path (no wildcards at all) |
| LOW | python/pyspark/pipelines/cli.py | 78 | # Check if it's a folder path ending with /** |
| LOW | python/pyspark/pandas/frame.py | 12563 | # Check if DataFrame has rows - if yes, raise error; if no, return empty Series |
| LOW | python/pyspark/pandas/frame.py | 12698 | # Check if DataFrame has rows - if yes, raise error; if no, return empty Series |
| LOW | python/pyspark/pandas/data_type_ops/categorical_ops.py | 116 | # Check if categoricals have the same dtype, same categories, and same ordered |
| LOW | python/pyspark/pandas/typedef/typehints.py | 638 | # Check if the name is Tuple. |
| LOW | python/pyspark/pandas/indexes/base.py | 2068 | # Check if the `self` and `other` have different index types. |
| LOW | python/pyspark/sql/metrics.py | 188 | # Add yourself to the list if you have to. |
| LOW | python/pyspark/sql/dataframe.py | 409 | >>> # Check if the DataFrames are equal |
| LOW | python/pyspark/sql/session.py | 2279 | # Check if the target path already exists |
| LOW | python/pyspark/sql/types.py | 3139 | >>> # Check if numeric values are within the allowed range. |
| LOW | python/pyspark/sql/tests/test_utils.py | 1732 | # Check if the error message contains information about 2 mismatches only. |
| LOW | python/pyspark/sql/tests/arrow/test_arrow_map.py | 329 | # Set it to a small odd value to exercise batching logic for all test cases |
| LOW | …s/pandas/streaming/test_pandas_transform_with_state.py | 1436 | # Set it to a very small number so that every row would be a separate pandas df |
| LOW | …s/pandas/streaming/test_pandas_transform_with_state.py | 1463 | # Set it to a very large number so that every row would be in the same pandas df |
| LOW | …s/pandas/streaming/test_pandas_transform_with_state.py | 1529 | # Set it to a very small number so that every row would be a separate pandas df |
| LOW | …/pyspark/sql/tests/pandas/streaming/test_tws_tester.py | 751 | # Set watermark to 15000 - key1's timer should fire. |
| LOW | …/pyspark/sql/tests/pandas/streaming/test_tws_tester.py | 756 | # Set watermark to 16000 - key2's timer should fire. |
| LOW | …/pyspark/sql/tests/pandas/streaming/test_tws_tester.py | 790 | # Set watermark to 6000. |
| LOW | …/pyspark/sql/tests/pandas/streaming/test_tws_tester.py | 821 | # Set watermark to 20 seconds. |
| LOW | …/pyspark/sql/tests/pandas/streaming/test_tws_tester.py | 923 | # Set watermark to 10000. |
| LOW | python/pyspark/sql/streaming/readwriter.py | 1550 | # Check if the data should be processed |
| LOW | python/pyspark/sql/worker/plan_data_source_read.py | 154 | # Check if the names are the same as the schema. |
| LOW | python/pyspark/sql/worker/create_data_source.py | 81 | # Check if the provider name matches the data source's name. |
| LOW | python/pyspark/sql/worker/write_into_data_source.py | 97 | # Check if the provider name matches the data source's name. |
| LOW | python/pyspark/sql/connect/session.py | 1109 | # Check if total size exceeds the limit |
| LOW | python/pyspark/sql/connect/session.py | 1119 | # Check if adding this chunk would exceed batch size |
| LOW | python/pyspark/sql/connect/client/artifact.py | 195 | # Check if it is a file from the scheme |
| LOW | python/pyspark/sql/pandas/serializers.py | 1226 | # Check if the entire column is null |
| LOW | python/pyspark/sql/pandas/serializers.py | 1469 | # Check if the entire column is null |
| LOW | python/pyspark/sql/pandas/conversion.py | 872 | # Check if any columns need to be fixed for Spark to infer properly |
| LOW | python/pyspark/sql/pandas/typehints.py | 69 | # Check if all arguments have type hints |
| LOW | python/pyspark/sql/pandas/typehints.py | 79 | # Check if the return has a type hint |
| LOW | python/pyspark/sql/pandas/typehints.py | 228 | # Check if all arguments have type hints |
| LOW | python/pyspark/sql/pandas/typehints.py | 238 | # Check if the return has a type hint |
| LOW | python/pyspark/sql/pandas/typehints.py | 421 | # Check if all arguments have type hints |
| LOW | python/pyspark/sql/pandas/typehints.py | 431 | # Check if the return has a type hint |
| LOW | python/pyspark/sql/pandas/typehints.py | 514 | # Check if all arguments have type hints |
| LOW | python/pyspark/sql/pandas/typehints.py | 524 | # Check if the return has a type hint |
| LOW | python/pyspark/sql/pandas/typehints.py | 600 | # Check if the name is Tuple first. After that, check the generic types. |
| LOW | sbin/spark-daemon.sh | 50 | # Check if --config is passed as an argument. It is an optional parameter. |
| LOW | sbin/spark-daemon.sh | 154 | # Check if the process has died; in that case we'll tail the log so the user can see |
| LOW | sbin/decommission-worker.sh | 48 | # Check if --block-until-exit is set. |
| LOW | sbin/workers.sh | 57 | # Check if --config is passed as an argument. It is an optional parameter. |
| LOW | …l/src/test/scala/org/apache/spark/repl/ReplSuite.scala | 254 | |# Set everything to be logged to the console |
| LOW | R/pkg/tests/fulltests/test_jvm_api.R | 26 | # Check if get returns the same element |
| LOW | R/pkg/R/sparkR.R | 456 | # Check if version number of SparkSession matches version number of SparkR package |
| LOW | R/pkg/R/serialize.R | 45 | # Check if all elements are of same type |
| LOW | R/pkg/R/jobj.R | 31 | # Check if jobj was created with the current SparkContext |
| LOW | R/pkg/R/DataFrame.R | 386 | # Check if the column names have . in it |
| LOW | R/pkg/R/DataFrame.R | 2282 | # Check if there is any duplicated column name in the DataFrame |
| LOW | R/pkg/inst/worker/worker.R | 97 | # Set libPaths to include SparkR package as loadNamespace needs this |
| LOW | .github/workflows/build_and_test.yml | 1313 | # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and |
| LOW | .github/workflows/build_and_test.yml | 1337 | # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and |
| LOW | .github/workflows/build_and_test.yml | 1361 | # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and |
| LOW | .github/workflows/build_and_test.yml | 1385 | # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and |
| 3 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …hon/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py | 1110 | .withColumn("name", lit("John Doe")) |
| LOW | python/pyspark/sql/pandas/functions.py | 109 | >>> df = spark.createDataFrame([("John Doe",)], ("name",)) |
| LOW | python/pyspark/sql/pandas/functions.py | 124 | >>> df = spark.createDataFrame([("John Doe",)], ("name",)) |
| LOW | python/pyspark/sql/pandas/functions.py | 506 | >>> df = spark.createDataFrame([("John Doe",)], ("name",)) |
| LOW | python/pyspark/sql/pandas/functions.py | 518 | >>> df = spark.createDataFrame([("John Doe",)], ("name",)) |
| LOW | …apache/spark/graphx/lib/ConnectedComponentsSuite.scala | 119 | val defaultUser = ("John Doe", "Missing") |
| LOW | docs/graphx-programming-guide.md | 193 | val defaultUser = ("John Doe", "Missing") |
| LOW | docs/graphx-programming-guide.md | 432 | val defaultUser = ("John Doe", "Missing") |
| LOW | examples/src/main/python/sql/arrow.py | 308 | df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age")) |
| LOW | …s/test-data/xml-resources/mixed_children_as_string.xml | 4 | Lorem ipsum dolor sit amet. Ut <i>voluptas</i> distinctio et impedit deserunt aut quam fugit et quaerat odit |
| LOW | …s/test-data/xml-resources/mixed_children_as_string.xml | 4 | Lorem ipsum dolor sit amet. Ut <i>voluptas</i> distinctio et impedit deserunt aut quam fugit et quaerat odit |
| LOW | …/test/resources/test-data/xml-resources/processing.xml | 4 | lorem ipsum |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 150 | INSERT INTO products VALUES (1, 'Super Widget', 'Electronics', 155.99, 99.99, 1, 'Acme Inc', 'John D.', '123 Main St', 2 |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 150 | INSERT INTO products VALUES (1, 'Super Widget', 'Electronics', 155.99, 99.99, 1, 'Acme Inc', 'John D.', '123 Main St', 2 |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 154 | INSERT INTO customers VALUES (1, 'Alice Johnson', 'alice@example.com', '555-1000', '101 Maple Ave', NULL, 'Springfield', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 155 | INSERT INTO customers VALUES (2, 'Bob Smith', 'bob@example.com', '555-1002', '202 Oak St', 'Apt 3', 'Oakville', 'CA', '6 |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 156 | INSERT INTO customers VALUES (3, 'Cathy Lee', 'cathy@example.com', '555-1003', '303 Pine Ln', NULL, 'Pineville', 'TX', ' |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 166 | INSERT INTO employees VALUES (1, 'Dan Miller', 'dan@example.com', '555-2001', 'Manager', 'Sales', TIMESTAMP '2018-01-01' |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 167 | INSERT INTO employees VALUES (2, 'Eva Perez', 'eva@example.com', '555-2002', 'Salesperson', 'Sales', TIMESTAMP '2019-03- |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 168 | INSERT INTO employees VALUES (3, 'Frank Wong', 'frank@example.com', '555-2003', 'Warehouse', 'Operations', TIMESTAMP '20 |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 170 | INSERT INTO suppliers VALUES (1, 'Acme Inc', 'John D.', 'Sales Manager', 'john@acme.com', '555-3001', '555-3002', '123 M |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 170 | INSERT INTO suppliers VALUES (1, 'Acme Inc', 'John D.', 'Sales Manager', 'john@acme.com', '555-3001', '555-3002', '123 M |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 170 | INSERT INTO suppliers VALUES (1, 'Acme Inc', 'John D.', 'Sales Manager', 'john@acme.com', '555-3001', '555-3002', '123 M |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 171 | INSERT INTO suppliers VALUES (2, 'Widgets Co', 'Mary K.', 'Customer Success', 'mary@widgets.com', '555-4001', NULL, '456 |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 172 | INSERT INTO suppliers VALUES (3, 'Toy Supply', 'Ann T.', 'Director', 'ann@toysupply.com', '555-5001', NULL, '789 Oak St' |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 212 | INSERT INTO suppliers VALUES (v_temp_id, 'Temp Supplier', 'Temp Contact', 'Temp Role', 'temp |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 260 | INSERT INTO customers VALUES (v_new_customer_id, 'New Customer', 'new@customer.com', '555-1111', '55 |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 393 | VALUES (sub_emp.employee_id + 9999, v_name_part, CONCAT(v_name_part, '@company.com') |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 405 | VALUES (emp.employee_id + 10000, CONCAT('Emp_', emp.employee_id), emp.employee_name, 'Employ |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 474 | INSERT INTO products VALUES ((SELECT COALESCE(MAX(product_id), 0) + 1 FROM products), 'Rare ' || v_m |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 519 | '555-1212', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 552 | '555-1111', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 553 | '123 Main St', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 727 | '555-7777', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 918 | INSERT INTO employees VALUES (v_new_id, 'New Emp ' || v_new_id, 'new' || v_new_id || '@c |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 930 | INSERT INTO employees VALUES (v_temp_id, 'Manager ' || v_temp_id, 'manager' || v_temp_id || '@compan |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 941 | INSERT INTO employees VALUES (v_low_level_emp + 10, 'Temp Emp ' || v_low_level_emp, 'temp' || v_low_level_em |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 982 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 983 | '123 Main St', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1097 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1169 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1417 | '123 Main St', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1444 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1491 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1528 | '555-0001', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1556 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 1763 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 2072 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 2275 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 2582 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 2615 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 2788 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 2866 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 3112 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 3223 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 3253 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 3282 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 3666 | '555-0000', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 3731 | '555-0001', |
| LOW | …-tests/inputs/scripting/randomly_generated_scripts.sql | 3764 | '555-0002', |
| 35 more matches not shown… | |||
| Severity | File | Line | Snippet |
|---|---|---|---|
| LOW | …/main/java/org/apache/spark/SparkFirehoseListener.java | 27 | * This is a concrete Java class in order to ensure that we don't forget to update it when adding |
| MEDIUM | …/org/apache/spark/storage/BlockReplicationPolicy.scala | 101 | * Method to prioritize a bunch of candidate peers of a block. This is a basic implementation, |
| LOW | python/packaging/classic/setup.py | 151 | # Also don't forget to update python/docs/source/getting_started/install.rst, |
| LOW | python/packaging/classic/setup.py | 151 | # Also don't forget to update python/docs/source/getting_started/install.rst, |
| LOW | python/packaging/classic/setup.py | 351 | # Don't forget to update python/docs/source/getting_started/install.rst |
| LOW | python/packaging/connect/setup.py | 87 | # Also don't forget to update python/docs/source/getting_started/install.rst, |
| LOW | python/packaging/connect/setup.py | 87 | # Also don't forget to update python/docs/source/getting_started/install.rst, |
| LOW | python/packaging/connect/setup.py | 117 | # Don't forget to update python/docs/source/getting_started/install.rst |
| LOW | python/packaging/client/setup.py | 134 | # Also don't forget to update python/docs/source/getting_started/install.rst, |
| LOW | python/packaging/client/setup.py | 134 | # Also don't forget to update python/docs/source/getting_started/install.rst, |
| LOW | python/packaging/client/setup.py | 210 | # Don't forget to update python/docs/source/getting_started/install.rst |
| LOW | python/pyspark/pandas/config.py | 114 | # NOTE: if you are fixing or adding an option here, make sure you execute `show_options()` and |
| LOW | dev/create-release/release-build.sh | 768 | # NOTE: Don't forget to update the valid combinations of distributions at |
| LOW | …/main/scala/org/apache/spark/sql/connect/Dataset.scala | 146 | // Make sure we don't forget to set plan id. |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | python/pyspark/testing/sqlutils.py | 114 | Read the classpath file for a project and return it as a comma-separated string. The classpath file is typical |
| Severity | File | Line | Snippet |
|---|---|---|---|
| HIGH | python/pyspark/ml/dl_util.py | 103 | the empty string, nothing will be written after the auto-generated code. |