Closed · 57 commits
6331e56
[36562][SQL] Add new NewSQLHadoopMapReduceCommitProtocol resolve conf…
AngersZhuuuu Aug 24, 2021
4d80c24
Update PartitionedWriteSuite.scala
AngersZhuuuu Aug 24, 2021
b692e0f
Update PartitionedWriteSuite.scala
AngersZhuuuu Aug 24, 2021
e6dbae1
Update NewSQLHadoopMapReduceCommitProtocol.scala
AngersZhuuuu Aug 24, 2021
6419573
Update NewSQLHadoopMapReduceCommitProtocol.scala
AngersZhuuuu Aug 24, 2021
44d1d8f
Update PathOutputCommitProtocol.scala
AngersZhuuuu Aug 24, 2021
e2c5318
Update PathOutputCommitProtocol.scala
AngersZhuuuu Aug 24, 2021
9106d18
update
AngersZhuuuu Aug 24, 2021
2031f5b
[SPARK-36579][SQL] Make spark source stagingDir can use user defined
AngersZhuuuu Aug 25, 2021
c29f55e
Update SQLHadoopMapReduceCommitProtocol.scala
AngersZhuuuu Aug 25, 2021
2604c9f
Update
AngersZhuuuu Aug 25, 2021
71f6b17
fix ut
AngersZhuuuu Aug 25, 2021
1947cbf
Merge branch 'master' into SPARK-36579
AngersZhuuuu Oct 12, 2021
30113d2
Update SaveAsHiveFile.scala
AngersZhuuuu Oct 12, 2021
6f405dc
update
AngersZhuuuu Oct 12, 2021
361263b
update
AngersZhuuuu Oct 12, 2021
9ee6ee5
update
AngersZhuuuu Oct 12, 2021
7773fb2
update
AngersZhuuuu Oct 12, 2021
a3b3c51
Update PathOutputCommitProtocol.scala
AngersZhuuuu Oct 12, 2021
b4d60e4
Update PathOutputCommitProtocol.scala
AngersZhuuuu Oct 12, 2021
2c41808
Update CommitterBindingSuite.scala
AngersZhuuuu Oct 13, 2021
454118d
update
AngersZhuuuu Oct 13, 2021
11d6d15
Update PartitionedWriteSuite.scala
AngersZhuuuu Oct 13, 2021
9c9826c
update
AngersZhuuuu Oct 13, 2021
5926822
update
AngersZhuuuu Oct 13, 2021
6cdee58
update
AngersZhuuuu Oct 13, 2021
824ec04
Update FileCommitProtocol.scala
AngersZhuuuu Oct 13, 2021
8c8a174
update
AngersZhuuuu Oct 13, 2021
8d7ce6e
update
AngersZhuuuu Oct 13, 2021
da6a0b9
update
AngersZhuuuu Oct 13, 2021
63e466c
update
AngersZhuuuu Oct 13, 2021
fd2ac5f
update
AngersZhuuuu Oct 13, 2021
e2951f1
fix UT
AngersZhuuuu Oct 14, 2021
c8d0c33
complicated
AngersZhuuuu Oct 14, 2021
eafb8dd
update
AngersZhuuuu Oct 14, 2021
ff2bfb8
revert API change
AngersZhuuuu Oct 14, 2021
c5a1d16
update
AngersZhuuuu Oct 14, 2021
239b2a8
Merge branch 'master' into SPARK-36562
AngersZhuuuu Oct 14, 2021
6dde222
Merge branch 'SPARK-36579' into SPARK-36562
AngersZhuuuu Oct 14, 2021
ac468fa
update
AngersZhuuuu Oct 14, 2021
09f211b
update
AngersZhuuuu Oct 14, 2021
f1f12c3
update
AngersZhuuuu Oct 14, 2021
023787a
Update PathOutputCommitProtocol.scala
AngersZhuuuu Oct 14, 2021
5984fb2
Update PathOutputCommitProtocol.scala
AngersZhuuuu Oct 14, 2021
c2f606f
trigger GA
AngersZhuuuu Oct 15, 2021
a4468c2
Merge branch 'master' into SPARK-36562
AngersZhuuuu Jan 25, 2022
b90caeb
Update FileCommitProtocol.scala
AngersZhuuuu Jan 25, 2022
98f066c
Update FileCommitProtocol.scala
AngersZhuuuu Jan 25, 2022
2ea214c
update
AngersZhuuuu Jan 25, 2022
201b0b5
Update HadoopMapReduceCommitProtocol.scala
AngersZhuuuu Jan 25, 2022
772bf6b
Update InsertIntoHadoopFsRelationCommand.scala
AngersZhuuuu Jan 25, 2022
866681c
Update InsertIntoHadoopFsRelationCommand.scala
AngersZhuuuu Jan 25, 2022
aaae0f2
update
AngersZhuuuu Jan 25, 2022
c838dd3
Update SQLPathHadoopMapReduceCommitProtocol.scala
AngersZhuuuu Jan 25, 2022
1f2222c
Update StagingInsertSuite.scala
AngersZhuuuu Jan 25, 2022
c3472d9
Update SQLPathHadoopMapReduceCommitProtocol.scala
AngersZhuuuu Jan 25, 2022
6b44163
Update SQLPathHadoopMapReduceCommitProtocol.scala
AngersZhuuuu Jan 26, 2022
FileCommitProtocol.scala
@@ -17,6 +17,11 @@

package org.apache.spark.internal.io

import java.net.URI
import java.text.SimpleDateFormat
import java.util.{Date, Locale, Random}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs._
import org.apache.hadoop.mapreduce._

@@ -50,6 +55,22 @@ import org.apache.spark.util.Utils
abstract class FileCommitProtocol extends Logging {
import FileCommitProtocol._

/**
* Get the final directory where the result data will be placed once the job
* is committed. This may be null, in which case there is no output path and
* no data will be written.
*/
def getOutputPath: Path = null

/**
* Get the directory that the task should write results into.
* Warning: there's no guarantee that this work path is on the same
* FS as the final output, or that it's visible across machines.
* May be null, in which case there is no output path and no data will be
* written.
*/
def getWorkPath: Path = null

/**
* Sets up a job. Must be called on the driver before any other methods can be invoked.
*/
@@ -230,6 +251,89 @@ object FileCommitProtocol extends Logging {
def getStagingDir(path: String, jobId: String): Path = {
new Path(path, ".spark-staging-" + jobId)
}

def externalTempPath(
path: Path,
hadoopConf: Configuration,
stagingDir: String,
engineType: String,
jobId: String): Path = {
val extURI = path.toUri
if (extURI.getScheme == "viewfs") {
Review comment (Contributor): "there's no test for this in the tests that i can see...it'd be good to have that viewfs coverage tested too."

getExtTmpPathRelTo(path.getParent, hadoopConf, stagingDir, engineType, jobId)
} else {
new Path(getExternalScratchDir(extURI, hadoopConf, stagingDir, engineType, jobId),
"-ext-10000")
}
}

private def getExtTmpPathRelTo(
path: Path,
hadoopConf: Configuration,
stagingDir: String,
engineType: String,
jobId: String): Path = {
// Hive uses 10000
new Path(getStagingDir(path, hadoopConf, stagingDir, engineType, jobId), "-ext-10000")
}

private def getExternalScratchDir(
extURI: URI,
hadoopConf: Configuration,
stagingDir: String,
engineType: String,
jobId: String): Path = {
getStagingDir(
new Path(extURI.getScheme, extURI.getAuthority, extURI.getPath),
hadoopConf,
stagingDir,
engineType,
jobId)
}

def getStagingDir(
inputPath: Path,
hadoopConf: Configuration,
stagingDir: String,
engineType: String,
jobId: String): Path = {
val inputPathName: String = inputPath.toString
val fs: FileSystem = inputPath.getFileSystem(hadoopConf)
var stagingPathName: String =
if (inputPathName.indexOf(stagingDir) == -1) {
new Path(inputPathName, stagingDir).toString
} else {
inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length)
}

// SPARK-20594: This is a workaround for a Hive bug. When users set
// hive.exec.stagingdir under the table directory, the staging directory must
// start with '.' so that it is not deleted together with the table data.
if (isSubDir(new Path(stagingPathName), inputPath, fs) &&
!stagingPathName.stripPrefix(inputPathName).stripPrefix("/").startsWith(".")) {
logDebug(s"The staging dir '$stagingPathName' should be a child directory starts " +
"with '.' to avoid being deleted if we set hive.exec.stagingdir under the table " +
"directory.")
stagingPathName = new Path(inputPathName, ".hive-staging").toString
}

val dir = fs.makeQualified(
new Path(stagingPathName + "_" + executionId(engineType) + "-" + jobId))
logDebug("Created staging dir = " + dir + " for path = " + inputPath)
dir
}

private def isSubDir(p1: Path, p2: Path, fs: FileSystem): Boolean = {
val path1 = fs.makeQualified(p1).toString + Path.SEPARATOR
val path2 = fs.makeQualified(p2).toString + Path.SEPARATOR
path1.startsWith(path2)
}

def executionId(engineType: String): String = {
val rand: Random = new Random
val format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS", Locale.US)
s"${engineType}_" + format.format(new Date) + "_" + Math.abs(rand.nextLong)
}
}

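To make the new helpers concrete, here is a minimal sketch of the path `externalTempPath` resolves for a non-viewfs location; the table path and job id are illustrative, and the timestamp/random suffix comes from `executionId`:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.internal.io.FileCommitProtocol

// Illustrative values: a table location, the default staging dir name,
// engine type "spark", and a made-up job id.
val tmp = FileCommitProtocol.externalTempPath(
  new Path("/warehouse/tbl"),
  new Configuration(),
  ".spark-staging",   // value of spark.sql.exec.stagingDir
  "spark",
  "job-1234")
// tmp resolves to roughly:
//   /warehouse/tbl/.spark-staging_spark_<timestamp>_<rand>-job-1234/-ext-10000
// where the fixed "-ext-10000" leaf mirrors Hive's convention.
```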
HadoopMapReduceCommitProtocol.scala
@@ -73,7 +73,7 @@ class HadoopMapReduceCommitProtocol(
import FileCommitProtocol._

/** OutputCommitter from Hadoop is not serializable so marking it transient. */
@transient private var committer: OutputCommitter = _
@transient protected var committer: OutputCommitter = _

/**
* Checks whether there are files to be committed to a valid output location.
@@ -106,6 +106,16 @@
*/
protected def stagingDir = getStagingDir(path, jobId)

override def getOutputPath: Path = {
if (dynamicPartitionOverwrite) {
stagingDir
} else {
new Path(path)
}
}

override def getWorkPath: Path = getOutputPath

protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = {
val format = context.getOutputFormatClass.getConstructor().newInstance()
// If OutputFormat is Configurable, we should set conf to it.
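A sketch of the new accessors' contract, using the existing `FileCommitProtocol.instantiate` entry point with illustrative values:

```scala
import org.apache.spark.internal.io.FileCommitProtocol

// With dynamic partition overwrite on, the job writes under the staging
// directory and files are moved to the final location at commitJob time.
val protocol = FileCommitProtocol.instantiate(
  "org.apache.spark.internal.io.HadoopMapReduceCommitProtocol",
  "job-1234",   // jobId (illustrative)
  "/data/out",  // final output path (illustrative)
  true)         // dynamicPartitionOverwrite
protocol.getOutputPath  // /data/out/.spark-staging-job-1234
// With dynamicPartitionOverwrite = false it would be /data/out itself,
// and getWorkPath mirrors getOutputPath in this base implementation.
```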
PathOutputCommitProtocol.scala
@@ -60,9 +60,6 @@ class PathOutputCommitProtocol(
throw new IOException(PathOutputCommitProtocol.UNSUPPORTED)
}

/** The committer created. */
@transient private var committer: PathOutputCommitter = _

require(dest != null, "Null destination specified")

private[cloud] val destination: String = dest
@@ -115,7 +112,7 @@
logTrace(s"Committer $committer may not be tolerant of task commit failures")
}
}
committer
committer.asInstanceOf[PathOutputCommitter]
}

@@ -131,7 +128,7 @@
dir: Option[String],
spec: FileNameSpec): String = {

val workDir = committer.getWorkPath
val workDir = committer.asInstanceOf[PathOutputCommitter].getWorkPath
val parent = dir.map {
d => new Path(workDir, d)
}.getOrElse(workDir)
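Since `committer` is now inherited from `HadoopMapReduceCommitProtocol` as a plain `OutputCommitter`, each use site above downcasts it. A hypothetical tidy-up, not part of this PR, would cache the downcast once, much as the new `SQLPathHadoopMapReduceCommitProtocol` below does:

```scala
import org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter
import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol

// Hypothetical sketch only: a subclass that downcasts the inherited
// committer once instead of repeating asInstanceOf at each call site.
class CastOnceProtocol(jobId: String, dest: String)
  extends HadoopMapReduceCommitProtocol(jobId, dest) {

  @transient private lazy val pathCommitter: PathOutputCommitter =
    committer.asInstanceOf[PathOutputCommitter]
}
```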
SQLConf.scala
@@ -1282,6 +1282,17 @@ object SQLConf {
.createWithDefault(
"org.apache.spark.sql.execution.datasources.SQLHadoopMapReduceCommitProtocol")

val EXEC_STAGING_DIR = buildConf("spark.sql.exec.stagingDir")
.doc("The staging directory of a Spark job. Spark uses it when writing files with " +
"absolute output paths, or when writing data into a partitioned directory " +
"with dynamic partition overwrite mode on. " +
"The default value places the staging directory under the table path.")
.version("3.3.0")
.internal()
.stringConf
.checkValue(!_.isEmpty, "Should not pass an empty string as the staging directory.")
.createWithDefault(".spark-staging")

val PARALLEL_PARTITION_DISCOVERY_THRESHOLD =
buildConf("spark.sql.sources.parallelPartitionDiscovery.threshold")
.doc("The maximum number of paths allowed for listing files at driver side. If the number " +
@@ -3966,6 +3977,8 @@ class SQLConf extends Serializable with Logging {

def fileCommitProtocolClass: String = getConf(SQLConf.FILE_COMMIT_PROTOCOL_CLASS)

def stagingDir: String = getConf(SQLConf.EXEC_STAGING_DIR)

def parallelPartitionDiscoveryThreshold: Int =
getConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD)

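For anyone trying the new knob, a minimal usage sketch; the SparkSession `spark`, table names, and paths are illustrative:

```scala
// Redirect the SQL staging directory away from the table path.
// ".spark-staging" is the default; empty strings are rejected by checkValue.
spark.conf.set("spark.sql.exec.stagingDir", "/tmp/.spark-staging")

// Writes that need staging (e.g. dynamic partition overwrite) now stage
// under /tmp/.spark-staging_spark_<timestamp>_<rand>-<jobId> instead of
// under the table directory.
spark.sql("INSERT OVERWRITE TABLE t PARTITION (p) SELECT * FROM src")
```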
InsertIntoHadoopFsRelationCommand.scala
@@ -162,23 +162,14 @@ case class InsertIntoHadoopFsRelationCommand(
}
}

// For dynamic partition overwrite, FileOutputCommitter's output path is staging path, files
// will be renamed from staging path to final output path during commit job
val committerOutputPath = if (dynamicPartitionOverwrite) {
FileCommitProtocol.getStagingDir(outputPath.toString, jobId)
.makeQualified(fs.getUri, fs.getWorkingDirectory)
} else {
qualifiedOutputPath
}

val updatedPartitionPaths =
FileFormatWriter.write(
sparkSession = sparkSession,
plan = child,
fileFormat = fileFormat,
committer = committer,
outputSpec = FileFormatWriter.OutputSpec(
committerOutputPath.toString, customPartitionLocations, outputColumns),
committer.getOutputPath.toString, customPartitionLocations, outputColumns),
hadoopConf = hadoopConf,
partitionColumns = partitionColumns,
bucketSpec = bucketSpec,
SQLHadoopMapReduceCommitProtocol.scala
@@ -21,8 +21,9 @@ import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{OutputCommitter, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol
import org.apache.spark.internal.io.{FileCommitProtocol, HadoopMapReduceCommitProtocol}
import org.apache.spark.sql.internal.SQLConf

@@ -36,6 +37,10 @@ class SQLHadoopMapReduceCommitProtocol(
extends HadoopMapReduceCommitProtocol(jobId, path, dynamicPartitionOverwrite)
with Serializable with Logging {

override val stagingDir: Path =
FileCommitProtocol.externalTempPath(new Path(path), SparkHadoopUtil.get.conf,
SQLConf.get.stagingDir, "spark", jobId)

override protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = {
var committer = super.setupCommitter(context)

SQLPathHadoopMapReduceCommitProtocol.scala (new file)
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.datasources

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{OutputCommitter, TaskAttemptContext}

import org.apache.spark.internal.io.FileNameSpec

/**
* A variant of [[SQLHadoopMapReduceCommitProtocol]] that always commits task
* output through a [[SQLPathOutputCommitter]] created against the staging
* directory and the final output path.
*/
class SQLPathHadoopMapReduceCommitProtocol(
jobId: String,
path: String,
dynamicPartitionOverwrite: Boolean = false)
extends SQLHadoopMapReduceCommitProtocol(jobId, path, dynamicPartitionOverwrite) {

// This variable can only be used after setupCommitter has been called.
private lazy val sqlPathOutputCommitter: SQLPathOutputCommitter =
committer.asInstanceOf[SQLPathOutputCommitter]

override protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = {
val committer = new SQLPathOutputCommitter(stagingDir, new Path(path), context)
logInfo(s"Using output committer class ${committer.getClass.getCanonicalName}")
committer
}

override def newTaskTempFile(
taskContext: TaskAttemptContext,
dir: Option[String],
spec: FileNameSpec): String = {
val filename = getFilename(taskContext, spec)
dir.map { d =>
new Path(new Path(
sqlPathOutputCommitter.getTaskAttemptPath(taskContext), d), filename).toString
}.getOrElse {
new Path(sqlPathOutputCommitter.getTaskAttemptPath(taskContext), filename).toString
}
}
}
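The new class is not wired in as the default; opting in would go through the existing commit-protocol conf whose default appears in the SQLConf hunk above. A sketch, assuming a live SparkSession named `spark`:

```scala
// Select the path-based protocol for this session. The class name comes from
// this PR; the conf key is the existing spark.sql.sources.commitProtocolClass.
spark.conf.set(
  "spark.sql.sources.commitProtocolClass",
  "org.apache.spark.sql.execution.datasources.SQLPathHadoopMapReduceCommitProtocol")
```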