apache · guixiaowen · Mar 27, 2026 · May 31, 2026 · May 31, 2026 · May 31, 2026
diff --git a/native-engine/auron-planner/proto/auron.proto b/native-engine/auron-planner/proto/auron.proto
@@ -53,6 +53,7 @@ message PhysicalPlanNode {
     OrcScanExecNode orc_scan = 25;
     KafkaScanExecNode kafka_scan = 26;
     OrcSinkExecNode orc_sink = 27;
+    CoalesceExecNode coalesce = 29;
   }
 }
 
@@ -779,6 +780,11 @@ message KafkaScanExecNode {
   string mock_data_json_array = 9;
 }
 
+// Plan node for a coalesce operator: a single child plan plus the requested
+// number of output partitions.
+message CoalesceExecNode {
+  // Child plan whose output is coalesced.
+  PhysicalPlanNode input = 1;
+  // Requested number of output partitions.
+  int32 num_partitions = 2;
+}
+
 enum KafkaFormat {
   JSON = 0;
   PROTOBUF = 1;

diff --git a/native-engine/auron-planner/src/planner.rs b/native-engine/auron-planner/src/planner.rs
@@ -67,6 +67,7 @@ use datafusion_ext_plans::{
     agg_exec::AggExec,
     broadcast_join_build_hash_map_exec::BroadcastJoinBuildHashMapExec,
     broadcast_join_exec::BroadcastJoinExec,
+    coalesce_exec::CoalesceExec,
     debug_exec::DebugExec,
     empty_partitions_exec::EmptyPartitionsExec,
     expand_exec::ExpandExec,
@@ -576,6 +577,13 @@ impl PhysicalPlanner {
                     schema,
                 )))
             }
+            // Coalesce: convert the required child plan and wrap it in a native
+            // `CoalesceExec` together with the requested partition count.
+            PhysicalPlanType::Coalesce(coalesce) => {
+                let input: Arc<dyn ExecutionPlan> = convert_box_required!(self, coalesce.input)?;
+                Ok(Arc::new(CoalesceExec::new(
+                    input,
+                    // NOTE(review): the proto field is an `int32`, so a negative value
+                    // would wrap to a very large `usize` in this cast — confirm the
+                    // JVM side never sends one.
+                    coalesce.num_partitions as usize,
+                )))
+            }
             PhysicalPlanType::CoalesceBatches(coalesce_batches) => {
                 let input: Arc<dyn ExecutionPlan> =
                     convert_box_required!(self, coalesce_batches.input)?;

diff --git a/native-engine/datafusion-ext-plans/src/coalesce_exec.rs b/native-engine/datafusion-ext-plans/src/coalesce_exec.rs
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::{any::Any, fmt::Formatter, sync::Arc};
+
+use arrow::datatypes::SchemaRef;
+use datafusion::{
+    common::Result,
+    execution::context::TaskContext,
+    physical_expr::EquivalenceProperties,
+    physical_plan::{
+        DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, PlanProperties,
+        SendableRecordBatchStream, Statistics,
+        execution_plan::{Boundedness, EmissionType},
+        metrics::{ExecutionPlanMetricsSet, MetricsSet},
+    },
+};
+use futures::StreamExt;
+use once_cell::sync::OnceCell;
+
+use crate::common::execution_context::ExecutionContext;
+
+/// Physical plan node for the native side of the coalesce operator.
+///
+/// Wraps a single child plan and records the requested partition count.
+#[derive(Debug)]
+pub struct CoalesceExec {
+    /// Child plan whose record batches are forwarded by `execute`.
+    input: Arc<dyn ExecutionPlan>,
+    /// Requested number of output partitions. Within this file it is only shown
+    /// in the display string and carried over by `with_new_children`.
+    num_partitions: usize,
+    /// Metrics set handed to the execution context and exposed via `metrics()`.
+    metrics: ExecutionPlanMetricsSet,
+    /// Plan properties, built lazily on the first `properties()` call.
+    props: OnceCell<PlanProperties>,
+}
+
+impl CoalesceExec {
+    /// Creates a coalesce node over `input` that records `num_partitions`.
+    ///
+    /// The node starts with a fresh metrics set and an empty (not yet computed)
+    /// properties cell.
+    pub fn new(input: Arc<dyn ExecutionPlan>, num_partitions: usize) -> Self {
+        let metrics = ExecutionPlanMetricsSet::new();
+        let props = OnceCell::new();
+        Self {
+            input,
+            num_partitions,
+            metrics,
+            props,
+        }
+    }
+}
+
+impl DisplayAs for CoalesceExec {
+    /// Renders the node as `CoalesceExec[num_partitions=N]`; the same text is
+    /// produced for every display format.
+    fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result {
+        f.write_fmt(format_args!(
+            "CoalesceExec[num_partitions={}]",
+            self.num_partitions
+        ))
+    }
+}
+
+impl ExecutionPlan for CoalesceExec {
+    /// Static operator name.
+    fn name(&self) -> &str {
+        "CoalesceExec"
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// The output schema is exactly the child's schema.
+    fn schema(&self) -> SchemaRef {
+        self.input.schema()
+    }
+
+    /// Builds the plan properties on first use and caches them.
+    ///
+    /// The output partitioning is copied from the child, so this node reports
+    /// the same partition layout as its input.
+    fn properties(&self) -> &PlanProperties {
+        self.props.get_or_init(|| {
+            PlanProperties::new(
+                EquivalenceProperties::new(self.schema()),
+                self.input.output_partitioning().clone(),
+                EmissionType::Both,
+                Boundedness::Bounded,
+            )
+        })
+    }
+
+    /// The single child plan.
+    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
+        vec![&self.input]
+    }
+
+    /// Rebuilds the node over the first entry of `children`, keeping the
+    /// partition count. Metrics and cached properties start fresh.
+    fn with_new_children(
+        self: Arc<Self>,
+        children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        Ok(Arc::new(CoalesceExec::new(
+            children[0].clone(),
+            self.num_partitions,
+        )))
+    }
+
+    /// Executes the child for `partition` and forwards its batches unchanged.
+    fn execute(
+        &self,
+        partition: usize,
+        context: Arc<TaskContext>,
+    ) -> Result<SendableRecordBatchStream> {
+        let exec_ctx = ExecutionContext::new(context, partition, self.schema(), &self.metrics);
+        let mut input = exec_ctx.execute(&self.input)?;
+        Ok(
+            exec_ctx.output_with_sender("Coalesce", move |sender| async move {
+                // `transpose()?` ends the loop with the first error the input yields;
+                // `None` means the input stream is exhausted.
+                while let Some(batch) = input.next().await.transpose()? {
+                    sender.send(batch).await;
+                }
+                Ok(())
+            }),
+        )
+    }
+
+    /// Snapshot of the metrics recorded for this node.
+    fn metrics(&self) -> Option<MetricsSet> {
+        Some(self.metrics.clone_inner())
+    }
+
+    /// Reports unknown statistics for the output schema.
+    ///
+    /// Returning a value instead of panicking keeps any caller that inspects
+    /// plan statistics from aborting the task.
+    fn statistics(&self) -> Result<Statistics> {
+        Ok(Statistics::new_unknown(&self.schema()))
+    }
+}
diff --git a/native-engine/datafusion-ext-plans/src/lib.rs b/native-engine/datafusion-ext-plans/src/lib.rs
@@ -36,6 +36,7 @@ pub mod agg;
 pub mod agg_exec;
 pub mod broadcast_join_build_hash_map_exec;
 pub mod broadcast_join_exec;
+pub mod coalesce_exec;
 pub mod debug_exec;
 pub mod empty_partitions_exec;
 pub mod expand_exec;

diff --git a/spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/auron/ShimsImpl.scala b/spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/auron/ShimsImpl.scala
@@ -288,6 +288,9 @@ class ShimsImpl extends Shims with Logging {
   override def createNativeFilterExec(condition: Expression, child: SparkPlan): NativeFilterBase =
     NativeFilterExec(condition, child)
 
+  // Builds the shim-specific native coalesce node for `child`.
+  override def createNativeCoalesceExec(
+      numPartitions: Int,
+      child: SparkPlan): NativeCoalesceBase =
+    NativeCoalesceExec(numPartitions, child)
+
   override def createNativeGenerateExec(
       generator: Generator,
       requiredChildOutput: Seq[Attribute],

diff --git a/...s-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeCoalesceExec.scala b/...s-spark/src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeCoalesceExec.scala
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.auron.plan
+
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.execution.SparkPlan
+
+import org.apache.auron.sparkver
+
+/**
+ * Shim-level concrete plan node for native coalesce.
+ *
+ * Passes `numPartitions` and `child` straight to `NativeCoalesceBase` and adds the
+ * Spark-version-specific hooks for replacing the child.
+ */
+case class NativeCoalesceExec(numPartitions: Int, override val child: SparkPlan)
+    extends NativeCoalesceBase(numPartitions, child) {
+  // Child replacement hook for the Spark versions listed in the annotation.
+  @sparkver("3.2 / 3.3 / 3.4 / 3.5 / 4.0 / 4.1")
+  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
+    copy(child = newChild)
+
+  // Spark 3.0 / 3.1: children are replaced through `withNewChildren`; only the
+  // first new child is used.
+  @sparkver("3.0 / 3.1")
+  override def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan =
+    copy(child = newChildren.head)
+
+  // The node exposes exactly the child's output attributes.
+  override def output: Seq[Attribute] =
+    child.output
+}
diff --git a/...-extension-shims-spark/src/test/scala/org/apache/auron/AuronNativeCoalesceExecSuite.scala b/...-extension-shims-spark/src/test/scala/org/apache/auron/AuronNativeCoalesceExecSuite.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.auron
+
+import org.apache.spark.sql.{AuronQueryTest, Row}
+import org.apache.spark.sql.execution.auron.plan.NativeCoalesceExec
+
+/**
+ * Checks that a query carrying a `coalesce` hint returns the expected rows and that
+ * its executed plan contains a `NativeCoalesceExec`.
+ */
+class AuronNativeCoalesceExecSuite extends AuronQueryTest with BaseAuronSQLSuite {
+  import testImplicits._
+
+  test("test CoalesceExec to native") {
+    withSQLConf("spark.auron.enable.coalesce" -> "true") {
+      Seq((1, 2, "test test"))
+        .toDF("c1", "c2", "part")
+        .createOrReplaceTempView("coalesce_table1")
+      val df = spark.sql("select /*+ coalesce(2)*/ a.c1, a.c2 from coalesce_table1 a ")
+
+      checkAnswer(df, Seq(Row(1, 2)))
+
+      // Look the node up once and fail with the plan as a clue when it is missing,
+      // rather than throwing from `Option.get` before the assertion runs.
+      val nativeCoalesce = collectFirst(df.queryExecution.executedPlan) {
+        case coalesceExec: NativeCoalesceExec => coalesceExec
+      }
+      assert(
+        nativeCoalesce.isDefined,
+        s"expected a NativeCoalesceExec in the executed plan:\n${df.queryExecution.executedPlan}")
+    }
+  }
+}
diff --git a/...extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java b/...extension/src/main/java/org/apache/auron/spark/configuration/SparkAuronConfiguration.java
@@ -411,6 +411,11 @@ public class SparkAuronConfiguration extends AuronConfiguration {
             .withDescription("Enable AggregateExec operation conversion to native Auron implementations.")
             .withDefaultValue(true);
 
+    // Toggles conversion of CoalesceExec to the native implementation
+    // (key "auron.enable.coalesce"); enabled by default.
+    // NOTE(review): the constant is spelled ENABLE_COALESEC although the key says
+    // "coalesce"; renaming it requires updating its references in the same change.
+    public static final ConfigOption<Boolean> ENABLE_COALESEC = new SQLConfOption<>(Boolean.class)
+            .withKey("auron.enable.coalesce")
+            .withCategory("Operator Supports")
+            .withDescription("Enable CoalesceExec operation conversion to native Auron implementations.")
+            .withDefaultValue(true);
     public static final ConfigOption<Boolean> ENABLE_EXPAND = new SQLConfOption<>(Boolean.class)
             .withKey("auron.enable.expand")
             .withCategory("Operator Supports")

diff --git a/spark-extension/src/main/scala/org/apache/spark/sql/auron/AuronConvertStrategy.scala b/spark-extension/src/main/scala/org/apache/spark/sql/auron/AuronConvertStrategy.scala
@@ -165,6 +165,8 @@ object AuronConvertStrategy extends Logging {
         e.setTagValue(convertStrategyTag, AlwaysConvert)
       case e: GenerateExec if isNative(e.child) =>
         e.setTagValue(convertStrategyTag, AlwaysConvert)
+      case e: CoalesceExec if isNative(e.child) =>
+        e.setTagValue(convertStrategyTag, AlwaysConvert)
       case e: ObjectHashAggregateExec if isNative(e.child) =>
         e.setTagValue(convertStrategyTag, AlwaysConvert)
       case e: LocalTableScanExec =>

diff --git a/spark-extension/src/main/scala/org/apache/spark/sql/auron/AuronConverters.scala b/spark-extension/src/main/scala/org/apache/spark/sql/auron/AuronConverters.scala
@@ -107,6 +107,7 @@ object AuronConverters extends Logging {
     SparkAuronConfiguration.ENABLE_TAKE_ORDERED_AND_PROJECT.get()
   def enableCollectLimit: Boolean = SparkAuronConfiguration.ENABLE_COLLECT_LIMIT.get()
   def enableAggr: Boolean = SparkAuronConfiguration.ENABLE_AGGR.get()
+  // Whether CoalesceExec conversion is enabled; reads SparkAuronConfiguration.ENABLE_COALESEC.
+  def enableCoalesec: Boolean = SparkAuronConfiguration.ENABLE_COALESEC.get()
   def enableExpand: Boolean = SparkAuronConfiguration.ENABLE_EXPAND.get()
   def enableWindow: Boolean = SparkAuronConfiguration.ENABLE_WINDOW.get()
   def enableWindowGroupLimit: Boolean = SparkAuronConfiguration.ENABLE_WINDOW_GROUP_LIMIT.get()
@@ -252,6 +253,10 @@ object AuronConverters extends Logging {
         }
         convertedAgg
 
+      case e: CoalesceExec if enableCoalesec => // coalesce
+        val convertedCoalesce = tryConvert(e, convertCoalesceExec)
+        convertedCoalesce
+
       case e: ObjectHashAggregateExec if enableAggr => // object hash aggregate
         val convertedAgg = tryConvert(e, convertObjectHashAggregateExec)
         if (!e.getTagValue(convertibleTag).contains(true)) {
@@ -370,6 +375,8 @@ object AuronConverters extends Logging {
           "Conversion disabled: spark.auron.enable.local.table.scan=false."
         case _: DataWritingCommandExec if !enableDataWriting =>
           "Conversion disabled: spark.auron.enable.data.writing=false."
+        case _: CoalesceExec if !enableCoalesec =>
+          "Conversion disabled: spark.auron.enable.coalesce=false."
         case _ =>
           s"${exec.getClass.getSimpleName} is not supported yet."
       }
@@ -807,6 +814,10 @@ object AuronConverters extends Logging {
     Shims.get.createNativeCollectLimitExec(limit, offset, exec.child)
   }
 
+  /**
+   * Converts a Spark `CoalesceExec` into its native counterpart by handing the
+   * partition count and the child plan to the active shim.
+   */
+  def convertCoalesceExec(exec: CoalesceExec): SparkPlan = {
+    val targetPartitions = exec.numPartitions
+    val childPlan = exec.child
+    Shims.get.createNativeCoalesceExec(targetPartitions, childPlan)
+  }
+
   def convertHashAggregateExec(exec: HashAggregateExec): SparkPlan = {
     // split non-trivial children exprs in partial-agg to a ProjectExec
     // for enabling filter-project optimization in native side

diff --git a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeRDD.scala b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeRDD.scala
@@ -26,7 +26,7 @@ import org.apache.spark.Partitioner
 import org.apache.spark.SparkContext
 import org.apache.spark.TaskContext
 import org.apache.spark.internal.Logging
-import org.apache.spark.rdd.RDD
+import org.apache.spark.rdd.{CoalescedRDDPartition, RDD}
 import org.apache.spark.sql.catalyst.InternalRow
 
 import org.apache.auron.metric.SparkMetricNode
@@ -80,6 +80,33 @@ class NativeRDD(
   }
 }
 
+/**
+ * A `NativeRDD` whose partitions are supplied by the caller and whose `compute` reads
+ * the parent partitions grouped under each `CoalescedRDDPartition`.
+ *
+ * It is constructed with an empty metric node, no partitioner and
+ * `rddShuffleReadFull = false`.
+ */
+class CoalesceNativeRDD(
+    @transient private val rddSparkContext: SparkContext,
+    rddDependencies: Seq[Dependency[_]],
+    partitions: Array[Partition],
+    @transient private val nativePlan: (Partition, TaskContext) => PhysicalPlanNode,
+    friendlyName: String)
+    extends NativeRDD(
+      rddSparkContext,
+      metrics = SparkMetricNode(Map.empty, Seq(), None),
+      rddPartitions = partitions,
+      rddPartitioner = None,
+      rddDependencies,
+      rddShuffleReadFull = false,
+      nativePlan,
+      friendlyName)
+    with Logging
+    with Serializable {
+
+  // Hands back the partition array passed to the constructor.
+  override protected def getPartitions: Array[Partition] = partitions
+
+  // Concatenates the rows of every parent partition referenced by this split.
+  // `split` must be a `CoalescedRDDPartition`; any other partition type fails the cast.
+  // NOTE(review): rows are read from the first parent's iterator, so `nativePlan` is
+  // not consulted in this method — confirm that is intended.
+  override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = {
+    split.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentPartition =>
+      firstParent[InternalRow].iterator(parentPartition, context)
+    }
+  }
+}
+
 class EmptyNativeRDD(@transient private val rddSparkContext: SparkContext)
     extends NativeRDD(
       rddSparkContext = rddSparkContext,

diff --git a/spark-extension/src/main/scala/org/apache/spark/sql/auron/Shims.scala b/spark-extension/src/main/scala/org/apache/spark/sql/auron/Shims.scala
@@ -139,6 +139,8 @@ abstract class Shims {
       offset: Int,
       child: SparkPlan): NativeCollectLimitBase
 
+  // Creates the native coalesce node for `child` with the requested partition count.
+  def createNativeCoalesceExec(numPartitions: Int, child: SparkPlan): NativeCoalesceBase
+
   def createNativeParquetInsertIntoHiveTableExec(
       cmd: InsertIntoHiveTable,
       child: SparkPlan): NativeParquetInsertIntoHiveTableBase