如何在 Spark SQL 中对多列和多行进行透视(pivot)
我有以下数据。
我必须在 Spark SQL 中进行透视才能得到以下结果。
尝试了很多方法,但在 AWS Glue 中执行起来都不容易。任何建议都会有所帮助。
以下是尝试过的,
(select * from category_data
unpivot
(
categoreval
for a in (e1,e2)
));
解决方法
import org.apache.spark.sql.{DataFrame,SparkSession}
import org.apache.spark.sql.functions._
/** Demo: transpose a Spark DataFrame so that selected columns become rows
  * and the distinct values of a key column become columns.
  *
  * NOTE(review): replaced `extends App` with an explicit `main` — the `App`
  * trait runs the body in a delayed initializer and has initialization-order
  * pitfalls for non-trivial entry points.
  */
object ColumnToRowDataframe {

  /** Transposes `df`: each name in `columns` becomes a row label, and each
    * distinct value of `pivotCol` becomes an output column.
    *
    * Approach (see
    * https://nikhil-suthar-bigdata.medium.com/how-to-transpose-spark-dataframe-fa82c079a6b):
    * first unpivot with `stack()`, then pivot back on `pivotCol`.
    *
    * @param df       input DataFrame
    * @param columns  columns to turn into rows (their names become the values
    *                 of the first output column)
    * @param pivotCol column whose distinct values become the output columns;
    *                 the first output column is renamed to this name
    * @return the transposed DataFrame
    */
  def TransposeDF(df: DataFrame, columns: Seq[String], pivotCol: String): DataFrame = {
    // Build the stack() argument list: "'name',name" pairs — the string
    // literal labels the row, the bare identifier supplies the cell value.
    val stackCols = columns.map(c => s"'$c',$c").mkString(",")

    // stack() emits (col0 = label, col1 = value): one row per listed column,
    // per input row — i.e. an unpivot.
    val unpivoted = df
      .selectExpr(pivotCol, s"stack(${columns.size},$stackCols)")
      .select(pivotCol, "col0", "col1")

    // Pivot on the key column; concat_ws over collect_list collapses each
    // group to a single string (assumes one value per (label, key) pair).
    unpivoted
      .groupBy(col("col0"))
      .pivot(pivotCol)
      .agg(concat_ws("", collect_list(col("col1"))))
      .withColumnRenamed("col0", pivotCol)
  }

  def main(args: Array[String]): Unit = {
    // local[*] runs Spark in-process; fine for a demo, not for a cluster.
    val spark = SparkSession.builder
      .master("local[*]")
      .appName("Task_1")
      .getOrCreate()
    import spark.implicits._

    // Ensure the session is stopped even if the demo fails (the original
    // never released Spark's resources).
    try {
      val df1 = Seq(
        ("Michel", "01-08-1999", "NY"),
        ("Joel", "02-08-1999", "WT")
      ).toDF("Name", "Dob", "Place")
      df1.show(false)
      // +------+----------+-----+
      // |Name  |Dob       |Place|
      // +------+----------+-----+
      // |Michel|01-08-1999|NY   |
      // |Joel  |02-08-1999|WT   |
      // +------+----------+-----+

      val resDF = TransposeDF(df1, Seq("Dob", "Place"), "Name")
        .withColumnRenamed("Name", "Val")
      resDF.show(false)
      // +-----+----------+----------+
      // |Val  |Joel      |Michel    |
      // +-----+----------+----------+
      // |Place|WT        |NY        |
      // |Dob  |02-08-1999|01-08-1999|
      // +-----+----------+----------+
    } finally {
      spark.stop()
    }
  }
}