The commonly used Action operators listed in the official documentation.
Scala code
package com.itechthink.rdd

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Common Action operators in Scala
 *
 * reduce: aggregates the elements
 * collect: fetches all elements back to the driver
 * take(n): fetches the first n elements
 * count: returns the total number of elements
 * saveAsTextFile: saves the RDD as text files
 * countByKey: counts how many times each key appears
 */
object ScalaActionOperator {

  private def getSparkContext: SparkContext = {
    val conf = new SparkConf()
      // Set the application name
      .setAppName("ScalaActionOperator")
      // Set the Spark run mode
      .setMaster("local")
    new SparkContext(conf)
  }

  def main(args: Array[String]): Unit = {
    // 1. Create the SparkContext first
    val sc = getSparkContext
    // Uncomment one operator at a time to run it
    // reduce: aggregates the elements
    //reduceOperator(sc)
    // collect: fetches all elements back to the driver
    //collectOperator(sc)
    // take(n): fetches the first n elements
    //takeOperator(sc)
    // count: returns the total number of elements
    //countOperator(sc)
    // saveAsTextFile: saves the RDD as text files
    //saveAsTextFileOperator(sc)
    // countByKey: counts how many times each key appears
    //countByKeyOperator(sc)
    // 2. Stop the SparkContext
    sc.stop()
  }

  private def countByKeyOperator(sc: SparkContext): Unit = {
    val rdd = sc.parallelize(Array(("a", 1), ("b", 2), ("a", 3), ("b", 4)))
    println(rdd.countByKey())
  }

  private def saveAsTextFileOperator(sc: SparkContext): Unit = {
    val rdd = sc.parallelize(1 to 10)
    // Note: the target directory must not already exist, or Spark will throw an exception
    rdd.saveAsTextFile("/Users/bear/Downloads/result")
  }

  private def countOperator(sc: SparkContext): Unit = {
    val rdd = sc.parallelize(1 to 10)
    println(rdd.count())
  }

  private def takeOperator(sc: SparkContext): Unit = {
    val rdd = sc.parallelize(1 to 10)
    println(rdd.take(5).mkString(","))
  }

  private def collectOperator(sc: SparkContext): Unit = {
    val rdd = sc.parallelize(1 to 10)
    println(rdd.collect().mkString(","))
  }

  private def reduceOperator(sc: SparkContext): Unit = {
    val rdd = sc.parallelize(1 to 10)
    println(rdd.reduce(_ + _))
  }
}
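For reference, uncommenting and running the operators one at a time in local mode should print output along these lines (a sketch; the Map ordering from countByKey and the number of part files may vary by environment):

reduce:         55
collect:        1,2,3,4,5,6,7,8,9,10
take(5):        1,2,3,4,5
count:          10
countByKey:     Map(a -> 2, b -> 2)
saveAsTextFile: prints nothing; writes part-* files under /Users/bear/Downloads/result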