Bootstrap

Spark的Kryo序列化性能测试以及RDD持久化级别

MEMORY_ONLY

代码如下：

package com.yxw.Test

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}

object KryoTest000 {
  def main(args: Array[String]): Unit = {

    //定义输入输出路径
    val inputpath = new Path(args(0)) //file:///E:/BaiduNetdiskDownload/cleaned.log
    val outputpath = new Path(args(1)) //file:///E:/BaiduNetdiskDownload/outputpath

    //连接hdfs
    val fsConf = new Configuration()
    val fs = FileSystem.get(fsConf)

    //路径存在就删除
    if (fs.exists(outputpath)){
      fs.delete(outputpath,true)
      val path = args(1).toString
      println(s"已经删除存在的路径 $path")
    }

    //创建sparkcontext
    val conf = new SparkConf().setAppName("KryoTest000APP").setMaster("local[4]")
    val sc = new SparkContext(conf)
    //得到文件 创建RDD
    val files = sc.textFile(args(0))

   // files.foreach(p
;