MEMORY_ONLY
代码如下
package com.yxw.Test
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}
object KryoTest000 {
def main(args: Array[String]): Unit = {
//定义输入输出路径
val inputpath = new Path(args(0)) //file:///E:/BaiduNetdiskDownload/cleaned.log
val outputpath = new Path(args(1)) //file:///E:/BaiduNetdiskDownload/outputpath
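// Obtain the default FileSystem from the Hadoop configuration (fs.defaultFS); this is HDFS only when configured so, otherwise the local file system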
val fsConf = new Configuration()
val fs = FileSystem.get(fsConf)
//路径存在就删除
if (fs.exists(outputpath)){
fs.delete(outputpath,true)
val path = args(1).toString
println(s"已经删除存在的路径 $path")
}
//创建sparkcontext
val conf = new SparkConf().setAppName("KryoTest000APP").setMaster("local[4]")
val sc = new SparkContext(conf)
//得到文件 创建RDD
val files = sc.textFile(args(0))
// files.foreach(p