- KafkaUtils.createStream approach (based on the Kafka high-level API; offsets are stored in ZooKeeper)
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils

import scala.collection.immutable

object SparkStreamingKafka_Receiver {
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("SparkStreamingKafka_Receiver")
      .setMaster("local[4]")
      // Enable the write-ahead log so data received from Kafka survives a driver failure
      .set("spark.streaming.receiver.writeAheadLog.enable", "true")
    val sc = new SparkContext(sparkConf)
    sc.setLogLevel("WARN")
    val ssc = new StreamingContext(sc, Seconds(5))
    // The write-ahead log requires a checkpoint directory
    ssc.checkpoint("./Kafka_Receiver")

    val zkQuorum = "node1:2181,node2:2181,node3:2181"
    val groupId = "spark_receiver"
    // topic name -> number of consumer threads per receiver
    val topics = Map("spark_topic1" -> 2)

    // Start three receivers to consume in parallel, then union them into a single DStream
    val receiverDstream: immutable.IndexedSeq[ReceiverInputDStream[(String, String)]] =
      (1 to 3).map { _ =>
        KafkaUtils.createStream(ssc, zkQuorum, groupId, topics)
      }
    val unionDStream: DStream[(String, String)] = ssc.union(receiverDstream)

    // Each record is a (key, value) pair; the value is the message payload
    val topicData: DStream[String] = unionDStream.map(_._2)
    val wordAndOne: DStream[(String, Int)] = topicData.flatMap(_.split(" ")).map((_, 1))
    val result: DStream[(String, Int)] = wordAndOne.reduceByKey(_ + _)
    result.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
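All three examples in this note use the Kafka 0.8 connector (kafka.serializer.StringDecoder, org.apache.spark.streaming.kafka). A minimal build.sbt sketch follows; the versions are assumptions (Spark 2.2.x on Scala 2.11) and should be matched to your cluster:

// build.sbt (sketch) -- version numbers are illustrative, adjust to your Spark/Scala install
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"                % "2.2.0",
  "org.apache.spark" %% "spark-streaming"           % "2.2.0",
  "org.apache.spark" %% "spark-streaming-kafka-0-8" % "2.2.0"
)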
- KafkaUtils.createDirectStream approach (based on the Kafka low-level API; offsets are tracked by the client program)
import kafka.serializer.StringDecoder
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils

object SparkStreamingKafka_Direct {
  def main(args: Array[String]): Unit = {
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("SparkStreamingKafka_Direct")
      .setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    sc.setLogLevel("WARN")
    val ssc = new StreamingContext(sc, Seconds(5))

    // Direct approach: no receiver and no ZooKeeper offset storage; brokers are read directly
    val kafkaParams = Map(
      "metadata.broker.list" -> "node1:9092,node2:9092,node3:9092",
      "group.id" -> "Kafka_Direct")
    val topics = Set("spark_topic1")
    val dstream: InputDStream[(String, String)] =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)

    val topicData: DStream[String] = dstream.map(_._2)
    val wordAndOne: DStream[(String, Int)] = topicData.flatMap(_.split(" ")).map((_, 1))
    val result: DStream[(String, Int)] = wordAndOne.reduceByKey(_ + _)
    result.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
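With the Direct approach the application itself is responsible for the offsets. The same connector exposes HasOffsetRanges and OffsetRange for reading the per-partition offset range of each batch; where those offsets are then persisted (ZooKeeper, a database, ...) is left to the application. A minimal sketch, reusing the dstream defined in SparkStreamingKafka_Direct above:

// Sketch: inspecting each batch's offsets in the Direct approach
import org.apache.spark.streaming.kafka.{HasOffsetRanges, OffsetRange}

dstream.foreachRDD { rdd =>
  // The cast only succeeds on the InputDStream returned by createDirectStream,
  // before any transformation has been applied
  val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  offsetRanges.foreach { range =>
    println(s"topic=${range.topic} partition=${range.partition} " +
      s"from=${range.fromOffset} until=${range.untilOffset}")
  }
  // persist offsetRanges here so the job can resume from these offsets after a restart
}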
- KafkaUtils.createDirectStream approach: Spark Streaming word count against Kafka using the Direct (low-level) API, recovering the StreamingContext from a checkpoint
import kafka.serializer.StringDecoder
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils

object SparkStreamingKafka_checkpoint {
  // Builds a fresh StreamingContext; only invoked when no checkpoint exists yet
  def createFunc(checkPointPath: String): StreamingContext = {
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("SparkStreamingKafka_checkpoint")
      .setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    sc.setLogLevel("WARN")
    val ssc = new StreamingContext(sc, Seconds(5))
    ssc.checkpoint(checkPointPath)

    val kafkaParams = Map(
      "metadata.broker.list" -> "node1:9092,node2:9092,node3:9092",
      "group.id" -> "Kafka_Direct")
    val topics = Set("spark_topic1")
    val dstream: InputDStream[(String, String)] =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)

    val topicData: DStream[String] = dstream.map(_._2)
    val wordAndOne: DStream[(String, Int)] = topicData.flatMap(_.split(" ")).map((_, 1))
    val result: DStream[(String, Int)] = wordAndOne.reduceByKey(_ + _)
    // Checkpoint the result DStream so its state is recoverable together with the context
    result.checkpoint(Seconds(5))
    result.print()
    ssc
  }

  def main(args: Array[String]): Unit = {
    val checkPointPath = "./kafka_checkpoint"
    // Restore the StreamingContext from the checkpoint if one exists; otherwise build a new one
    val ssc: StreamingContext = StreamingContext.getOrCreate(checkPointPath, () => createFunc(checkPointPath))
    ssc.start()
    ssc.awaitTermination()
  }
}
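Checkpoint recovery restores the offsets along with the StreamingContext, but a checkpoint is tied to the compiled application: after a code change it generally cannot be reused and the job starts from scratch. An alternative is to persist offsets yourself and hand them back through the fromOffsets overload of createDirectStream, which the 0.8 connector also provides. A minimal sketch reusing the ssc and kafkaParams from the examples above; storedOffsets is a hypothetical placeholder for whatever your application persisted:

// Sketch: resuming from offsets the application stored itself, instead of from a Spark checkpoint
import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils

// e.g. read back from ZooKeeper or a database; (topic, partition) -> next offset to consume
val storedOffsets: Map[TopicAndPartition, Long] =
  Map(TopicAndPartition("spark_topic1", 0) -> 0L)

// Shape each record as (key, value), mirroring the tuple form used above
val messageHandler = (mmd: MessageAndMetadata[String, String]) => (mmd.key, mmd.message)

val resumedStream: InputDStream[(String, String)] =
  KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](
    ssc, kafkaParams, storedOffsets, messageHandler)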