Bootstrap

Java/Spark 消费 Kafka:SparkStreaming 消费 Kafka 数据

概要:本例演示如何使用 SparkStreaming 消费 Kafka 消息,将数据实时地进行抽取、过滤、转换,然后存储到 HDFS 中。

实例代码

package com.fwmagic.test

import com.alibaba.fastjson.{JSON, JSONException}

import org.apache.kafka.common.serialization.StringDeserializer

import org.apache.spark.SparkConf

import org.apache.spark.sql.{SaveMode, SparkSession}

import org.apache.spark.streaming.kafka010._

import org.apache.spark.streaming.{Seconds, StreamingContext}

import org.slf4j.LoggerFactory

/**

* created by fwmagic

*/

object RealtimeEtl {

private val logger = LoggerFactory.getLogger(PVUV.getClass)

def main(args: Array[String]): Unit = {

System.setProperty("HADOOP_USER_NAME", "hadoop")

val conf = new SparkConf().setAppName("RealtimeEtl").setMaster("local[*]")

val spark = SparkSession.builder().config(conf).getOrCreate()