Bootstrap

flink学习(3)——方法的使用—对流的处理(map,flatMap,filter)

map案例

数据

86.149.9.216 10001 17/05/2015:10:05:30 GET /presentations/logstash-monitorama-2013/images/github-contributions.png
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:06:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:07:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:08:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:09:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:10:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:16:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:26:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
package com.bigdata.day02;

import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Flink {@code map} example: turns every line of {@code datas/a.log} into a
 * {@code LogBean} and prints that bean serialized as a JSON string.
 *
 * <p>Each input line is split on whitespace and read positionally as
 * {@code ip userId timestamp method path}, for example
 * {@code 83.149.9.216 10002 17/05/2015:10:06:53 GET /some/path}.
 *
 * <p>Project: flinkProject. Author: jinnian. Created: 2024-11-21 16:26:15.
 */
public class _01_map {

    /**
     * Pattern of the timestamp column, e.g. {@code 17/05/2015:10:05:30}.
     *
     * <p>{@code HH} (hour-of-day, 0-23) is required because the log uses a 24-hour
     * clock. The 12-hour letter {@code hh} would read a {@code 12:xx:xx} timestamp
     * as {@code 00:xx:xx}.
     */
    private static final String TIME_PATTERN = "dd/MM/yyyy:HH:mm:ss";

    /**
     * Builds and runs the job: read text file, map each line to JSON, print.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {

        // 1. env - prepare the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source - load the data
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");

        // 3. transformation - convert each raw log line into a JSON string
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                String[] line = s.split("\\s+");

                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);

                // Parse the timestamp column and store it as epoch milliseconds.
                // An equivalent JDK-only alternative is:
                //   new SimpleDateFormat(TIME_PATTERN).parse(line[2])
                // NOTE(review): no time zone is passed, so the JVM default zone is
                // presumably used - confirm that matches the log's zone.
                Date date = DateUtils.parseDate(line[2], TIME_PATTERN);
                logBean.setTimestamp(date.getTime());

                logBean.setPath(line[4]);

                return JSONObject.toJSONString(logBean);
            }
        })
        // 4. sink - write every JSON string to stdout
        .print();

        // 5. execute - run the job
        env.execute();
    }
}

flatMap案例

张三,苹果手机,联想电脑,华为平板
李四,华为手机,苹果电脑,小米平板
package com.bigdata.day02;

/**
 * Flink {@code flatMap} example: reads {@code datas/flatmap.log}, where each line
 * is a comma-separated record whose first field is a name and whose remaining
 * fields are items, and prints one {@code <name>有<item>} string per item.
 */
public class _02_flatmap {

    /**
     * Builds and runs the job: read text file, expand each line, print.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {

        // 1. env - prepare the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source - load the data
        DataStreamSource<String> lines = env.readTextFile("datas/flatmap.log");

        // 3. transformation - one output record per item that follows the name
        FlatMapFunction<String, String> expandItems = new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String record, Collector<String> out) throws Exception {
                String[] fields = record.split(",");
                int idx = 1;
                while (idx < fields.length) {
                    // fields[0] is read only when at least one item exists
                    out.collect(fields[0] + "有" + fields[idx]);
                    idx++;
                }
            }
        };

        // 4. sink - write every expanded record to stdout
        lines.flatMap(expandItems).print();

        // 5. execute - run the job
        env.execute();
    }
}

filter案例

package com.bigdata.day02;

/**
 * Flink {@code filter} example: maps every line of {@code datas/a.log} to the
 * JSON form of a {@code LogBean}, then keeps only the JSON strings that contain
 * the IP address {@code 83.149.9.216} and prints them.
 *
 * <p>Each input line is split on whitespace and read positionally as
 * {@code ip userId timestamp method path}.
 */
public class _03_filter {

    /**
     * Pattern of the timestamp column, e.g. {@code 17/05/2015:10:05:30}.
     *
     * <p>{@code HH} (hour-of-day, 0-23) is required because the log uses a 24-hour
     * clock. The 12-hour letter {@code hh} would read a {@code 12:xx:xx} timestamp
     * as {@code 00:xx:xx}.
     */
    private static final String TIME_PATTERN = "dd/MM/yyyy:HH:mm:ss";

    /**
     * Builds and runs the job: read text file, map to JSON, filter by IP, print.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {

        // 1. env - prepare the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source - load the data
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");

        // 3. transformation - convert each raw log line into a JSON string ...
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                String[] line = s.split("\\s+");

                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);

                // Parse the timestamp column and store it as epoch milliseconds.
                // An equivalent JDK-only alternative is:
                //   new SimpleDateFormat(TIME_PATTERN).parse(line[2])
                // NOTE(review): no time zone is passed, so the JVM default zone is
                // presumably used - confirm that matches the log's zone.
                Date date = DateUtils.parseDate(line[2], TIME_PATTERN);
                logBean.setTimestamp(date.getTime());

                logBean.setPath(line[4]);

                return JSONObject.toJSONString(logBean);
            }
        })
        // ... then keep only records whose JSON text mentions the target IP.
        // This is a substring match on the whole JSON string, not on the ip field alone.
        .filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String s) throws Exception {
                return s.contains("83.149.9.216");
            }
        })
        // 4. sink - write the surviving JSON strings to stdout
        .print();

        // 5. execute - run the job
        env.execute();
    }
}

;