Bootstrap

logstash使用心得

logstash使用心得

部署:

  • 下载logstash-7.2.0.tar.gz包
  • 解压
  • 进入解压出来的文件夹里的bin目录,执行./logstash -f ../config/logstash-sample.conf即可使用示例配置运行(注意:不带-f参数直接执行./logstash时,logstash读取的是config/pipelines.yml中定义的管道,而不是logstash-sample.conf)

建议使用方式:

  • 部署完成后,参照logstash-sample.conf新写一个自己的配置文件,然后使用./logstash -f logstash.conf运行。比如我自己就写了一个启动脚本:
# Start Logstash with the custom pipeline configuration.
# Abort if the bin directory cannot be entered, so ./logstash is never
# resolved against whatever directory the script happened to start in.
cd /home/elk/logstash/logstash-7.2.0/bin || exit 1
pwd
./logstash -f /home/elk/logstash/logstash-7.2.0/config/logstash.conf
  • 在写conf文件的时候,往往需要多次调试,每次去重启logstash是很浪费时间的,这时候可以修改config文件夹下的logstash.yml,加入一行配置:
# Keep Logstash running and automatically reload the pipeline whenever the config file changes.
# This line may already be present in logstash.yml but commented out, so look for it first; simply uncommenting it works too.
config.reload.automatic: true

conf文件配置心得

整合


input{
  # Incremental sync of healthcode_user_code_change_log from Oracle.
  # Every event from this input is tagged with the "type" below, which the
  # filter and output sections use for routing.
  jdbc {
    type => "fjjkm_healthcode_user_code_change_log"
    jdbc_connection_string => "jdbc:oracle:thin:@//110.90.117.14:36018/orcl"
    jdbc_user => "jkm"
    # NOTE(review): plaintext credential; consider the Logstash keystore or an environment variable.
    jdbc_password => "1"
    connection_retry_attempts => "1"
    jdbc_driver_library => "/home/elk/logstash/logstash-7.2.0/lib/ojdbc8-19.3.0.0.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    # Fetch at most 10 rows newer than the last recorded sync_change_time.
    # The rows are ordered by the tracking column inside a subquery and rownum is
    # applied outside it: in Oracle, rownum is assigned before ORDER BY, so without
    # the subquery an arbitrary 10 rows would be returned and :sql_last_value could
    # move past rows that were never fetched.
    # NOTE(review): the comparison is strict (>) at one-second precision, so rows that
    # share the same second as the last tracked row but fall outside the 10-row batch
    # can still be missed - confirm whether that matters for this table.
    statement => "select * from (select name,sync_change_time,id,cert_type,origin_health_status,new_health_status,origin_rule_code,new_rule_code,origin_tab_code,new_tab_code,cert_no,create_time from healthcode_user_code_change_log where sync_change_time>to_date(to_char(:sql_last_value,'yyyy-mm-dd hh24:mi:ss'),'yyyy-mm-dd hh24:mi:ss') order by sync_change_time) where rownum<11"
    # Only rows newer than the last recorded time are synced; the author recommends a
    # 10-minute interval (the schedule below currently runs every minute).
    jdbc_paging_enabled => "false"
    #jdbc_page_size => "10"
    schedule => "* * * * *"
    # Persist the last seen sync_change_time between runs and use it as :sql_last_value.
    clean_run => false
    record_last_run => true
    use_column_value => true
    tracking_column=>"sync_change_time"
    tracking_column_type => "timestamp"
    last_run_metadata_path => "/home/elk/logstash/logstash-7.2.0/config/sql_last_value_file/oracle.yml"
  }
  # Posted-code application queue
  kafka {
    type => "fjjkm_scene_record_save"
    bootstrap_servers => "127.0.0.1:9092"
    topics => ["scene_record_save"]
    group_id => "logstash_kafka"
    client_id => "scene_record_save"
    decorate_events => true
  }
  # Posted-code scan log queue
  kafka {
    type => "fjjkm_scene_scan_log_save_topic"
    bootstrap_servers => "127.0.0.1:9092"
    topics => ["scene_scan_log_save_topic"]
    group_id => "logstash_kafka"
    client_id => "scene_scan_log_save_topic"
    decorate_events => true
  }
  # Code-display log queue
  kafka {
    type => "fjjkm_qrcode_engine_show_topic"
    bootstrap_servers => "127.0.0.1:9092"
    topics => ["qrcode_engine_show_topic"]
    group_id => "logstash_kafka"
    client_id => "qrcode_engine_show_topic"
    decorate_events => true
  }
}

filter{
  # Per-source processing; each branch is selected by the "type" assigned in the input section.

  # ---- Code-display log events -------------------------------------------------
  if [type] == "fjjkm_qrcode_engine_show_topic" {
    # Add a "timestamp" field holding @timestamp shifted by +8 hours before sending
    # to ES, because the local time zone is UTC+8.
    ruby {
      code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
    }
    # Parse the JSON in "message" and promote its keys to event fields.
    json {
      source => "message"
    }
    # First 6 characters of cid -> countyCode (county/district code).
    grok {
      match => {
        "cid" => "(?<countyCode>(.{6}))"
      }
    }
    # First 4 characters of cid -> cityCode (city code).
    grok {
      match => {
        "cid" => "(?<cityCode>(.{4}))"
      }
    }
    # First 2 characters of cid -> provinceCode (province code).
    grok {
      match => {
        "cid" => "(?<provinceCode>(.{2}))"
      }
    }
    # 17th character of cid -> sex (used to determine gender).
    grok {
      match => {
        "cid" => "(?<sex>(((?<=.{16})(.{1}))))"
      }
    }
    # Characters 7-14 of cid -> birthday (date of birth).
    grok {
      match => {
        "cid" => "(?<birthday>((?<=.{6})(.{8})))"
      }
    }
    mutate {
      # Strip escaping artifacts from "message".
      # NOTE(review): this runs after the json filter above and "message" is removed
      # below, so it does not appear to influence the parsed fields - confirm intent.
      gsub => [
        "message", '\\"', '"',
        "message", '"{', '{',
        "message", '}"', '}'
      ]
      add_field => {
        # Day-precision date.
        "date" => "%{[applyTime][date][year]}-%{[applyTime][date][month]}-%{[applyTime][date][day]}"
        # Hour-precision time.
        "time" => "%{[applyTime][time][hour]}"
        # "type" is assigned by the input and drives the routing logic, but the source
        # JSON also carries a "type" key which overwrites it once parsed. Preserve the
        # source value as apply_type; "type" is dropped here and restored by the next mutate.
        "apply_type" => "%{[type]}"
      }
      # Also drop the raw message and the nested time objects to avoid storing them again.
      remove_field => ["type", "message", "applyTime", "validTime"]
    }

    # Restore the routing value of "type" so the output section can match on it.
    mutate {
      add_field => {
        "type" => "fjjkm_qrcode_engine_show_topic"
      }
    }
  }

  # ---- Posted-code application events -----------------------------------------
  if [type] == "fjjkm_scene_record_save" {
    ruby {
      code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
    }
    json {
      source => "message"
    }
    # First 10 characters of createTime -> createDate, for per-day statistics.
    grok {
      match => {
        "createTime" => "(?<createDate>((.{10})))"
      }
    }
    # Characters 12-13 of createTime -> hour, for per-hour statistics (combine with the date).
    grok {
      match => {
        "createTime" => "(?<hour>((?<=.{11})(.{2})))"
      }
    }
    # Drop the raw message to save space.
    mutate {
      remove_field => ["message"]
    }
  }

  # ---- Posted-code scan log events --------------------------------------------
  if [type] == "fjjkm_scene_scan_log_save_topic" {
    ruby {
      code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
    }
    json {
      source => "message"
    }
    mutate {
      remove_field => ["message"]
    }
  }

  # ---- Health-code change log rows (JDBC) -------------------------------------
  if [type] == "fjjkm_healthcode_user_code_change_log" {
    # NOTE(review): unlike the Kafka branches, no 8-hour offset is added here - confirm this is intended.
    ruby {
      code => "event.set('timestamp', event.get('@timestamp').time.localtime)"
    }
    # First 6 characters of cert_no -> countyCode, for per-county statistics.
    # (Each prefix gets its own field; capturing all three into one shared field
    # would accumulate the values into a single array.)
    grok {
      match => {
        "cert_no" => "(?<countyCode>(.{6}))"
      }
    }
    # First 4 characters of cert_no -> cityCode, for per-city statistics.
    grok {
      match => {
        "cert_no" => "(?<cityCode>(.{4}))"
      }
    }
    # First 2 characters of cert_no -> provinceCode, for per-province statistics.
    grok {
      match => {
        "cert_no" => "(?<provinceCode>(.{2}))"
      }
    }
    # 17th character of cert_no -> sex (used to determine gender).
    grok {
      match => {
        "cert_no" => "(?<sex>(((?<=.{16})(.{1}))))"
      }
    }
    # Characters 7-14 of cert_no -> birthday, for age statistics.
    grok {
      match => {
        "cert_no" => "(?<birthday>((?<=.{6})(.{8})))"
      }
    }
  }
}

output{
  # Route each event to its own monthly Elasticsearch index based on "type".
  # The type values are mutually exclusive, so the branches are chained.
  if [type] == "fjjkm_healthcode_user_code_change_log" {
    elasticsearch {
      hosts => ["http://192.168.3.52:9200"]
      user => "elastic"
      password => "espassword"
      index => "fjjkm_healthcode_user_code_change_log_%{+yyyyMM}"
      # Use the database row id as the document id.
      document_id => "%{id}"
    }
  } else if [type] == "fjjkm_scene_record_save" {
    elasticsearch {
      hosts => ["http://192.168.3.52:9200"]
      user => "elastic"
      password => "espassword"
      index => "fjjkm_scene_record_%{+yyyyMM}"
      document_id => "%{timestamp}%{placeManagerCertCypher}"
    }
  } else if [type] == "fjjkm_scene_scan_log_save_topic" {
    elasticsearch {
      hosts => ["http://192.168.3.52:9200"]
      user => "elastic"
      password => "espassword"
      index => "fjjkm_scene_scan_log_%{+yyyyMM}"
      document_id => "%{timestamp}%{scanerCertNoCypher}"
    }
  } else if [type] == "fjjkm_qrcode_engine_show_topic" {
    elasticsearch {
      hosts => ["http://192.168.3.52:9200"]
      user => "elastic"
      password => "espassword"
      index => "fjjkm_t_hc_create_log_%{+yyyyMM}"
      # The JSON has no uniquely identifying field, so the ES primary key is
      # built from timestamp + cid to keep each document unique.
      document_id => "%{timestamp}%{cid}"
    }
  }
}

;