Chapter 4 Introduction to ELK

1. Data backup and recovery

1. Install Node.js (npm)

1) Download and upload the package

#Download address: http://nodejs.cn/download/

[root@es01 ~]# rz
[root@es01 ~]# ll
-rw-r--r--  1 root root  21609252 2020-12-02 17:28 node-v14.15.1-linux-x64.tar.xz

2) Unzip

[root@es01 ~]# tar xf node-v14.15.1-linux-x64.tar.xz
[root@es01 ~]# mv node-v14.15.1-linux-x64 node

3) Configure environment variables

[root@es01 ~]# vim /etc/profile.d/npm.sh 
export PATH=/root/node/bin:$PATH

[root@es01 ~]# source /etc/profile
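
A quick check that the new PATH entry is picked up; node should report the version of the tarball unpacked above, and npm reports whatever version is bundled with that Node release (shown here as an example):

[root@es01 ~]# node -v
v14.15.1
[root@es01 ~]# npm -v
6.14.8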

4) Switch to a domestic npm registry mirror

[root@es01 ~]# npm config set registry http://registry.npm.taobao.org/

5) Install the backup tool

[root@es01 ~]# npm install elasticdump -g
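
With this tarball layout, global npm installs land in /root/node/bin, which is already on the PATH; a quick check that the binary is found:

[root@es01 ~]# which elasticdump
/root/node/bin/elasticdump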

2. Backup tool

1) Backup parameters

#Key options to know
--input: source file or address
--output: destination file or address
--type: type of content to back up (settings, analyzer, data, mapping, alias, template)

2) Back up to another ES node

elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=http://staging.es.com:9200/test \
  --type=analyzer
  
elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=http://staging.es.com:9200/test \
  --type=mapping
  
elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=http://staging.es.com:9200/test \
  --type=data

3) Back up data to JSON files

elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=/data/test_mapping.json \
  --type=mapping
  
elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=/data/test_data.json \
  --type=data
  
elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=/data/test_alias.json \
  --type=alias
  
elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=/data/test_template.json \
  --type=template

elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=/data/test_analyzer.json \
  --type=analyzer

4) Back up as a compressed file

#If the exported file will only be kept for archiving and not used directly, it can be compressed on the fly
elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=$ | gzip > /data/test_data.json.gz

5) Back up only data matching a query

elasticdump \
  --input=http://10.0.0.91:9200/test \
  --output=/data/test_query.json \
  --searchBody='{"query":{"term":{"name": "lhd"}}}'

3. Import commands

elasticdump \
  --input=/data/test_alias.json \
  --output=http://10.0.0.91:9200/test \
  --type=alias

elasticdump \
  --input=/data/test_analyzer.json \
  --output=http://10.0.0.91:9200/test \
  --type=analyzer
  
elasticdump \
  --input=/data/test_data.json \
  --output=http://10.0.0.91:9200/test \
  --type=data
  
elasticdump \
  --input=/data/test_template.json \
  --output=http://10.0.0.91:9200/test \
  --type=template
  
elasticdump \
  --input=/data/test_mapping.json \
  --output=http://10.0.0.91:9200/test \
  --type=mapping
  
#Note: when restoring, documents with the same ID as those in the backup are overwritten; data that exists only in the target index is not affected.
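
A simple sanity check after a restore is to compare document counts between the source and the restored index (hosts and index name taken from the examples above):

[root@es01 ~]# curl -s http://10.0.0.91:9200/test/_count
[root@es01 ~]# curl -s http://staging.es.com:9200/test/_count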

4. Backup script

#!/bin/bash
echo "The host to back up from is: ${1}"
index_name='
test_2020-11-30
student
linux7
'
for index in `echo $index_name`
do
	echo "start input index ${index}"
	elasticdump --input=http://${1}:9200/${index} --output=/data/${index}_alias.json --type=alias &> /dev/null
	elasticdump --input=http://${1}:9200/${index} --output=/data/${index}_analyzer.json --type=analyzer &> /dev/null
	elasticdump --input=http://${1}:9200/${index} --output=/data/${index}_data.json --type=data &> /dev/null
	elasticdump --input=http://${1}:9200/${index} --output=/data/${index}_alias.json --type=alias &> /dev/null
	elasticdump --input=http://${1}:9200/${index} --output=/data/${index}_template.json --type=template &> /dev/null
done
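
If the script is saved as, say, es_backup.sh (the file name is only an example), it is run with the ES host as its first argument:

[root@es01 ~]# bash es_backup.sh 10.0.0.91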

5. Import data script

#!/bin/bash
echo "The host to import into is: ${1}"
index_name='
test
student
linux7
'
for index in `echo $index_name`
do
    echo "start input index ${index}"
    elasticdump --input=/data/${index}_alias.json --output=http://${1}:9200/${index} --type=alias &> /dev/null
    elasticdump --input=/data/${index}_analyzer.json --output=http://${1}:9200/${index} --type=analyzer &> /dev/null
    elasticdump --input=/data/${index}_data.json --output=http://${1}:9200/${index} --type=data &> /dev/null
    elasticdump --input=/data/${index}_template.json --output=http://${1}:9200/${index} --type=template &> /dev/null
done
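
Likewise, assuming the import script is saved as es_restore.sh (again, an arbitrary name), it takes the target host as its argument:

[root@es01 ~]# bash es_restore.sh 10.0.0.91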

2. Chinese tokenizer

https://github.com/medcl/elasticsearch-analysis-ik/

1. Insert test data

POST /index/text/1
{"content":"Is what the US left to Iraq a mess?"}
POST /index/text/2
{"content":"Ministry of Public Security: School buses across the country will enjoy the highest right of way"}
POST /index/text/3
{"content":"Investigation on the conflict between Chinese and South Korean fishermen: South Korean police detain an average of 1 Chinese fishing boat every day"}
POST /index/text/4
{"content":"Asian man shot at Chinese consulate in Los Angeles, suspect turns himself in"}

2. Query the data

POST /index/_search
{
  "query" : { "match" : { "content" : "China" }},
  "highlight" : {
      "pre_tags" : ["<tag1>", "<tag2>"],
      "post_tags" : ["</tag1>", "</tag2>"],
      "fields" : {
          "content" : {}
      }
  }
}

#The search result reveals the problem: the default tokenizer splits the word for China (中国) into two single-character tokens at index time, so it cannot be matched as a whole word
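
The behavior can be reproduced directly with the _analyze API: with the default standard analyzer, every Chinese character becomes its own token, so the word 中国 ("China") is indexed as two separate tokens:

GET /_analyze
{
  "analyzer": "standard",
  "text": "中国"
}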

3. Configure Chinese tokenizer

1) Upload the plugin package (every machine in the cluster performs these steps)

[root@es01 ~]# rz
[root@es01 ~]# ll
-rw-r--r--  1 root root   4504556 2020-05-19 00:22 elasticsearch-analysis-ik-6.6.0.zip

2) Unzip

[root@es01 ~]# mkdir /usr/share/elasticsearch/plugins/ik -p
[root@es01 ~]# unzip elasticsearch-analysis-ik-6.6.0.zip -d /usr/share/elasticsearch/plugins/ik

3) Edit the configuration file

[root@es03 ~]# vim /usr/share/elasticsearch/plugins/ik/config/IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer Extended configuration</comment>
    <!--Users can configure their own extension dictionary here -->
    <entry key="ext_dict">/etc/elasticsearch/config/my.dic</entry>
        <!--Users can configure their own extended stop word dictionary here-->
        <entry key="ext_stopwords"></entry>
        <!--User can configure remote extension dictionary here -->
        <!-- <entry key="remote_ext_dict">words_location</entry> -->
        <!--User can configure remote extension stop word dictionary here-->
        <!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>

4) Create the custom dictionary file

[root@es03 ~]# cat /etc/elasticsearch/config/my.dic
China

5) Restart the service

[root@es01 ~]# systemctl restart elasticsearch.service
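
Before reindexing, it is worth confirming that every node actually loaded the plugin; one way (using the node address from the earlier examples) is the _cat/plugins API:

[root@es01 ~]# curl -s http://10.0.0.91:9200/_cat/plugins
#Every node should list analysis-ik 6.6.0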

6) Reinsert the data

1. Create the index
PUT /news

2. Create the mapping
POST /news/text/_mapping
{
	"properties": {
		"content": {
			"type": "text",
			"analyzer": "ik_max_word",
			"search_analyzer": "ik_smart"
		}
	}
}

3. Insert the data
POST /news/text/1
{"content":"Is what the US left to Iraq a mess?"}
POST /news/text/2
{"content":"Ministry of Public Security: School buses across the country will enjoy the highest right of way"}
POST /news/text/3
{"content":"Investigation on the conflict between Chinese and South Korean fishermen: South Korean police detain an average of 1 Chinese fishing boat every day"}
POST /news/text/4
{"content":"Asian man shot at Chinese consulate in Los Angeles, suspect turns himself in"}

7) Query the keyword again

POST /news/_search
{
	"query" : { "match" : { "content" : "China" }},
	"highlight" : {
		"pre_tags" : ["<tag1>", "<tag2>"],
		"post_tags" : ["</tag1>", "</tag2>"],
		"fields" : {
			"content" : {}
		}
	}
}

#The keyword is now tokenized correctly and matched as a whole word
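
To see how the two IK analyzers configured in the mapping differ, point the _analyze API at each one: ik_max_word produces the finest-grained split, ik_smart the coarsest:

GET /_analyze
{
  "analyzer": "ik_max_word",
  "text": "中华人民共和国"
}

GET /_analyze
{
  "analyzer": "ik_smart",
  "text": "中华人民共和国"
}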

3. Introduction to ELK

1. What is ELK

ELK consists of three pieces of software
E: Elasticsearch		#Java program; stores and queries logs
L: Logstash			#Java program; collects and filters logs
K: Kibana			#Java program; displays the data in web pages

F: Filebeat			#written in Go; lightweight log collection and filtering

2. The role of ELK

1. Collect: gather logs from all servers
2. Transmit: ship the logs reliably to ES or to a message queue
3. Store: ES stores the data efficiently
4. Analyze: analyze the data through web pages and charts
5. Monitor: monitor the cluster architecture

3. Advantages of ELK

1. Flexible data processing
2. Simple configuration
3. High query performance
4. Easy cluster scaling
5. Intuitive, attractive web pages

4. Why use ELK

1. Web log collection
2. Business log collection
3. System log collection
4. Analysis of all of the above logs

#Typical statistics a company wants from these logs
1. User traffic statistics
2. The top ten client IPs by number of visits
3. The most visited URLs on the site
4. The three metrics above for the morning period 8:10-12:30
5. The three metrics above for the afternoon period 13:30-17:30
6. A comparison of the two periods
7. A comparison of the data for each day of the week

#With ELK in place, all of the above are simple queries; without it, each statistic is a hand-written script against the raw logs (see the one-liner sketch below)
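
For example, a rough one-liner for statistic 2 above, the top ten client IPs, assuming a standard nginx access log at /var/log/nginx/access.log (the path is only an example):

[root@web01 ~]# awk '{print $1}' /var/log/nginx/access.log | sort | uniq -c | sort -rn | head -10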

4. Introduction to Logstash

1. Deploy Logstash

1) Install the java environment

[root@web01 ~]# yum localinstall -y jdk-8u181-linux-x64.rpm

2) Time synchronization

[root@web01 ~]# ntpdate time1.aliyun.com

3) Install logstash

[root@web01 ~]# rz
[root@web01 ~]# yum localinstall -y logstash-6.6.0.rpm

4) Set ownership

[root@web01 ~]# ll /usr/share/logstash/
[root@web01 ~]# chown -R logstash.logstash /usr/share/logstash/

#startup file
[root@web01 ~]# ll /usr/share/logstash/bin/logstash
-rwxr-xr-x 1 logstash logstash 2354 Jan 24  2019 /usr/share/logstash/bin/logstash

2. Logstash plugins

INPUT: lets Logstash read from a specific event source.
OUTPUT: sends event data to a specific destination; it is the last stage of the event pipeline.
CODEC: encodes or decodes the data as it enters or leaves the pipeline.

INPUT plugins (supported event sources):
azure_event_hubs (Microsoft Azure Event Hubs)
beats (the Filebeat log collection tool)
elasticsearch (search engine database)
file (file)
generator (generator)
heartbeat (high-availability software)
http_poller (HTTP API)
jdbc (Java database driver)
kafka (Java-based message queue)
rabbitmq (message queue, used by OpenStack)
redis (cache, message queue, NoSQL)
s3* (object storage)
stdin (standard input)
syslog (system log)
tcp (Transmission Control Protocol)
udp (User Datagram Protocol)

OUTPUT plugins (supported destinations):
elasticsearch (search engine database)
email (mail)
file (file)
http (Hypertext Transfer Protocol)
kafka (Java-based message queue)
rabbitmq (message queue, used by OpenStack)
redis (cache, message queue, NoSQL)
s3* (object storage)
stdout (standard output)
tcp (Transmission Control Protocol)
udp (User Datagram Protocol)

CODEC plugins (supported encodings):
avro (data serialization)
cef (Common Event Format)
es_bulk (ES bulk API format)
json (data serialization/formatting)
json_lines (line-delimited JSON, convenient for structured storage)
line (line)
multiline (multi-line matching)
plain (plain text, no delimiter between events)
rubydebug (Ruby debug format)
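
In practice these plugins are combined in a pipeline configuration file with up to three sections; a minimal sketch using only plugins shown in this chapter (the filter section is optional and not covered here):

input {
  stdin {}                          # read events from standard input
}
filter {
  # optional: filter plugins (grok, mutate, ...) would go here
}
output {
  stdout { codec => rubydebug }     # print events to standard output in rubydebug format
}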

3. Test the Logstash input and output plugins

1) Configure environment variables

[root@web01 ~]# vim /etc/profile.d/logstash.sh
export PATH=/usr/share/logstash/bin/:$PATH
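
A quick check that the PATH change works (the version printed should match the installed RPM):

[root@web01 ~]# logstash --version
logstash 6.6.0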

2) Collect standard input to standard output

[root@web01 ~]# logstash -e 'input { stdin {} } output { stdout {} }'

34567890
{
       "message" => "34567890",                   #content collected
    "@timestamp" => 2020-12-03T09:27:18.886Z,     #timestamp
          "host" => "web01",                      #host the data was collected from
      "@version" => "1"                           #event format version
}

3) Collect standard input to standard output with a specified codec

#Note: rubydebug is already the default codec of the stdout plugin, so the output looks the same as in the previous step

[root@web01 ~]# logstash -e 'input { stdin {} } output { stdout { codec => rubydebug } }'

123
{
       "message" => "123",
      "@version" => "1",
    "@timestamp" => 2020-12-03T09:33:40.563Z,
          "host" => "web01"
}

4) Collect stdin to file

[root@web01 ~]# logstash -e 'input { stdin {} } output { file { path => "/tmp/1.txt" } }'
123
[INFO ] 2020-12-03 17:40:50.731 [[main]>worker0] file - Opening file {:path=>"/tmp/1.txt"}
234
345

#Verify file write
[root@web01 ~]# tail -f /tmp/1.txt 
{"message":"123","@timestamp":"2020-12-03T09:40:50.333Z","host":"web01","@version":"1"}
{"message":"234","@timestamp":"2020-12-03T09:41:27.302Z","host":"web01","@version":"1"}
{"message":"345","@timestamp":"2020-12-03T09:41:45.527Z","host":"web01","@version":"1"}

5) Collect standard input to ES

[root@web01 ~]# logstash -e 'input { stdin {} } output { elasticsearch { hosts => ["10.0.0.71:9200"] index => "test" } }'

#After startup, type something and then go to ES to check the index

[root@web01 ~]# logstash -e 'input { stdin {} } output { elasticsearch { hosts => ["10.0.0.71:9200"] index => "test_%{+YYYY-MM-dd}" } }'
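
After typing a few lines into each of the two commands above, the indices can be verified on the ES node (assuming 10.0.0.71 is reachable, as in the commands above):

[root@web01 ~]# curl -s "http://10.0.0.71:9200/_cat/indices/test*?v"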
