A concise tutorial on elasticsearch

Installation and configuration

Install

  • Download the packages from the official website, www.elastic.co/downloads; this tutorial uses elasticsearch-6.8.1.deb.
  • Also download the ik tokenizer (elasticsearch-analysis-ik-6.8.1.zip) and kibana-6.8.1-amd64.deb.
  • Pay attention to versions: the tokenizer and Kibana versions must be the same as the Elasticsearch version.

Configure elasticsearch

  • Edit the Elasticsearch environment file (/etc/default/elasticsearch on Ubuntu,
    /etc/sysconfig/elasticsearch on CentOS) and configure JAVA_HOME:
      JAVA_HOME=/opt/xxx/jdk
  • Modify the /etc/elasticsearch/jvm.options file:
      #JVM heap size: keep the minimum (-Xms) and maximum (-Xmx) equal; the packaged default is 1g
      -Xms2g
      -Xmx2g
  • Modify the /etc/elasticsearch/elasticsearch.yml file
      cluster.name: elmode # cluster name
      node.name: node-1 # node name
      network.host: 0.0.0.0 # bind address; 0.0.0.0 means the node can be reached from all addresses
      http.port: 9200 # HTTP port, default 9200 (the setting name is http.port, and YAML needs a space after the colon)
      discovery.zen.ping.unicast.hosts: ["test"] # host names of the nodes to contact for discovery
      # Do not lock the process memory and do not install system call filters
      bootstrap.memory_lock: false
      bootstrap.system_call_filter: false
  • Unzip and copy the tokenizer to the /usr/share/elasticsearch/plugins directory.
  • start elasticsearch
  sudo systemctl start elasticsearch.service
  • Kibana configuration (/etc/kibana/kibana.yml)
    server.host: "10.1.3.15" #configure ip
    elasticsearch.hosts: ["http://10.1.3.16:9200"] #Configure the elasticsearch node to be monitored
  • start kibana
sudo systemctl start kibana.service
  • Access Kibana on port 5601 of the address configured in server.host (127.0.0.1:5601 when it listens locally)
    Kibana starts slowly; if the page shows "Kibana server is not ready yet", wait a while and reload.
    After a successful start the Kibana page appears. Open the Dev Tools option in the left menu and enter DSL statements on that page
    to operate Elasticsearch directly.

DSL common operations

index operation

    #query all indexes
    GET /_cat/indices

    #Create an index, test is the index name, and specify the number of shards and replicas
    PUT /test  
    {
	"settings": {
	    "number_of_shards": 5,
	    "number_of_replicas": 1
	}
    }

    #query index
    GET /test

    #drop index
    DELETE /test

Field mappings

    #Create field mappings; goods is the type name, which is roughly equivalent to a database table
    PUT /test/_mapping/goods
    {
	"properties": {
	    "title": {
		"type": "text",
		"analyzer": "ik_max_word" 
	    },
	    "images": {
		"type": "keyword", 
		"index": false
	    },
	    "price": {
		"type": "long"
	    }
	}
    }

    #query the mapping
    GET /test/_mapping

document manipulation

    #Adding a document is equivalent to adding a database record, test is the index name, and goods is the type name.
    #10001 is a manually specified id, and it can be automatically generated if there is no specification.
    POST /test/goods/10001
    {
	"title": "Xiaomi mobile phone, born for fever",
	"image": "http://www.xxx.com/xxx.jpg",
	"price": 1999
    }

    #query document
    GET /test/_search

    #Specify query conditions
    GET /test/_search
    {
	"query": {
	    "match": {
		"title": "Millet"
	    }
	}
    }

    #Modify the document, modify the price with id 10001
    POST /test/goods/10001/_update
    {
	"doc": {
	    "price": 2399.9
	}
    }

    #Delete a document; lc9-dnQBt5qA1asqTRh7 is the id of the document to delete (an automatically generated id in this example)
    DELETE /test/goods/lc9-dnQBt5qA1asqTRh7

Query

  • query all
GET /test/_search
  • id query
#Fetch one document by id with the get API (/<index>/<type>/<id>); the _search endpoint does not take an id in its path
GET /test/goods/10001
  • match query
    #match all
    GET /test/_search
    {
	"query": {
	    "match_all": {}
	}
    }
    #match phrases
    GET /test/_search
    {
	"query": {
	    "match_phrase": {
		"title": "Huawei cell phone"
	    }
	}
    }
  • Other queries
    #Multi-field match
    GET /test/_search
    {
	"query": {
	    "multi_match": {
		"query": "Millet",
		"fields": ["title","image"]
	    }
	}
    }
    #Term query: the term is matched as a whole, without word segmentation
    GET /test/_search
    {
	"query": {
	    "term": {
		"title": {
		    "value": "Huawei"
		}
	    }
	}
    }

    #Terms query (matches any of several terms)
    GET /test/_search
    {
	"query": {
	    "terms": {
		"title": [
		    "Huawei",
		    "cell phone"
		]
	    }
	}
    }

    #range query
    GET /test/_search
    {
	"query": {
	    "range": {
		"price": {
		    "gte": 1000,
		    "lte": 2000
		}
	    }
	}
    }
    #Fuzzy query: tolerates spelling errors as long as the error does not exceed 2 characters; only for English text.
    GET /test/_search
    {
	"query": {
	    "fuzzy": {
		"title": {
		    "value": "oppe"
		}
	    }
	}
    }
  • boolean query
#must = intersection (every clause has to match), should = union (any clause may match)
GET /test/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "price": {
              "gte": 1999,
              "lte": 2999
            }
          }
        },{
          "range": {
            "price": {
              "gte": 2999,
              "lte": 3999
            }
          }
        }
      ]
    }
  }
}

filter

GET /test/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "title":  "cell phone"
          }
        }
      ],"filter": {
        "term": {
          "title": "oppo"
        }
      }
    }
  }
}

Sort, paginate, highlight, and filter the result set

  • Filtering the result set here means choosing which fields are returned (_source)
GET /test/_search
{
  "query": {
   "match": {
     "title": "cell phone"
   }
  },"sort": [
    {
      "price": {
        "order": "asc"
      }
    },{
      "_id":{
        "order": "desc"
      }
    }
  ],
  "from": 0,
  "size": 5,
  "highlight": {
    "fields":{"title":{}},
    "pre_tags": "<em>",
    "post_tags": "</em>"
  },
  "_source": ["title","price"]
}

Aggregations and metrics

GET /test/_search
{
  "size": 0, 
  "aggs": {
    "brand": {
      "terms": {
        "field": "attr.brand.keyword"
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

Operating Elasticsearch from Java

Add the dependency

  <dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
  </dependency>

Define the POJO class and its mapping to the document

  /**
   * Entity class mapped to an Elasticsearch document. The @Document attributes give, in order,
   * the index name, the type name, the number of shards and the number of replicas.
   */
  @Document(indexName = "user", type = "info", shards = 3, replicas = 3)
  public class User {

      // Document id
      @Id
      private Long id;

      // Text field; the analyzer attribute selects the ik_max_word tokenizer
      @Field(type = FieldType.Text, analyzer = "ik_max_word")
      private String name;

      // Integer field
      @Field(type = FieldType.Integer)
      private Integer age;

      // Keyword field declared with index = false
      @Field(type = FieldType.Keyword, index = false)
      private String password;
	
      // No-argument constructor
      public User() {}
	
      // Constructor that sets every field
      public User(Long id, String name, Integer age, String password) {
	  this.id = id;
	  this.name = name;
	  this.age = age;
	  this.password = password;
      }

      // Getters and setters omitted...

  }

Indexing and document-related operations

  • Document operations go through the UserRepository interface, which is defined as follows:
/**
 * Repository for User documents with Long ids. It extends ElasticsearchRepository
 * and declares no methods of its own at this point.
 */
public interface UserRepository extends ElasticsearchRepository<User, Long> {

}
  /**
   * Spring Boot test that creates the index and mapping for User and
   * stores a batch of sample users through the repository.
   */
  @SpringBootTest
  @RunWith(SpringRunner.class)
  public class ElasticSearchTest {

      @Autowired
      private ElasticsearchRestTemplate restTemplate;

      @Autowired
      private UserRepository userRepository;

      /** Creates the index for User first, then puts its field mapping. */
      @Test
      public void elasticsearch() {
          restTemplate.createIndex(User.class);
          restTemplate.putMapping(User.class);
      }

      /** Adds six sample users in one bulk call. */
      @Test
      public void repository() {
          // A single document (comparable to one table record) could be added with:
          // userRepository.save(new User(10001L,"Zhang Tiantian",18,"123465"));

          // Bulk add
          userRepository.saveAll(Arrays.asList(
                  new User(10002L, "Zhao Zhong", 21, "123456"),
                  new User(10003L, "a lot of money", 23, "123456"),
                  new User(10004L, "Sun Xiangyi", 22, "123456"),
                  new User(10005L, "Li Mingming", 18, "123456"),
                  new User(10006L, "Zhou Tao", 20, "123456"),
                  new User(10007L, "Wu Yuan", 35, "123456")));
      }
  }

Query

  public interface UserRepository extends ElasticsearchRepository<User, Long> {

      //Customize the query, the method name can be used directly according to the template.
      List<User> findByAgeBetween(Integer age1, Integer age2);
      //Example of method name template for official document query:
      /##
       # 
       # findByName : name is the field name, query by name, if you query by other fields, you can change the field, such as: findByAge.
       # fintByNameNot : Invert by name query.
       # findByAgeLessThan: LessThan fixed writing, query results younger than the specified age
       # findByAgeGreateThan: Query results older than the specified age
       # findByAgeBetween: query age range
       # findByNameAndAge: Multiple field query
       # ....
       # For more templates, refer to the official documentation.
       # 
       #/
  }

Advanced Search

Use the DSL statement directly

  // Custom query written directly as a DSL statement: a bool/should of two range clauses on age,
  // i.e. the union of the ranges age1..age2 and age3..age4 (bounds given as gte/lte).
  // ?0, ?1, ?2, ?3 in the DSL statement are placeholders that match the 4 method parameters in order,
  // so the age bounds are simply passed in when the method is called.
  @Query("{\n" + 
	 "    \"bool\": {\n" + 
	 "      \"should\": [\n" + 
	 "        {\n" + 
	 "          \"range\": {\n" + 
	 "            \"age\": {\n" + 
	 "              \"gte\": \"?0\",\n" + 
	 "              \"lte\": \"?1\"\n" + 
	 "            }\n" + 
	 "          }\n" + 
	 "        },{\n" + 
	 "          \"range\": {\n" + 
	 "            \"age\": {\n" + 
	 "              \"gte\": \"?2\",\n" + 
	 "              \"lte\": \"?3\"\n" + 
	 "            }\n" + 
	 "          }\n" + 
	 "        }\n" + 
	 "      ]\n" + 
	 "    }\n" + 
	 "  }")
  List<User> findByQuery(Integer age1, Integer age2, Integer age3, Integer age4);

Build a query with pagination, sorting, and highlighting

  /**
   * Runs a match query on the name field with pagination, sorting by age and
   * highlighting of name, then prints the name and age of every returned user.
   */
  @Test
  public void queryBuild() {
      Page<User> hits = this.userRepository.search(
              new NativeSearchQueryBuilder()
                      .withQuery(QueryBuilders.matchQuery("name", "day by day"))
                      // pagination: page index 0, page size 2
                      .withPageable(PageRequest.of(0, 2))
                      // sort by age, ascending
                      .withSort(SortBuilders.fieldSort("age").order(SortOrder.ASC))
                      // highlight matches in name with <em>...</em>
                      .withHighlightBuilder(new HighlightBuilder().field("name").preTags("<em>").postTags("</em>"))
                      .build());

      hits.getContent().forEach(u -> System.out.println(u.getName() + " : " + u.getAge()));
  }

A simple example

Simulate the keyword search on the home page of a shopping website.

Define the mapping of entity classes to documents

  /**
   * Product document stored in the "goods" index (type "info", 3 shards, 2 replicas).
   */
  @Document(indexName = "goods", type = "info", shards = 3, replicas = 2)
  public class Goods {

      // Document id
      @Id
      private Long skuId;

      // Picture; keyword field declared with index = false
      @Field(type = FieldType.Keyword, index = false)
      private String pic;

      // Title; text field analyzed with the ik_max_word tokenizer
      @Field(type = FieldType.Text, analyzer = "ik_max_word")
      private String title;

      // Mapped as a numeric field (previously an un-indexed keyword) so that sorting by
      // price orders values numerically instead of lexicographically, and so that range
      // filters and metric aggregations on price are possible. Sort values for price in
      // search responses are then numbers rather than strings.
      @Field(type = FieldType.Double)
      private BigDecimal price;

      @Field(type = FieldType.Long)
      private Long sale; // sales

      @Field(type = FieldType.Date)
      private Date createTime;

      @Field(type = FieldType.Long)
      private Long brandId; // brand id

      @Field(type = FieldType.Keyword)
      private String brandName; // brand name

      @Field(type = FieldType.Long)
      private Long categoryId; // category id

      @Field(type = FieldType.Keyword)
      private String categoryName; // category name

      // Nested field, so each search attribute can be queried as its own object
      @Field(type = FieldType.Nested)
      private List<?> attrs; // search properties

      // Getters, setters and constructor omitted
  }

create index

  /** Creates the index for Goods first, then puts its field mapping. */
  @Test
  public void init() {
      restTemplate.createIndex(Goods.class);
      restTemplate.putMapping(Goods.class);
  }

Prepare test data

  /**
   * Builds five search-attribute lists and eight Goods records that reference them,
   * then stores the records in three saveAll calls.
   */
  @Test
  public void data() {
      // Search-attribute sets referenced by the goods below
      List<SearchAttr> attrs1 = Arrays.asList(
              new SearchAttr(10001L, "running memory", "8GB"),
              new SearchAttr(10002L, "Screen", "ALOMD"),
              new SearchAttr(10003L, "storage", "128GB"));
      List<SearchAttr> attrs2 = Arrays.asList(
              new SearchAttr(10004L, "running memory", "6GB"),
              new SearchAttr(10005L, "Screen", "LCD"),
              new SearchAttr(10006L, "storage", "256GB"));
      List<SearchAttr> attrs3 = Arrays.asList(
              new SearchAttr(10007L, "running memory", "4GB"),
              new SearchAttr(10008L, "Screen", "ALOMD"),
              new SearchAttr(10009L, "Battery", "4000mah"));
      List<SearchAttr> attrs4 = Arrays.asList(
              new SearchAttr(10010L, "hard disk", "solid state"),
              new SearchAttr(10011L, "Resolution", "1920x1080"),
              new SearchAttr(10012L, "color", "dark night"));
      List<SearchAttr> attrs5 = Arrays.asList(
              new SearchAttr(10013L, "hard disk", "solid state"),
              new SearchAttr(10014L, "Resolution", "2560x1440"),
              new SearchAttr(10015L, "color", "morning mist white"));

      // Brand 30001, category 40001
      List<Goods> firstBatch = Arrays.asList(
              new Goods(20001L, "", "Xiaomi Mi 10 mobile phone", 4999.0, 1000L, new Date(), 30001L, "Millet", 40001L, "cell phone", attrs1),
              new Goods(20002L, "", "Xiaomi Mi 10 pro cell phone", 5999.0, 1000L, new Date(), 30001L, "Millet", 40001L, "cell phone", attrs2),
              new Goods(20003L, "", "Red rice note 8 cell phone", 1999.0, 1000L, new Date(), 30001L, "Millet", 40001L, "cell phone", attrs3));

      // Brand 30002, category 40001
      List<Goods> secondBatch = Arrays.asList(
              new Goods(20004L, "", "Huawei p40 cell phone", 4999.0, 800L, new Date(), 30002L, "Huawei", 40001L, "cell phone", attrs1),
              new Goods(20005L, "", "Huawei nova30 cell phone", 2999.0, 700L, new Date(), 30002L, "Huawei", 40001L, "cell phone", attrs2),
              new Goods(20006L, "", "glory play cell phone", 1999.0, 1500L, new Date(), 30002L, "Huawei", 40001L, "cell phone", attrs3));

      // Brand 30003, category 40002
      List<Goods> thirdBatch = Arrays.asList(
              new Goods(20007L, "", "Lenovo Air notebook", 5999.0, 600L, new Date(), 30003L, "Lenovo", 40002L, "notebook", attrs5),
              new Goods(20008L, "", "Lenovo Xiaoxin Notebook", 4888.0, 500L, new Date(), 30003L, "Lenovo", 40002L, "notebook", attrs4));

      goodsRepository.saveAll(firstBatch);
      goodsRepository.saveAll(secondBatch);
      goodsRepository.saveAll(thirdBatch);
  }

Write a DSL

This query uses the keyword "cell phone" to find Xiaomi and Huawei mobile phones with 8GB of running memory.

GET /goods/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "title": {
            "query": "cell phone",  #query keywords
            "operator": "and"
          }
        }}
      ],
      "filter": [
        {
        "terms":{
          "brandId": [30001,30002] #Filter conditions (Xiaomi brand and Huawei brand)
        }
      },
      {
        "terms":{
          "categoryId": [40001] # Filter Condition(Category: Mobile Phone)
        }
      },
      {
        "bool":{
          "must":[
            {
              "nested":{ #Nested property keywords
                "path": "attrs",
                "query":{
                  "bool":{
                    "must":[
                      {
                        "term":{
                          "attrs.attrId": 10001 #Search Properties (8GB RAM)
                        }
                      }
                      ]
                  }
                }
              }
            }
            ]
        }
      }
      ]
    }
  }
}

build query

  /**
   * Builds the goods search request with the high-level REST client: a match on title,
   * filters on brand, category and a nested attribute, plus paging, sorting,
   * highlighting and a terms aggregation. The response is printed.
   *
   * NOTE(review): unlike ElasticSearchTest, this class carries no @SpringBootTest/@RunWith
   * annotations, presumably omitted for brevity. Confirm, because without a Spring test
   * context the @Autowired client would not be injected.
   */
  public class GoodsTest {

      @Autowired
      private RestHighLevelClient client;

      /**
       * Assembles the query and executes it against the "goods" index, type "info".
       *
       * @throws IOException declared because client.search may throw it
       */
      @Test
      public void buildQueryDsl() throws IOException {
          // Specification attribute condition: nested query on attrs, wrapped in a bool/must
          BoolQueryBuilder attrFilter = QueryBuilders.boolQuery().must(
                  QueryBuilders.nestedQuery(
                          "attrs",
                          QueryBuilders.boolQuery().must(QueryBuilders.termQuery("attrs.attrId", "10001")),
                          ScoreMode.None));

          BoolQueryBuilder rootQuery = QueryBuilders.boolQuery()
                  // query condition: match on title with operator AND
                  .must(QueryBuilders.matchQuery("title", "cell phone").operator(Operator.AND))
                  // filter (brands)
                  .filter(QueryBuilders.termsQuery("brandId", "30001", "30002"))
                  // filter (categories)
                  .filter(QueryBuilders.termsQuery("categoryId", "40001"))
                  // filter (specification attribute)
                  .filter(attrFilter);

          SearchSourceBuilder source = new SearchSourceBuilder()
                  .query(rootQuery)
                  // pagination
                  .from(0)
                  .size(5)
                  // sort
                  .sort("price", SortOrder.ASC)
                  // highlight
                  .highlighter(new HighlightBuilder().field("title").preTags("<em>").postTags("</em>"))
                  // aggregation by brand
                  .aggregation(AggregationBuilders.terms("brnadIdAdd").field("brandId"));

          // "goods" is the index to query, "info" the type
          SearchRequest request = new SearchRequest("goods").types("info").source(source);
          SearchResponse response = client.search(request, RequestOptions.DEFAULT);

          System.out.println(response);
      }
  }

Parse the result set

The returned result set is json, which can be parsed by conventional methods.

  {
      "took": 20,
      "timed_out": false,
      "_shards": {
	  "total": 3,
	  "successful": 3,
	  "skipped": 0,
	  "failed": 0
      },
      "hits": {
	  "total": 2,
	  "max_score": null,
	  "hits": [{
	      "_index": "goods",
	      "_type": "info",
	      "_id": "20004",
	      "_score": null,
	      "_source": {
		  "skuId": 20004,
		  "pic": "",
		  "title": "Huawei p40 cell phone",
		  "price": 4999.0,
		  "sale": 800,
		  "createTime": 1599991429799,
		  "brandId": 30002,
		  "brandName": "Huawei",
		  "categoryId": 40001,
		  "categoryName": "cell phone",
		  "attrs": [{
		      "attrId": 10001,
		      "attrName": "running memory",
		      "attrValue": "8GB"
		  }, {
		      "attrId": 10002,
		      "attrName": "Screen",
		      "attrValue": "ALOMD"
		  }, {
		      "attrId": 10003,
		      "attrName": "storage",
		      "attrValue": "128GB"
		  }]
	      },
	      "highlight": {
		  "title": ["Huawei p40<em>cell phone</em>"]
	      },
	      "sort": ["4999.0"]
	  }, {
	      "_index": "goods",
	      "_type": "info",
	      "_id": "20001",
	      "_score": null,
	      "_source": {
		  "skuId": 20001,
		  "pic": "",
		  "title": "Xiaomi Mi 10 mobile phone",
		  "price": 4999.0,
		  "sale": 1000,
		  "createTime": 1599991429799,
		  "brandId": 30001,
		  "brandName": "Millet",
		  "categoryId": 40001,
		  "categoryName": "cell phone",
		  "attrs": [{
		      "attrId": 10001,
		      "attrName": "running memory",
		      "attrValue": "8GB"
		  }, {
		      "attrId": 10002,
		      "attrName": "Screen",
		      "attrValue": "ALOMD"
		  }, {
		      "attrId": 10003,
		      "attrName": "storage",
		      "attrValue": "128GB"
		  }]
	      },
	      "highlight": {
		  "title": ["Xiaomi Mi 10<em>cell phone</em>"]
	      },
	      "sort": ["4999.0"]
	  }]
      },
      "aggregations": {
	  "lterms#brnadIdAdd": {
	      "doc_count_error_upper_bound": 0,
	      "sum_other_doc_count": 0,
	      "buckets": [{
		  "key": 30001,
		  "doc_count": 1
	      }, {
		  "key": 30002,
		  "doc_count": 1
	      }]
	  }
      }
  }

Tags: Interview

Posted by GESmithPhoto on Tue, 17 May 2022 17:33:38 +0300