原创

Elasticsearch 距离排序模型

  • 验证按时间的增量是否能跟上去
    GET /shop/_search
    {
      // Phrase-match one specific shop name; the surrounding notes use this
      // request to check that incremental indexing is keeping up.
      "query": {
        "match_phrase": { "name": "和府捞面(正大乐城店)" }
      },
      // Short form of {"id": {"order": "asc"}}: ascending by id.
      "sort": [
        { "id": "asc" }
      ]
    }
    
  • 期望搜索出“凯悦酒店”,但结果中却出现了“花悦庭果木烤鸭”(下面用 _analyze 查看分词结果来排查原因)
GET /shop/_search
{
  // Analyzed full-text match on the shop name
  // (expanded form of "name": "凯悦").
  "query": {
    "match": {
      "name": { "query": "凯悦" }
    }
  }
}

GET /shop/_analyze
{
  // Author's observation: ik_smart splits this text into the two tokens 凯 and 悦.
  "text": "凯悦",
  "analyzer": "ik_smart"
}

GET /shop/_analyze
{
  // Author's observation: the ik_max_word output for this name contains the token 悦.
  "text": "花悦庭果木烤鸭",
  "analyzer": "ik_max_word"
}
  • 带上距离字段
    GET /shop/_search
    {
      "query": {
        "match": { "name": "凯悦" }
      },
      // Ask for the full _source explicitly; per the author's note, without
      // this only the computed "distance" field is shown in the hits.
      "_source": "*",
      "script_fields": {
        // Extra per-hit field computed at query time.
        "distance": {
          "script": {
            "lang": "expression",
            // haversin(...) applies the haversine formula between the point
            // passed in params and the document's "location" field.
            "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
            "params": { "lat": 31.37, "lon": 127.12 }
          }
        }
      }
    }
    
  • 使用距离排序
    GET /shop/_search
    {
      "query": {
        "match": { "name": "凯悦" }
      },
      // Ask for the full _source explicitly; per the author's note, without
      // this only the computed "distance" field is shown in the hits.
      "_source": "*",
      "script_fields": {
        // Extra per-hit field computed at query time.
        "distance": {
          "script": {
            "lang": "expression",
            // haversin(...) applies the haversine formula between the point
            // passed in params and the document's "location" field.
            "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
            "params": { "lat": 31.37, "lon": 127.12 }
          }
        }
      },
      // Author's observation: sorting by distance instead of relevance makes
      // the hits come back with "_score": null.
      "sort": [
        {
          "_geo_distance": {
            "location": { "lat": 31.37, "lon": 127.12 },
            // Ascending is already the default for non-_score sorts; spelled out for clarity.
            "order": "asc",
            "unit": "km",
            // "arc" = spherical distance calculation.
            "distance_type": "arc"
          }
        }
      ]
    }
    
  • 使用 function_score 解决排序模型【搜索最终版本】
    GET /shop/_search
    {
      // Final search request: text relevance, distance decay and the two rating
      // fields are combined into one score by function_score.
      // "explain": true,  // turn on last, when tuning the scores with explain
      "_source": "*",
      "script_fields": {
        // Extra per-hit field: haversine distance between the caller's point
        // (params) and the document's "location" field.
        "distance": {
          "script": {
            "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
            "lang": "expression",
            "params": { "lat": 31.23916171, "lon": 121.48789949 }
          }
        }
      },
      "query": {
        "function_score": {
          "query": {
            "bool": {
              "must": [
                // Text relevance is down-weighted by boost 0.1; author's observation:
                // with this boost 花悦庭果木烤鸭 drops to second place.
                { "match": { "name": { "query": "凯悦", "boost": 0.1 } } }
              ],
              // Exact-match yes/no conditions go in filter context: they restrict the
              // hits without adding a constant to every score, and can be cached.
              // Ranking is the same as with these clauses under "must".
              "filter": [
                { "term": { "seller_disabled_flag": 0 } },
                { "term": { "category_id": 2 } }
              ]
            }
          },
          "functions": [
            {
              // Gaussian decay on distance from the caller's position.
              "gauss": {
                "location": {
                  "origin": "31.23916171,121.48789949",
                  // Distance from origin + offset at which the score has decayed to "decay".
                  "scale": "100km",
                  // No decay is applied within this distance of the origin.
                  "offset": "0km",
                  "decay": 0.5
                }
              },
              // Per the author, distance matters more than the ratings.
              "weight": 9
            },
            {
              "field_value_factor": {
                // Shop rating, 1-5 per the author; weight 0.2 scales it into the 0-1 range.
                "field": "remark_score",
                // Without "missing", a document that lacks the field makes the
                // whole query fail; unrated shops contribute 0 instead.
                "missing": 0
              },
              "weight": 0.2
            },
            {
              "field_value_factor": {
                // Seller rating, also 1-5; the shop rating is considered more
                // important, so the seller rating gets the smaller weight 0.1.
                "field": "seller_remark_score",
                // Same guard as above for sellers without a rating.
                "missing": 0
              },
              "weight": 0.1
            }
          ],
          // Add the three function results together ...
          "score_mode": "sum",
          // ... and add that total to the text-relevance score. "replace" would
          // ignore the match relevance and rank purely on distance and ratings;
          // the author notes that the project's real search uses "replace".
          "boost_mode": "sum"
        }
      },
      // Finally order by the combined score, best first.
      "sort": [
        { "_score": { "order": "desc" } }
      ]
    }
    

    gauss-高斯函数说明


  • 重新定义门店索引结构,支持同义词
    PUT /shop
    {
      "settings": {
        "number_of_shards": 1,
        // Adjust to the real cluster; this setup is a single node, hence 0 replicas.
        "number_of_replicas": 0,
        // Analysis chain that adds synonym support.
        "analysis": {
          "filter": {
            // Arbitrary filter name; the analyzers below must reference exactly this spelling.
            "my_synonym_fiter": {
              "type": "synonym",
              // Per the author's note, resolved relative to the ES node's config/ directory.
              "synonyms_path": "analysis-ik/synonyms.dic"
            }
          },
          "analyzer": {
            // Custom analyzers; the "name" field mapping below is switched over to them.
            "ik_syno": {
              "type": "custom",
              "tokenizer": "ik_smart",
              "filter": ["my_synonym_fiter"]
            },
            "ik_syno_max": {
              "type": "custom",
              "tokenizer": "ik_max_word",
              "filter": ["my_synonym_fiter"]
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "id": { "type": "integer" },
          // ik_syno_max at index time, ik_syno at search time (the author's stated
          // analyzer best practice); both custom analyzers apply the synonym filter.
          "name": {
            "type": "text",
            "analyzer": "ik_syno_max",
            "search_analyzer": "ik_syno"
          },
          // Several tags separated by spaces. fielddata lets the analyzed tokens be
          // aggregated; the author notes that it is memory-hungry.
          "tags": {
            "type": "text",
            "analyzer": "whitespace",
            "fielddata": true
          },
          "location": { "type": "geo_point" },
          "remark_score": { "type": "double" },
          "price_per_man": { "type": "integer" },
          "category_id": { "type": "integer" },
          // keyword: not analyzed, matched as a whole value.
          "category_name": { "type": "keyword" },
          "seller_id": { "type": "integer" },
          "seller_remark_score": { "type": "double" },
          "seller_disabled_flag": { "type": "integer" }
        }
      }
    }
    
  • 验证同义词分析
    GET /shop/_analyze
    {
      // Analyze the text with the analyzer configured on the "name" field, to
      // check synonym expansion. Author's observation: 烤鸭 yields the tokens
      // 烤鸭 / 能吃 / 的; analyzing 能吃的 gives the same three tokens, while
      // analyzing 能吃 alone yields only 能吃.
      "text": "烤鸭",
      "field": "name"
    }
    
正文到此结束
本文目录