原创

ElasticSearch 距离排序模型

验证按时间增量同步的数据是否已经跟上（即新增的门店能否在索引中查到）

GET /shop/_search
{
  // Exact-phrase lookup of one shop by its name.
  "query": {
    "match_phrase": { "name": "和府捞面(正大乐城店)" }
  },
  // Ascending by id; shorthand for {"id": {"order": "asc"}}.
  "sort": [
    { "id": "asc" }
  ]
}

期望搜索出凯悦酒店，但是出现了花悦庭果木烤鸭（原因：“凯悦”被分词为“凯”和“悦”，而“花悦庭果木烤鸭”的分词结果中也含有“悦”，见下面的 _analyze 验证）

GET /shop/_search
{
  // Full-text match on the analyzed "name" field (long form of "name": "凯悦").
  "query": {
    "match": {
      "name": { "query": "凯悦" }
    }
  }
}

GET /shop/_analyze
{
  // Author's observed output: two tokens, 凯 and 悦.
  "text": "凯悦",
  "analyzer": "ik_smart"
}

GET /shop/_analyze
{
  // Author's observed output: the token list contains 悦.
  "text": "花悦庭果木烤鸭",
  "analyzer": "ik_max_word"
}

带上距离字段

GET /shop/_search
{
  // Author's note: without "_source", hits show only the scripted "distance" field.
  "_source": "*",
  "query": {
    "match": { "name": "凯悦" }
  },
  "script_fields": {
    // Extra per-hit field added to the response.
    "distance": {
      "script": {
        "lang": "expression",
        // haversin: haversine (great-circle) distance between (lat, lon) and the shop's location.
        "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
        // NOTE(review): lon 127.12 differs from the 121.48789949 used in the final query - confirm it is intended.
        "params": { "lat": 31.37, "lon": 127.12 }
      }
    }
  }
}

使用距离排序

GET /shop/_search
{
  // Author's note: without "_source", hits show only the scripted "distance" field.
  "_source": "*",
  "query": {
    "match": { "name": "凯悦" }
  },
  "script_fields": {
    // Extra per-hit field added to the response.
    "distance": {
      "script": {
        "lang": "expression",
        // haversin: haversine (great-circle) distance between (lat, lon) and the shop's location.
        "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
        "params": { "lat": 31.37, "lon": 127.12 }
      }
    }
  },
  // Sorting on a non-score key makes every hit report "_score": null.
  "sort": [
    {
      "_geo_distance": {
        "location": { "lat": 31.37, "lon": 127.12 },
        "order": "asc", // the default for non-score sorts, spelled out: nearest first
        "unit": "km",
        "distance_type": "arc" // spherical calculation
      }
    }
  ]
}

使用function score解决排序模型【搜索最终版本】

GET /shop/_search
{
  // Final search request: ranks shops by text relevance plus distance decay plus ratings.
  // "explain": true, // enable last, when tuning the scoring
  "_source": "*",
  "script_fields": {
    // Extra per-hit field added to the response.
    "distance": {
      "script": {
        "lang": "expression",
        // haversin: haversine (great-circle) distance between (lat, lon) and the shop's location.
        "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
        "params": { "lat": 31.23916171, "lon": 121.48789949 }
      }
    }
  },
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            // Text relevance is down-weighted with boost 0.1; the author notes this
            // moved 花悦庭果木烤鸭 to second place.
            { "match": { "name": { "query": "凯悦", "boost": 0.1 } } }
          ],
          // Exact yes/no conditions belong in filter context: they are not scored
          // and can be cached. In "must" each one only added the same constant to
          // every hit, so the ranking is unchanged.
          "filter": [
            { "term": { "seller_disabled_flag": 0 } },
            { "term": { "category_id": 2 } }
          ]
        }
      },
      "functions": [
        {
          "gauss": {
            "location": {
              "origin": "31.23916171,121.48789949",
              "scale": "100km", // distance from origin+offset at which the score equals "decay"
              "offset": "0km",  // decay starts right at the origin
              "decay": 0.5
            }
          },
          "weight": 9 // distance matters more than the ratings
        },
        {
          "field_value_factor": {
            "field": "remark_score", // shop rating, 1-5 per the author; weight 0.2 scales it to at most 1
            "missing": 0             // documents without the field contribute 0 instead of failing the request
          },
          "weight": 0.2
        },
        {
          "field_value_factor": {
            "field": "seller_remark_score", // seller rating, 1-5; weighted 0.1 because the shop rating matters more
            "missing": 0                    // documents without the field contribute 0 instead of failing the request
          },
          "weight": 0.1
        }
      ],
      "score_mode": "sum",
      // "sum" adds the function total to the text-relevance score; "replace" would
      // ignore text relevance and rank by distance and ratings only.
      // NOTE(review): the original note says the project's search uses "replace" - confirm which mode is intended.
      "boost_mode": "sum"
    }
  },
  "sort": [
    { "_score": { "order": "desc" } } // finally, rank by the computed score
  ]
}

重新定义门店索引结构，支持同义词

PUT /shop
{
  // Shop index definition with synonym-aware analyzers for the "name" field.
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0, // single node per the author; adjust to the real cluster
    "analysis": {
      "filter": {
        // Synonym token filter. The name is arbitrary, but it must match the
        // "filter" lists of the analyzers below.
        "my_synonym_fiter": {
          "type": "synonym",
          "synonyms_path": "analysis-ik/synonyms.dic" // relative to the node's config directory
        }
      },
      "analyzer": {
        // Custom analyzers; the "name" mapping below refers to them.
        "ik_syno": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["my_synonym_fiter"]
        },
        "ik_syno_max": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["my_synonym_fiter"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "integer" },
      // Index with the fine-grained analyzer, search with the coarse one; both apply synonyms.
      "name": {
        "type": "text",
        "analyzer": "ik_syno_max",
        "search_analyzer": "ik_syno"
      },
      // Multiple tags separated by spaces. fielddata allows aggregating on the
      // individual tokens, at a noticeable memory cost.
      "tags": {
        "type": "text",
        "analyzer": "whitespace",
        "fielddata": true
      },
      "location": { "type": "geo_point" },
      "remark_score": { "type": "double" },
      "price_per_man": { "type": "integer" },
      "category_id": { "type": "integer" },
      "category_name": { "type": "keyword" }, // keyword: not analyzed, exact match only
      "seller_id": { "type": "integer" },
      "seller_remark_score": { "type": "double" },
      "seller_disabled_flag": { "type": "integer" }
    }
  }
}

验证同义词分析

GET /shop/_analyze
{
  // Author's observation: the result is 烤鸭, 能吃 and 的. Analyzing "能吃的" gives the
  // same three results, but analyzing "能吃" gives only "能吃".
  "text": "烤鸭",
  "field": "name" // use the analyzer derived from the "name" field mapping
}

正文到此结束

所属分类：数据库

本文标签： ElasticSearch
本文链接： https://code.jiangjiesheng.cn/article/48
版权声明： 本文由小江同学原创发布，转载请先联系本站长，谢谢。

ElasticSearch 距离排序模型

热门推荐

相关文章

说给你听

本文目录

标签云

近期评论

网站信息