ElasticSearch 距离排序模型
- 验证按时间的增量是否能跟上去
GET /shop/_search
{
  // Phrase-match one exact shop name and return the hits ordered by id ascending.
  "query": {
    "match_phrase": {
      "name": "和府捞面(正大乐城店)"
    }
  },
  "sort": [
    { "id": { "order": "asc" } }
  ]
}
- 期望搜索出凯悦酒店,但是出现了 花悦庭果木烤鸭(原因:“凯悦”被分词为 凯、悦 两个词,而“花悦庭果木烤鸭”的分词结果中含有“悦”,因此也被匹配到,见下面的分词验证)
GET /shop/_search
{
  // Full-text match on the shop name, written in the long form of the match clause.
  "query": {
    "match": {
      "name": { "query": "凯悦" }
    }
  }
}
GET /shop/_analyze
{
  "text": "凯悦",
  "analyzer": "ik_smart" // splits the text into 凯 and 悦
}
GET /shop/_analyze
{
  "text": "花悦庭果木烤鸭",
  "analyzer": "ik_max_word" // the resulting tokens include 悦
}
- 带上距离字段
GET /shop/_search
{
  "query": {
    "match": {
      "name": "凯悦"
    }
  },
  // Without "_source", only the "distance" script field is shown in the results.
  "_source": "*",
  "script_fields": {
    // Newly defined field, computed for every hit.
    "distance": {
      "script": {
        // Haversine distance between the (lat, lon) params and the document's "location".
        "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
        "lang": "expression",
        "params": { "lat": 31.37, "lon": 127.12 }
      }
    }
  }
}
- 使用距离排序
GET /shop/_search
{
  "query": {
    "match": {
      "name": "凯悦"
    }
  },
  // Without "_source", only the "distance" script field is shown in the results.
  "_source": "*",
  "script_fields": {
    // Newly defined field, computed for every hit.
    "distance": {
      "script": {
        // Haversine distance between the (lat, lon) params and the document's "location".
        "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
        "lang": "expression",
        "params": { "lat": 31.37, "lon": 127.12 }
      }
    }
  },
  // Sorting by distance makes every hit come back with "_score": null.
  "sort": [
    {
      "_geo_distance": {
        "location": {
          "lat": 31.37,
          "lon": 127.12
        },
        "unit": "km",
        "distance_type": "arc" // spherical distance
      }
    }
  ]
}
- 使用function score解决排序模型 【搜索最终版本】
GET /shop/_search
{
  // "explain": true, // enable last, when tuning the scores
  "_source": "*",
  "script_fields": {
    // Newly defined field, computed for every hit.
    "distance": {
      "script": {
        // Haversine distance between the (lat, lon) params and the document's "location".
        "source": "haversin(lat,lon,doc['location'].lat,doc['location'].lon)",
        "lang": "expression",
        "params": { "lat": 31.23916171, "lon": 121.48789949 }
      }
    }
  },
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            // Text relevance is down-weighted with boost 0.1; with this,
            // 花悦庭果木烤鸭 moved to second place.
            { "match": { "name": { "query": "凯悦", "boost": 0.1 } } },
            // Exact-match filters.
            { "term": { "seller_disabled_flag": 0 } },
            { "term": { "category_id": 2 } }
          ]
        }
      },
      "functions": [
        {
          "gauss": {
            "location": {
              "origin": "31.23916171,121.48789949",
              "scale": "100km", // decay distance
              "offset": "0km",  // where the decay starts
              "decay": 0.5
            }
          },
          // Distance matters more than the ratings, hence the larger weight.
          "weight": 9
        },
        {
          "field_value_factor": {
            // Shop rating, 1-5; weight 0.2 normalizes it into the 0-1 range.
            "field": "remark_score"
          },
          "weight": 0.2
        },
        {
          "field_value_factor": {
            // Seller rating, also 1-5. The shop rating is more important than the
            // seller rating, so the seller rating gets the smaller weight 0.1.
            "field": "seller_remark_score"
          },
          "weight": 0.1
        }
      ],
      "score_mode": "sum",
      // Author's note: "replace" would ignore the text relevance and rank only by
      // distance and ratings. The note says the project's search uses "replace",
      // while this request uses "sum".
      "boost_mode": "sum"
    }
  },
  "sort": [
    {
      // Finally, order by the computed score.
      "_score": {
        "order": "desc"
      }
    }
  ]
}
- 重新定义门店索引结构,支持同义词
PUT /shop
{
  "settings": {
    "number_of_shards": 1,
    // Adjust to the actual cluster; this setup is a single node, so 0 replicas.
    "number_of_replicas": 0,
    // Analysis settings that add synonym support.
    "analysis": {
      "filter": {
        // The filter name is arbitrary; it only has to match the references below.
        "my_synonym_filter": {
          "type": "synonym",
          // Path relative to the ES node's config/ directory.
          "synonyms_path": "analysis-ik/synonyms.dic"
        }
      },
      "analyzer": {
        // Custom analyzers; the "name" field mapping below must point to them.
        "ik_syno": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "filter": ["my_synonym_filter"]
        },
        "ik_syno_max": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["my_synonym_filter"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "integer" },
      // Analyzer best practice from the author's notes: index with the max_word
      // variant, search with the smart variant; both are the custom
      // synonym-aware analyzers defined above.
      "name": { "type": "text", "analyzer": "ik_syno_max", "search_analyzer": "ik_syno" },
      // Multiple tags are separated by spaces. "fielddata" allows aggregating on the
      // analyzed tokens (one value becomes many), which is memory-intensive.
      "tags": { "type": "text", "analyzer": "whitespace", "fielddata": true },
      "location": { "type": "geo_point" },
      "remark_score": { "type": "double" },
      "price_per_man": { "type": "integer" },
      "category_id": { "type": "integer" },
      // keyword: not analyzed, matched as a whole value.
      "category_name": { "type": "keyword" },
      "seller_id": { "type": "integer" },
      "seller_remark_score": { "type": "double" },
      "seller_disabled_flag": { "type": "integer" }
    }
  }
}
- 验证同义词分析
GET /shop/_analyze
{
  // Author's observation: "烤鸭" is analyzed into 烤鸭 / 能吃 / 的. Analyzing "能吃的"
  // yields the same three tokens, but analyzing "能吃" yields only "能吃".
  "field": "name",
  "text": "烤鸭"
}
正文到此结束
- 本文标签: ElasticSearch
- 本文链接: https://code.jiangjiesheng.cn/article/48
- 版权声明: 本文由小江同学原创发布,转载请先联系本站长,谢谢。