From 56dcb65ba2588c19f4c665e14a3052d67e63857c Mon Sep 17 00:00:00 2001 From: deepzz0 Date: Fri, 28 Oct 2016 01:10:57 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96elasticsearch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 158 +++++++++++++++++++++++++++++++++++- elasticsearch.go | 202 ++++++++++++++++------------------------------ front.go | 11 +-- helper.go | 2 +- views/search.html | 2 +- 5 files changed, 232 insertions(+), 143 deletions(-) diff --git a/README.md b/README.md index 33b519a..87fd231 100644 --- a/README.md +++ b/README.md @@ -353,4 +353,160 @@ after_success: - docker push registry.cn-hangzhou.aliyuncs.com/deepzz/eiblog ``` -2、修改`build_docker.sh`文件中的`domain`为自己仓库地址。执行`./build_docker.sh`。 \ No newline at end of file +2、修改`build_docker.sh`文件中的`domain`为自己仓库地址。执行`./build_docker.sh`。 + +### 资料备忘 + +#### 创建mapping + +``` + mapping := map[string]interface{}{ + "mappings": map[string]interface{}{ + TYPE: map[string]interface{}{ + "properties": map[string]interface{}{ + "title": map[string]string{ + "type": "string", + "term_vector": "with_positions_offsets", + "analyzer": "ik_syno", + "search_analyzer": "ik_syno", + }, + "content": map[string]string{ + "type": "string", + "term_vector": "with_positions_offsets", + "analyzer": "ik_syno", + "search_analyzer": "ik_syno", + }, + "slug": map[string]string{ + "type": "string", + }, + "tag": map[string]string{ + "type": "string", + "index": "not_analyzed", + }, + "date": map[string]string{ + "type": "date", + "index": "not_analyzed", + }, + }, + }, + }, + } +``` + +#### DSL高亮查询 + +``` +fehelperFeHelper:JSON格式化查看 + +{"highlight":{"fields":{"content":{},"title":{}},"post_tags":["\u003c/b\u003e"],"pre_tags":["\u003cb\u003e"]},"query":{"dis_max":{"queries":[{"match":{"title":{"boost":4,"minimum_should_match":"50%","query":"天气"}}},{"match":{"content":{"boost":4,"minimum_should_match":"75%","query":"天气"}}},{"match":{"tag":{"boost":2,"minimum_should_match":"100%","query":"天气"}}},{"match":{"slug":{"boost":1,"minimum_should_match":"100%","query":"天气"}}}],"tie_breaker":0.3}},"filter":{"bool":{"must":[{"range":{"date":{"gte":"2016-10","lte": "2016-10||/M","format": "yyyy-MM-dd||yyyy-MM||yyyy"}}},{"term":{"tag":"tag3"}}]}}} +格式化 +{ + "highlight": { + "fields": { + "content": {}, + "title": {} + }, + "post_tags": [ + "" + ], + "pre_tags": [ + "" + ] + }, + "query": { + "dis_max": { + "queries": [ + { + "match": { + "title": { + "boost": 4, + "minimum_should_match": "50%", + "query": "天气" + } + } + }, + { + "match": { + "content": { + "boost": 4, + "minimum_should_match": "75%", + "query": "天气" + } + } + }, + { + "match": { + "tag": { + "boost": 2, + "minimum_should_match": "100%", + "query": "天气" + } + } + }, + { + "match": { + "slug": { + "boost": 1, + "minimum_should_match": "100%", + "query": "天气" + } + } + } + ], + "tie_breaker": 0.3 + } + }, + "filter": { + "bool": { + "must": [ + { + "range": { + "date": { + "gte": "2016-10", + "lte": "2016-10||/M", + "format": "yyyy-MM-dd||yyyy-MM||yyyy" + } + } + }, + { + "term": { + "tag": "tag3" + } + } + ] + } + } +} +``` + +#### term 查询 + +``` +{ + "query": { + "bool": { + "must": [ + { + "term": { + "slug": "slug1" + } + },{ + "term": { + "tag": "tag1" + } + } + ] + } + }, + "filter": { + "range": { + "date": { + "gte": "2016-10", + "lte": "2016-10||/M", + "format": "yyyy-MM||yyyy" + } + } + } +} +``` + diff --git a/elasticsearch.go b/elasticsearch.go index f46279c..4db303e 100644 --- a/elasticsearch.go +++ b/elasticsearch.go @@ -7,6 +7,8 @@ import ( "fmt" "io/ioutil" "net/http" + "regexp" + "strings" "time" "github.com/eiblog/eiblog/setting" @@ -16,6 +18,10 @@ import ( const ( INDEX = "eiblog" TYPE = "article" + + ES_FILTER = `"filter":{"bool":{"must":[%s]}}` + ES_TERM = `{"term":{"%s":"%s"}}` + ES_DATE = `{"range":{"date":{"gte":"%s","lte": "%s","format": "yyyy-MM-dd||yyyy-MM||yyyy"}}}` // 2016-10||/M ) var es *ElasticService @@ -26,112 +32,63 @@ func init() { } func initIndex() { - mapping := map[string]interface{}{ - "mappings": map[string]interface{}{ - TYPE: map[string]interface{}{ - "properties": map[string]interface{}{ - "title": map[string]string{ - "type": "string", - "term_vector": "with_positions_offsets", - "analyzer": "ik_syno", - "search_analyzer": "ik_syno", - }, - "content": map[string]string{ - "type": "string", - "term_vector": "with_positions_offsets", - "analyzer": "ik_syno", - "search_analyzer": "ik_syno", - }, - "slug": map[string]string{ - "type": "string", - }, - "tags": map[string]string{ - "type": "string", - "index": "not_analyzed", - }, - "create_time": map[string]string{ - "type": "date", - "index": "not_analyzed", - }, - }, - }, - }, - } - b, _ := json.Marshal(mapping) - err := CreateIndexAndMappings(INDEX, TYPE, b) + mappings := fmt.Sprintf(`{"mappings":{"%s":{"properties":{"content":{"analyzer":"ik_syno","search_analyzer":"ik_syno","term_vector":"with_positions_offsets","type":"string"},"date":{"index":"not_analyzed","type":"date"},"slug":{"type":"string"},"tag":{"index":"not_analyzed","type":"string"},"title":{"analyzer":"ik_syno","search_analyzer":"ik_syno","term_vector":"with_positions_offsets","type":"string"}}}}}`, TYPE) + err := CreateIndexAndMappings(INDEX, TYPE, []byte(mappings)) if err != nil { logd.Fatal(err) } } -func Elasticsearch(kw string, size, from int) *ESSearchResult { - dsl := map[string]interface{}{ - "query": map[string]interface{}{ - "dis_max": map[string]interface{}{ - "queries": []map[string]interface{}{ - map[string]interface{}{ - "match": map[string]interface{}{ - "title": map[string]interface{}{ - "query": kw, - "minimum_should_match": "50%", - "boost": 4, - }, - }, - }, - map[string]interface{}{ - "match": map[string]interface{}{ - "content": map[string]interface{}{ - "query": kw, - "minimum_should_match": "75%", - "boost": 4, - }, - }, - }, - map[string]interface{}{ - "match": map[string]interface{}{ - "tags": map[string]interface{}{ - "query": kw, - "minimum_should_match": "100%", - "boost": 2, - }, - }, - }, - map[string]interface{}{ - "match": map[string]interface{}{ - "slug": map[string]interface{}{ - "query": kw, - "minimum_should_match": "100%", - "boost": 1, - }, - }, - }, - }, - "tie_breaker": 0.3, - }, - }, - "highlight": map[string]interface{}{ - "pre_tags": []string{""}, - "post_tags": []string{""}, - "fields": map[string]interface{}{ - "title": map[string]string{}, - "content": map[string]string{ - // "fragment_size": 150, - // "number_of_fragments": "3", - }, - }, - }, +func Elasticsearch(qStr string, size, from int) *ESSearchResult { + // 分析查询字符串 + reg := regexp.MustCompile(`(tag|slug|date):`) + indexs := reg.FindAllStringIndex(qStr, -1) + length := len(indexs) + var str, kw string + var filter []string + if length == 0 { // 全文搜索 + kw = qStr } - b, _ := json.Marshal(dsl) - docs, err := IndexQueryDSL(INDEX, TYPE, size, from, b) - if err != nil { - logd.Error(err) - return nil + // 字段搜索,检出 全文搜索 + for i, index := range indexs { + if i == length-1 { + str = qStr[index[0]:] + if space := strings.Index(str, " "); space != -1 && space < len(str)-1 { + kw = str[space+1:] + str = str[:space] + } + } else { + str = strings.TrimSpace(qStr[index[0]:indexs[i+1][0]]) + } + kv := strings.Split(str, ":") + switch kv[0] { + case "slug": + filter = append(filter, fmt.Sprintf(ES_TERM, kv[0], kv[1])) + case "tag": + filter = append(filter, fmt.Sprintf(ES_TERM, kv[0], kv[1])) + case "date": + var date string + switch len(kv[1]) { + case 4: + date = fmt.Sprintf(ES_DATE, kv[1], kv[1]+"||/y") + case 7: + date = fmt.Sprintf(ES_DATE, kv[1], kv[1]+"||/M") + case 10: + date = fmt.Sprintf(ES_DATE, kv[1], kv[1]+"||/d") + default: + break + } + filter = append(filter, date) + } } - return docs -} - -func ElasticsearchSimple(q string, size, from int) *ESSearchResult { - docs, err := IndexQuerySimple(INDEX, TYPE, size, from, q) + // 判断是否为空,选择搜索方式 + var dsl string + if kw != "" { + dsl = strings.Replace(strings.Replace(`{"highlight":{"fields":{"content":{},"title":{}},"post_tags":["\u003c/b\u003e"],"pre_tags":["\u003cb\u003e"]},"query":{"dis_max":{"queries":[{"match":{"title":{"boost":4,"minimum_should_match":"50%","query":"$1"}}},{"match":{"content":{"boost":4,"minimum_should_match":"75%","query":"$1"}}},{"match":{"tag":{"boost":2,"minimum_should_match":"100%","query":"$1"}}},{"match":{"slug":{"boost":1,"minimum_should_match":"100%","query":"$1"}}}],"tie_breaker":0.3}},$2}`, "$1", kw, -1), "$2", fmt.Sprintf(ES_FILTER, strings.Join(filter, ",")), -1) + } else { + dsl = fmt.Sprintf("{"+ES_FILTER+"}", strings.Join(filter, ",")) + } + docs, err := IndexQueryDSL(INDEX, TYPE, size, from, []byte(dsl)) if err != nil { logd.Error(err) return nil @@ -142,12 +99,12 @@ func ElasticsearchSimple(q string, size, from int) *ESSearchResult { func ElasticIndex(artc *Article) error { img := PickFirstImage(artc.Content) mapping := map[string]interface{}{ - "title": artc.Title, - "content": IgnoreHtmlTag(artc.Content), - "slug": artc.Slug, - "tags": artc.Tags, - "img": img, - "create_time": artc.CreateTime, + "title": artc.Title, + "content": IgnoreHtmlTag(artc.Content), + "slug": artc.Slug, + "tag": artc.Tags, + "img": img, + "date": artc.CreateTime, } b, _ := json.Marshal(mapping) return IndexOrUpdateDocument(INDEX, TYPE, artc.ID, b) @@ -245,9 +202,9 @@ func IndexOrUpdateDocument(index, typ string, id int32, doc []byte) (err error) } type ESDeleteDocument struct { - _Index string `json:"_index"` - _Type string `json:"_type"` - _ID string `json:"_id"` + Index string `json:"_index"` + Type string `json:"_type"` + ID string `json:"_id"` } type ESDeleteResult struct { @@ -260,7 +217,7 @@ type ESDeleteResult struct { func DeleteDocument(index, typ string, ids []string) error { var buff bytes.Buffer for _, id := range ids { - dd := &ESDeleteDocument{_Index: index, _Type: typ, _ID: id} + dd := &ESDeleteDocument{Index: index, Type: typ, ID: id} m := map[string]*ESDeleteDocument{"delete": dd} b, _ := json.Marshal(m) buff.Write(b) @@ -298,11 +255,11 @@ type ESSearchResult struct { Hits []struct { ID string `json:"_id"` Source struct { - Slug string `json:"slug"` - Content string `json:"content"` - CreateTime time.Time `json:"create_time"` - Title string `json:"title"` - Img string `json:"img"` + Slug string `json:"slug"` + Content string `json:"content"` + Date time.Time `json:"date"` + Title string `json:"title"` + Img string `json:"img"` } `json:"_source"` Highlight struct { Title []string `json:"title"` @@ -312,23 +269,6 @@ type ESSearchResult struct { } `json:"hits"` } -func IndexQuerySimple(index, typ string, size, from int, q string) (*ESSearchResult, error) { - req, err := http.NewRequest("GET", es.ParseURL("/%s/%s/_search?size=%d&from=%d&q=%s", index, typ, size, from, q), nil) - if err != nil { - return nil, err - } - data, err := es.Do(req) - if err != nil { - return nil, err - } - result := &ESSearchResult{} - err = json.Unmarshal(data.([]byte), result) - if err != nil { - return nil, err - } - return result, nil -} - func IndexQueryDSL(index, typ string, size, from int, dsl []byte) (*ESSearchResult, error) { req, err := http.NewRequest("POST", es.ParseURL("/%s/%s/_search?size=%d&from=%d", index, typ, size, from), bytes.NewReader(dsl)) if err != nil { diff --git a/front.go b/front.go index f30fd6c..d9d176c 100644 --- a/front.go +++ b/front.go @@ -8,7 +8,6 @@ import ( "html/template" "io/ioutil" "net/http" - "regexp" "strconv" "strings" "time" @@ -156,7 +155,7 @@ func HandleSearchPage(c *gin.Context) { h["Path"] = "" h["CurrentPage"] = "search-post" - q := c.Query("q") + q := strings.TrimSpace(c.Query("q")) if q != "" { start, err := strconv.Atoi(c.Query("start")) if start < 1 || err != nil { @@ -165,13 +164,7 @@ func HandleSearchPage(c *gin.Context) { h["Word"] = q var result *ESSearchResult vals := c.Request.URL.Query() - reg := regexp.MustCompile(`^[a-z]+:\w+$`) - logd.Debug(reg.MatchString(q)) - if reg.MatchString(q) { - result = ElasticsearchSimple(q, setting.Conf.PageNum, start-1) - } else { - result = Elasticsearch(q, setting.Conf.PageNum, start-1) - } + result = Elasticsearch(q, setting.Conf.PageNum, start-1) if result != nil { result.Took /= 1000 for i, v := range result.Hits.Hits { diff --git a/helper.go b/helper.go index dba494f..6164513 100644 --- a/helper.go +++ b/helper.go @@ -60,7 +60,7 @@ func IgnoreHtmlTag(src string) string { src = re.ReplaceAllString(src, "") //去除换行符 - re, _ = regexp.Compile("\\s{1,}") + re, _ = regexp.Compile("\\s{2,}") return re.ReplaceAllString(src, "") } diff --git a/views/search.html b/views/search.html index ca07a6c..230654a 100644 --- a/views/search.html +++ b/views/search.html @@ -1 +1 @@ -{{define "search"}}

站内搜索

{{if .Word}}{{with .SearchResult}}{{if gt (.Hits.Hits|len) 0}}
本次搜索共找到结果 {{.Hits.Total}} 条 (用时 {{.Took}} 秒)
{{range .Hits.Hits}}
{{if .Source.Img}}
{{end}}
{{dateformat .Source.CreateTime "2006-01-02"}} ... {{str2html (join .Highlight.Content "...")}} ...
{{end}}{{else}}
没有找到任何结果,请更换查询词试试~
或者试试 Google 站内搜索:site:{{$.Domain}} {{$.Word}}
{{end}}{{end}}{{else}}
热搜词:{{range .HotWords}}{{.}}{{end}}

支持的搜索格式:

  1. 输入关键词全文搜索:HTTP/2 性能优化
  2. 指定时间段搜索:date:2014date:2015-05
  3. 指定标签搜索:tag:性能优化tag:HTTPS
  4. 组合搜索:date:2015 tag:HTTP 优化
{{end}}
{{if or .Prev .Next}}{{end}}
{{end}} \ No newline at end of file +{{define "search"}}

站内搜索

{{if .Word}}{{with .SearchResult}}{{if gt (.Hits.Hits|len) 0}}
本次搜索共找到结果 {{.Hits.Total}} 条 (用时 {{.Took}} 秒)
{{range .Hits.Hits}}
{{if .Source.Img}}
{{end}}
{{dateformat .Source.Date "2006-01-02"}} ... {{str2html (join .Highlight.Content "...")}} ...
{{end}}{{else}}
没有找到任何结果,请更换查询词试试~
或者试试 Google 站内搜索:site:{{$.Domain}} {{$.Word}}
{{end}}{{end}}{{else}}
热搜词:{{range .HotWords}}{{.}}{{end}}

支持的搜索格式:

  1. 输入关键词全文搜索:搭建docker registry
  2. 指定时间段搜索:date:2015date:2016-10
  3. 指定标签搜索:tag:性能优化tag:HTTPS
  4. 组合搜索:date:2015 tag:HTTP 优化
{{end}}
{{if or .Prev .Next}}{{end}}
{{end}}