优化elasticsearch

This commit is contained in:
deepzz0
2016-10-28 01:10:57 +08:00
parent 8b496f42bf
commit 56dcb65ba2
5 changed files with 232 additions and 143 deletions

158
README.md
View File

@@ -353,4 +353,160 @@ after_success:
- docker push registry.cn-hangzhou.aliyuncs.com/deepzz/eiblog
```
2、修改`build_docker.sh`文件中的`domain`为自己仓库地址。执行`./build_docker.sh`
2、修改`build_docker.sh`文件中的`domain`为自己仓库地址。执行`./build_docker.sh`
### 资料备忘
#### 创建mapping
```
mapping := map[string]interface{}{
"mappings": map[string]interface{}{
TYPE: map[string]interface{}{
"properties": map[string]interface{}{
"title": map[string]string{
"type": "string",
"term_vector": "with_positions_offsets",
"analyzer": "ik_syno",
"search_analyzer": "ik_syno",
},
"content": map[string]string{
"type": "string",
"term_vector": "with_positions_offsets",
"analyzer": "ik_syno",
"search_analyzer": "ik_syno",
},
"slug": map[string]string{
"type": "string",
},
"tag": map[string]string{
"type": "string",
"index": "not_analyzed",
},
"date": map[string]string{
"type": "date",
"index": "not_analyzed",
},
},
},
},
}
```
#### DSL高亮查询
```
原始 DSL(可使用 FeHelper 进行 JSON 格式化查看)
{"highlight":{"fields":{"content":{},"title":{}},"post_tags":["\u003c/b\u003e"],"pre_tags":["\u003cb\u003e"]},"query":{"dis_max":{"queries":[{"match":{"title":{"boost":4,"minimum_should_match":"50%","query":"天气"}}},{"match":{"content":{"boost":4,"minimum_should_match":"75%","query":"天气"}}},{"match":{"tag":{"boost":2,"minimum_should_match":"100%","query":"天气"}}},{"match":{"slug":{"boost":1,"minimum_should_match":"100%","query":"天气"}}}],"tie_breaker":0.3}},"filter":{"bool":{"must":[{"range":{"date":{"gte":"2016-10","lte": "2016-10||/M","format": "yyyy-MM-dd||yyyy-MM||yyyy"}}},{"term":{"tag":"tag3"}}]}}}
格式化
{
"highlight": {
"fields": {
"content": {},
"title": {}
},
"post_tags": [
"</b>"
],
"pre_tags": [
"<b>"
]
},
"query": {
"dis_max": {
"queries": [
{
"match": {
"title": {
"boost": 4,
"minimum_should_match": "50%",
"query": "天气"
}
}
},
{
"match": {
"content": {
"boost": 4,
"minimum_should_match": "75%",
"query": "天气"
}
}
},
{
"match": {
"tag": {
"boost": 2,
"minimum_should_match": "100%",
"query": "天气"
}
}
},
{
"match": {
"slug": {
"boost": 1,
"minimum_should_match": "100%",
"query": "天气"
}
}
}
],
"tie_breaker": 0.3
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "2016-10",
"lte": "2016-10||/M",
"format": "yyyy-MM-dd||yyyy-MM||yyyy"
}
}
},
{
"term": {
"tag": "tag3"
}
}
]
}
}
}
```
#### term 查询
```
{
"query": {
"bool": {
"must": [
{
"term": {
"slug": "slug1"
}
},{
"term": {
"tag": "tag1"
}
}
]
}
},
"filter": {
"range": {
"date": {
"gte": "2016-10",
"lte": "2016-10||/M",
"format": "yyyy-MM||yyyy"
}
}
}
}
```

View File

@@ -7,6 +7,8 @@ import (
"fmt"
"io/ioutil"
"net/http"
"regexp"
"strings"
"time"
"github.com/eiblog/eiblog/setting"
@@ -16,6 +18,10 @@ import (
// Index/type names and the JSON fragments from which search filters are assembled.
const (
// INDEX is the index name handed to the mapping, indexing and query helpers.
INDEX = "eiblog"
// TYPE is the document type; it is also substituted into the mapping JSON.
TYPE = "article"
// ES_FILTER is a `"filter"` member (no enclosing braces) whose bool/must array
// receives the comma-joined clauses through %s.
ES_FILTER = `"filter":{"bool":{"must":[%s]}}`
// ES_TERM is a single term clause; the two %s verbs are the field name and the value.
ES_TERM = `{"term":{"%s":"%s"}}`
// ES_DATE is a range clause on the "date" field; the two %s verbs are the gte and lte bounds.
ES_DATE = `{"range":{"date":{"gte":"%s","lte": "%s","format": "yyyy-MM-dd||yyyy-MM||yyyy"}}}` // example lte bound: 2016-10||/M
)
var es *ElasticService
@@ -26,112 +32,63 @@ func init() {
}
// initIndex creates the INDEX index together with the field mapping for TYPE
// documents and reports any failure through logd.Fatal.
//
// The mapping declares:
//   - title, content: analyzed with ik_syno (index and search time), storing
//     term vectors with positions and offsets;
//   - slug: plain string;
//   - tag: not_analyzed string;
//   - date: not_analyzed date.
//
// The field names "tag" and "date" must match the ones used by ES_DATE and by
// the tag/slug/date query prefixes, so only this single mapping is kept.
func initIndex() {
	// TYPE is the only variable part of the request body; the rest is fixed JSON.
	mappings := fmt.Sprintf(`{"mappings":{"%s":{"properties":{"content":{"analyzer":"ik_syno","search_analyzer":"ik_syno","term_vector":"with_positions_offsets","type":"string"},"date":{"index":"not_analyzed","type":"date"},"slug":{"type":"string"},"tag":{"index":"not_analyzed","type":"string"},"title":{"analyzer":"ik_syno","search_analyzer":"ik_syno","term_vector":"with_positions_offsets","type":"string"}}}}}`, TYPE)
	if err := CreateIndexAndMappings(INDEX, TYPE, []byte(mappings)); err != nil {
		logd.Fatal(err)
	}
}
func Elasticsearch(kw string, size, from int) *ESSearchResult {
dsl := map[string]interface{}{
"query": map[string]interface{}{
"dis_max": map[string]interface{}{
"queries": []map[string]interface{}{
map[string]interface{}{
"match": map[string]interface{}{
"title": map[string]interface{}{
"query": kw,
"minimum_should_match": "50%",
"boost": 4,
},
},
},
map[string]interface{}{
"match": map[string]interface{}{
"content": map[string]interface{}{
"query": kw,
"minimum_should_match": "75%",
"boost": 4,
},
},
},
map[string]interface{}{
"match": map[string]interface{}{
"tags": map[string]interface{}{
"query": kw,
"minimum_should_match": "100%",
"boost": 2,
},
},
},
map[string]interface{}{
"match": map[string]interface{}{
"slug": map[string]interface{}{
"query": kw,
"minimum_should_match": "100%",
"boost": 1,
},
},
},
},
"tie_breaker": 0.3,
},
},
"highlight": map[string]interface{}{
"pre_tags": []string{"<b>"},
"post_tags": []string{"</b>"},
"fields": map[string]interface{}{
"title": map[string]string{},
"content": map[string]string{
// "fragment_size": 150,
// "number_of_fragments": "3",
},
},
},
func Elasticsearch(qStr string, size, from int) *ESSearchResult {
// 分析查询字符串
reg := regexp.MustCompile(`(tag|slug|date):`)
indexs := reg.FindAllStringIndex(qStr, -1)
length := len(indexs)
var str, kw string
var filter []string
if length == 0 { // 全文搜索
kw = qStr
}
b, _ := json.Marshal(dsl)
docs, err := IndexQueryDSL(INDEX, TYPE, size, from, b)
if err != nil {
logd.Error(err)
return nil
// 字段搜索,检出 全文搜索
for i, index := range indexs {
if i == length-1 {
str = qStr[index[0]:]
if space := strings.Index(str, " "); space != -1 && space < len(str)-1 {
kw = str[space+1:]
str = str[:space]
}
} else {
str = strings.TrimSpace(qStr[index[0]:indexs[i+1][0]])
}
kv := strings.Split(str, ":")
switch kv[0] {
case "slug":
filter = append(filter, fmt.Sprintf(ES_TERM, kv[0], kv[1]))
case "tag":
filter = append(filter, fmt.Sprintf(ES_TERM, kv[0], kv[1]))
case "date":
var date string
switch len(kv[1]) {
case 4:
date = fmt.Sprintf(ES_DATE, kv[1], kv[1]+"||/y")
case 7:
date = fmt.Sprintf(ES_DATE, kv[1], kv[1]+"||/M")
case 10:
date = fmt.Sprintf(ES_DATE, kv[1], kv[1]+"||/d")
default:
break
}
filter = append(filter, date)
}
}
return docs
}
func ElasticsearchSimple(q string, size, from int) *ESSearchResult {
docs, err := IndexQuerySimple(INDEX, TYPE, size, from, q)
// 判断是否为空,选择搜索方式
var dsl string
if kw != "" {
dsl = strings.Replace(strings.Replace(`{"highlight":{"fields":{"content":{},"title":{}},"post_tags":["\u003c/b\u003e"],"pre_tags":["\u003cb\u003e"]},"query":{"dis_max":{"queries":[{"match":{"title":{"boost":4,"minimum_should_match":"50%","query":"$1"}}},{"match":{"content":{"boost":4,"minimum_should_match":"75%","query":"$1"}}},{"match":{"tag":{"boost":2,"minimum_should_match":"100%","query":"$1"}}},{"match":{"slug":{"boost":1,"minimum_should_match":"100%","query":"$1"}}}],"tie_breaker":0.3}},$2}`, "$1", kw, -1), "$2", fmt.Sprintf(ES_FILTER, strings.Join(filter, ",")), -1)
} else {
dsl = fmt.Sprintf("{"+ES_FILTER+"}", strings.Join(filter, ","))
}
docs, err := IndexQueryDSL(INDEX, TYPE, size, from, []byte(dsl))
if err != nil {
logd.Error(err)
return nil
@@ -142,12 +99,12 @@ func ElasticsearchSimple(q string, size, from int) *ESSearchResult {
func ElasticIndex(artc *Article) error {
img := PickFirstImage(artc.Content)
mapping := map[string]interface{}{
"title": artc.Title,
"content": IgnoreHtmlTag(artc.Content),
"slug": artc.Slug,
"tags": artc.Tags,
"img": img,
"create_time": artc.CreateTime,
"title": artc.Title,
"content": IgnoreHtmlTag(artc.Content),
"slug": artc.Slug,
"tag": artc.Tags,
"img": img,
"date": artc.CreateTime,
}
b, _ := json.Marshal(mapping)
return IndexOrUpdateDocument(INDEX, TYPE, artc.ID, b)
@@ -245,9 +202,9 @@ func IndexOrUpdateDocument(index, typ string, id int32, doc []byte) (err error)
}
// ESDeleteDocument identifies one document by index, type and id.
// DeleteDocument marshals it to JSON under a "delete" key.
type ESDeleteDocument struct {
// These underscore-prefixed fields are unexported, so encoding/json skips
// them when marshaling and their struct tags have no effect.
_Index string `json:"_index"`
_Type string `json:"_type"`
_ID string `json:"_id"`
// These exported fields are the ones json.Marshal emits, under the keys
// "_index", "_type" and "_id".
Index string `json:"_index"`
Type string `json:"_type"`
ID string `json:"_id"`
}
type ESDeleteResult struct {
@@ -260,7 +217,7 @@ type ESDeleteResult struct {
func DeleteDocument(index, typ string, ids []string) error {
var buff bytes.Buffer
for _, id := range ids {
dd := &ESDeleteDocument{_Index: index, _Type: typ, _ID: id}
dd := &ESDeleteDocument{Index: index, Type: typ, ID: id}
m := map[string]*ESDeleteDocument{"delete": dd}
b, _ := json.Marshal(m)
buff.Write(b)
@@ -298,11 +255,11 @@ type ESSearchResult struct {
Hits []struct {
ID string `json:"_id"`
Source struct {
Slug string `json:"slug"`
Content string `json:"content"`
CreateTime time.Time `json:"create_time"`
Title string `json:"title"`
Img string `json:"img"`
Slug string `json:"slug"`
Content string `json:"content"`
Date time.Time `json:"date"`
Title string `json:"title"`
Img string `json:"img"`
} `json:"_source"`
Highlight struct {
Title []string `json:"title"`
@@ -312,23 +269,6 @@ type ESSearchResult struct {
} `json:"hits"`
}
// IndexQuerySimple performs a GET against /<index>/<typ>/_search, passing
// size, from and q as URL query parameters, and decodes the response body
// into an ESSearchResult.
//
// q is handed to es.ParseURL unchanged.
// NOTE(review): whether ParseURL escapes it is not visible here — confirm.
//
// It returns a nil result together with the error when building the request,
// executing it, or decoding the response fails.
func IndexQuerySimple(index, typ string, size, from int, q string) (*ESSearchResult, error) {
	endpoint := es.ParseURL("/%s/%s/_search?size=%d&from=%d&q=%s", index, typ, size, from, q)

	req, err := http.NewRequest("GET", endpoint, nil)
	if err != nil {
		return nil, err
	}

	data, err := es.Do(req)
	if err != nil {
		return nil, err
	}

	// es.Do returns an untyped value; the payload is asserted to be raw bytes.
	var result ESSearchResult
	if err = json.Unmarshal(data.([]byte), &result); err != nil {
		return nil, err
	}
	return &result, nil
}
func IndexQueryDSL(index, typ string, size, from int, dsl []byte) (*ESSearchResult, error) {
req, err := http.NewRequest("POST", es.ParseURL("/%s/%s/_search?size=%d&from=%d", index, typ, size, from), bytes.NewReader(dsl))
if err != nil {

View File

@@ -8,7 +8,6 @@ import (
"html/template"
"io/ioutil"
"net/http"
"regexp"
"strconv"
"strings"
"time"
@@ -156,7 +155,7 @@ func HandleSearchPage(c *gin.Context) {
h["Path"] = ""
h["CurrentPage"] = "search-post"
q := c.Query("q")
q := strings.TrimSpace(c.Query("q"))
if q != "" {
start, err := strconv.Atoi(c.Query("start"))
if start < 1 || err != nil {
@@ -165,13 +164,7 @@ func HandleSearchPage(c *gin.Context) {
h["Word"] = q
var result *ESSearchResult
vals := c.Request.URL.Query()
reg := regexp.MustCompile(`^[a-z]+:\w+$`)
logd.Debug(reg.MatchString(q))
if reg.MatchString(q) {
result = ElasticsearchSimple(q, setting.Conf.PageNum, start-1)
} else {
result = Elasticsearch(q, setting.Conf.PageNum, start-1)
}
result = Elasticsearch(q, setting.Conf.PageNum, start-1)
if result != nil {
result.Took /= 1000
for i, v := range result.Hits.Hits {

View File

@@ -60,7 +60,7 @@ func IgnoreHtmlTag(src string) string {
src = re.ReplaceAllString(src, "")
//去除换行符
re, _ = regexp.Compile("\\s{1,}")
re, _ = regexp.Compile("\\s{2,}")
return re.ReplaceAllString(src, "")
}

View File

@@ -1 +1 @@
{{define "search"}}<div id="content" class="inner"><article class="post post-search"><h1 class="title">站内搜索</h1><div class="entry-content"><div id="search"><form action="/search.html"><div class="wrapper"><input maxlength="80" placeholder="请输入关键字..." id="keyword" name="q" value="{{.Word}}" type="search" required></div><input class="submit" type="submit" value="搜索"></form></div><div id="searchResult">{{if .Word}}{{with .SearchResult}}{{if gt (.Hits.Hits|len) 0}}<div class="info">本次搜索共找到结果 {{.Hits.Total}} 条 (用时 {{.Took}} 秒)</div>{{range .Hits.Hits}}<div class="item"><div class="title"><a href="/post/{{.Source.Slug}}.html">{{if .Highlight.Title}}{{str2html (join .Highlight.Title "")}}{{else}}{{.Source.Title}}{{end}}</a></div><div class="desc">{{if .Source.Img}}<div class="img"><img data-src="{{.Source.Img}}?imageView2/1/w/216/h/162"></div>{{end}}<div class="summary"><span class="date">{{dateformat .Source.CreateTime "2006-01-02"}}</span> ... {{str2html (join .Highlight.Content "...")}} ...</div></div></div>{{end}}{{else}}<div class="no-result">没有找到任何结果,请更换查询词试试~</div><div class="item"><div class="title">或者试试 Google 站内搜索:<a target="_blank" href="//www.google.com/#q=site:{{$.Domain}} {{$.Word}}">site:{{$.Domain}} {{$.Word}}</a></div></div>{{end}}{{end}}{{else}}<div class="hot-words">热搜词:{{range .HotWords}}<a href="?q={{.}}">{{.}}</a>{{end}}</div><div class="intro"><p>支持的搜索格式:</p><ol><li>输入关键词全文搜索:<a href="?q=HTTP/2 性能优化">HTTP/2 性能优化</a></li><li>指定时间段搜索:<a href="?q=date:2014">date:2014</a><a href="?q=date:2015-05">date:2015-05</a></li><li>指定标签搜索:<a href="?q=tag:性能优化">tag:性能优化</a><a href="?q=tag:HTTPS">tag:HTTPS</a></li><li>组合搜索:<a href="?q=date:2015 tag:HTTP 优化">date:2015 tag:HTTP 优化</a></li></ol></div>{{end}}</div></div></article>{{if or .Prev .Next}}<nav class="page-navi">{{with .Prev}}<a href="?{{html .}}" class="prev">« 上一页</a>{{end}}{{with .Next}}<a href="?{{html .}}" class="next">下一页 »</a>{{end}}</nav>{{end}}</div>{{end}}
{{define "search"}}{{/* Search page: renders the query form; when .Word is set it lists .SearchResult hits (or a no-result hint with a Google site-search link), otherwise it shows .HotWords and sample queries. Each sample link's href must search for the text the link displays. */}}<div id="content" class="inner"><article class="post post-search"><h1 class="title">站内搜索</h1><div class="entry-content"><div id="search"><form action="/search.html"><div class="wrapper"><input maxlength="80" placeholder="请输入关键字..." id="keyword" name="q" value="{{.Word}}" type="search" required></div><input class="submit" type="submit" value="搜索"></form></div><div id="searchResult">{{if .Word}}{{with .SearchResult}}{{if gt (.Hits.Hits|len) 0}}<div class="info">本次搜索共找到结果 {{.Hits.Total}} 条 (用时 {{.Took}} 秒)</div>{{range .Hits.Hits}}<div class="item"><div class="title"><a href="/post/{{.Source.Slug}}.html">{{if .Highlight.Title}}{{str2html (join .Highlight.Title "")}}{{else}}{{.Source.Title}}{{end}}</a></div><div class="desc">{{if .Source.Img}}<div class="img"><img data-src="{{.Source.Img}}?imageView2/1/w/216/h/162"></div>{{end}}<div class="summary"><span class="date">{{dateformat .Source.Date "2006-01-02"}}</span> ... {{str2html (join .Highlight.Content "...")}} ...</div></div></div>{{end}}{{else}}<div class="no-result">没有找到任何结果,请更换查询词试试~</div><div class="item"><div class="title">或者试试 Google 站内搜索:<a target="_blank" href="//www.google.com/#q=site:{{$.Domain}} {{$.Word}}">site:{{$.Domain}} {{$.Word}}</a></div></div>{{end}}{{end}}{{else}}<div class="hot-words">热搜词:{{range .HotWords}}<a href="?q={{.}}">{{.}}</a>{{end}}</div><div class="intro"><p>支持的搜索格式:</p><ol><li>输入关键词全文搜索:<a href="?q=搭建docker registry">搭建docker registry</a></li><li>指定时间段搜索:<a href="?q=date:2015">date:2015</a><a href="?q=date:2016-10">date:2016-10</a></li><li>指定标签搜索:<a href="?q=tag:性能优化">tag:性能优化</a><a href="?q=tag:HTTPS">tag:HTTPS</a></li><li>组合搜索:<a href="?q=date:2015 tag:HTTP 优化">date:2015 tag:HTTP 优化</a></li></ol></div>{{end}}</div></div></article>{{if or .Prev .Next}}<nav class="page-navi">{{with .Prev}}<a href="?{{html .}}" class="prev">« 上一页</a>{{end}}{{with .Next}}<a href="?{{html .}}" class="next">下一页 »</a>{{end}}</nav>{{end}}</div>{{end}}