From 71ec1811e1e372a11ba72b74a9c91e8fed7600e1 Mon Sep 17 00:00:00 2001 From: Zheng Kai Date: Tue, 1 Aug 2023 15:08:01 +0800 Subject: [PATCH] limit --- misc/docker/Dockerfile | 2 -- misc/test/chat.sh | 4 +-- misc/test/direct.sh | 14 +++++++++ misc/test/local-chat.sh | 3 ++ misc/test/moderations.sh | 9 ++++++ server/src/core/fetch.go | 3 ++ server/src/core/req.go | 4 +++ server/src/go.mod | 8 +++--- server/src/go.sum | 17 +++++------ server/src/metrics/init.go | 3 ++ server/src/metrics/limit.go | 57 +++++++++++++++++++++++++++++++++++++ server/src/project.go | 2 +- server/src/web/server.go | 1 + 13 files changed, 110 insertions(+), 17 deletions(-) create mode 100755 misc/test/direct.sh create mode 100755 misc/test/local-chat.sh create mode 100755 misc/test/moderations.sh create mode 100644 server/src/metrics/limit.go diff --git a/misc/docker/Dockerfile b/misc/docker/Dockerfile index dfe5198..fc7d465 100644 --- a/misc/docker/Dockerfile +++ b/misc/docker/Dockerfile @@ -21,8 +21,6 @@ COPY --from=builder /project/server/dist/prod/orca-server-next /orca-server COPY ./misc/docker/http-ca.crt /etc/ssl/certs/elastic-http-ca.crt -RUN apk add --no-cache gzip brotli - ENV TZ="Asia/Shanghai" ENV ORCA_WEB=":80" diff --git a/misc/test/chat.sh b/misc/test/chat.sh index d851ee8..82568ba 100755 --- a/misc/test/chat.sh +++ b/misc/test/chat.sh @@ -2,9 +2,9 @@ BASE="${OPENAI_API_BASE:-https://api.openai.com/v1}" -curl "${BASE}/chat/completions" \ +curl -v "${BASE}/chat/completions" \ -H "Content-Type: application/json" \ - -H "Authorization: Bearer ${OPENAI_API_KEY}" \ + -H "Authorization: Bearer ${OPENAI_API_KEY} no-cache" \ -d '{ "model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello!"}] diff --git a/misc/test/direct.sh b/misc/test/direct.sh new file mode 100755 index 0000000..dd8a3c7 --- /dev/null +++ b/misc/test/direct.sh @@ -0,0 +1,14 @@ +#!/bin/bash -ex + +OPENAI_API_KEY="sk-rhjeVT1fkcuarBKnQR6ST$(cat ~/.config/openai)" +export OPENAI_API_KEY + +BASE="https://api.openai.com/v1" + +curl -v "${BASE}/chat/completions" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${OPENAI_API_KEY}" \ + -d '{ + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": "Hello!"}] + }' diff --git a/misc/test/local-chat.sh b/misc/test/local-chat.sh new file mode 100755 index 0000000..469050d --- /dev/null +++ b/misc/test/local-chat.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +OPENAI_API_BASE=http://localhost:22035/v1 ./chat.sh diff --git a/misc/test/moderations.sh b/misc/test/moderations.sh new file mode 100755 index 0000000..76633e8 --- /dev/null +++ b/misc/test/moderations.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +BASE="${OPENAI_API_BASE:-https://api.openai.com/v1}" + +curl -v "${BASE}/moderations" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${OPENAI_API_KEY}" \ + -d '{"input": "你媽逼啊"}' \ + | jq . - diff --git a/server/src/core/fetch.go b/server/src/core/fetch.go index cc76c3a..c12a0dc 100644 --- a/server/src/core/fetch.go +++ b/server/src/core/fetch.go @@ -10,6 +10,7 @@ import ( "os" "path" "project/config" + "project/metrics" "project/pb" "project/util" "time" @@ -61,6 +62,8 @@ func (pr *row) fetchRemote() (ab []byte, err error) { rsp.Body.Close() + metrics.Limit(rsp.Header) + if err == nil { e := &pb.OpenAIError{} json.Unmarshal(ab, e) diff --git a/server/src/core/req.go b/server/src/core/req.go index 810d8dd..f73732a 100644 --- a/server/src/core/req.go +++ b/server/src/core/req.go @@ -5,11 +5,15 @@ import ( "net/http" "project/pb" "project/util" + "strings" ) func (c *Core) getAB(p *pb.Req, r *http.Request) (ab []byte, cached bool, pr *row, err error) { canCache := p.Method != http.MethodGet && p.Method != http.MethodDelete + if strings.Contains(r.Header.Get(`Authorization`), `no-cache`) { + canCache = false + } // canCache = false diff --git a/server/src/go.mod b/server/src/go.mod index f202fe9..5192655 100644 --- a/server/src/go.mod +++ b/server/src/go.mod @@ -3,11 +3,11 @@ module project go 1.20 require ( - github.com/elastic/go-elasticsearch/v8 v8.8.1 + github.com/elastic/go-elasticsearch/v8 v8.9.0 github.com/prometheus/client_golang v1.16.0 github.com/zhengkai/life-go v1.0.3 github.com/zhengkai/zog v1.0.3 - github.com/zhengkai/zu v1.0.15 + github.com/zhengkai/zu v1.1.1 google.golang.org/protobuf v1.31.0 ) @@ -19,6 +19,6 @@ require ( github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/common v0.44.0 // indirect - github.com/prometheus/procfs v0.11.0 // indirect - golang.org/x/sys v0.9.0 // indirect + github.com/prometheus/procfs v0.11.1 // indirect + golang.org/x/sys v0.10.0 // indirect ) diff --git a/server/src/go.sum b/server/src/go.sum index e68ccf7..1939917 100644 --- a/server/src/go.sum +++ b/server/src/go.sum @@ -3,10 +3,11 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/elastic/elastic-transport-go/v8 v8.0.0-20230329154755-1a3c63de0db6/go.mod h1:87Tcz8IVNe6rVSLdBux1o/PEItLtyabHU3naC7IoqKI= github.com/elastic/elastic-transport-go/v8 v8.3.0 h1:DJGxovyQLXGr62e9nDMPSxRyWION0Bh6d9eCFBriiHo= github.com/elastic/elastic-transport-go/v8 v8.3.0/go.mod h1:87Tcz8IVNe6rVSLdBux1o/PEItLtyabHU3naC7IoqKI= -github.com/elastic/go-elasticsearch/v8 v8.8.1 h1:/OiP5Yex40q5eWpzFVQIS8jRE7SaEZrFkG9JbE6TXtY= -github.com/elastic/go-elasticsearch/v8 v8.8.1/go.mod h1:GU1BJHO7WeamP7UhuElYwzzHtvf9SDmeVpSSy9+o6Qg= +github.com/elastic/go-elasticsearch/v8 v8.9.0 h1:8xtmYjUkqtahl50E0Bg/wjKI7K63krJrrLipbNj/fCU= +github.com/elastic/go-elasticsearch/v8 v8.9.0/go.mod h1:NGmpvohKiRHXI0Sw4fuUGn6hYOmAXlyCphKpzVBiqDE= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= @@ -21,17 +22,17 @@ github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUo github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= -github.com/prometheus/procfs v0.11.0 h1:5EAgkfkMl659uZPbe9AS2N68a7Cc1TJbPEuGzFuRbyk= -github.com/prometheus/procfs v0.11.0/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI= +github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY= github.com/zhengkai/life-go v1.0.3 h1:rzm+Hb8H4He5trWx3lthFEQPf3sHpns0bDZ7vubT6sI= github.com/zhengkai/life-go v1.0.3/go.mod h1:e2RGLfk+uRzjhRrMQash9X4iY3jAuGj99r0qj5JS7m4= github.com/zhengkai/zog v1.0.3 h1:dkJdXJKRjbqqlseFycA1d80AUU6HAZrPe4WplpmwTo4= github.com/zhengkai/zog v1.0.3/go.mod h1:dXbJ0XDMRXQX+XeNuIM9hJy/6OLRNtXHPq/86ll8u6I= -github.com/zhengkai/zu v1.0.15 h1:yS59XBKWLpzgcUuS0VKj2mM45CqKtFScGJ642mmwZOo= -github.com/zhengkai/zu v1.0.15/go.mod h1:JWXGfd+7xrc4gAt65qWco7WtmLqD7CF1TTZxfm+OxgM= +github.com/zhengkai/zu v1.1.1 h1:iN2wqHx5Gc4NGK4CVStFnEXs6DgUjOeW3VsyGS6y0tU= +github.com/zhengkai/zu v1.1.1/go.mod h1:1odPGbjAlQyLjJs0AsgpWizhtS72qWrUFugKsgokBNs= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s= -golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= diff --git a/server/src/metrics/init.go b/server/src/metrics/init.go index 81c62b1..bf62f68 100644 --- a/server/src/metrics/init.go +++ b/server/src/metrics/init.go @@ -18,4 +18,7 @@ func init() { prometheus.MustRegister(rspTokenByIP) prometheus.MustRegister(rspTokenByModel) prometheus.MustRegister(rspTokenByKey) + + prometheus.MustRegister(limitReq) + prometheus.MustRegister(limitToken) } diff --git a/server/src/metrics/limit.go b/server/src/metrics/limit.go new file mode 100644 index 0000000..87c904a --- /dev/null +++ b/server/src/metrics/limit.go @@ -0,0 +1,57 @@ +package metrics + +import ( + "net/http" + "project/zj" + "strconv" + + "github.com/prometheus/client_golang/prometheus" +) + +var ( + limitReq = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: `orca_limit_req_by_model`, + Help: `limit request by model`, + }, + []string{`model`}, + ) + limitToken = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: `orca_limit_token_by_model`, + Help: `limit token by model`, + }, + []string{`model`}, + ) +) + +// Limit ... +func Limit(h http.Header) { + + model := h.Get(`openai-model`) + if model == `` { + return + } + + if h.Get(`x-ratelimit-limit-requests`) != `` { + req := h.Get(`x-ratelimit-remaining-requests`) + limitReq.WithLabelValues(model).Set(strToFloat(req)) + } + if h.Get(`x-ratelimit-limit-tokens`) != `` { + token := h.Get(`x-ratelimit-remaining-tokens`) + limitToken.WithLabelValues(model).Set(strToFloat(token)) + + zj.J(`limit time`, model, h.Get(`x-ratelimit-reset-requests`), h.Get(`x-ratelimit-reset-tokens`)) + } +} + +func strToFloat(s string) float64 { + if s == `` { + return 0 + } + f, err := strconv.Atoi(s) + if err != nil { + return 0 + } + return float64(f) +} diff --git a/server/src/project.go b/server/src/project.go index 791e857..9645f7e 100644 --- a/server/src/project.go +++ b/server/src/project.go @@ -22,7 +22,7 @@ func Start() { if !config.Prod { // es.LastItem() // st.DateHistogram() - go es.Test() + // go es.Test() } go web.Server() diff --git a/server/src/web/server.go b/server/src/web/server.go index 9477910..1b5a89f 100644 --- a/server/src/web/server.go +++ b/server/src/web/server.go @@ -16,6 +16,7 @@ func Server() { mux := http.NewServeMux() mux.Handle(`/_metrics`, promhttp.Handler()) + mux.Handle(`/v1/moderations`, core.NewCore()) mux.Handle(`/v1/completions`, core.NewCore()) mux.Handle(`/v1/chat/completions`, core.NewCore())