@@ -9,10 +9,12 @@ import (
 	"fmt"
 	"math"
 	"sort"
+	"strings"
 	"sync"
 	"time"
 	"unicode/utf8"
 
+	"github.com/cespare/xxhash/v2"
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
 	"github.com/pkg/errors"
@@ -46,6 +48,7 @@ type queryConnMetricLabel string
 const (
 	ExternalLabels queryConnMetricLabel = "external_labels"
 	StoreType      queryConnMetricLabel = "store_type"
+	IPPort         queryConnMetricLabel = "ip_port"
 )
 
 type GRPCEndpointSpec struct {
@@ -117,28 +120,41 @@ type EndpointStatus struct {
 // A Collector is required as we want atomic updates for all 'thanos_store_nodes_grpc_connections' series.
 // TODO(hitanshu-mehta) Currently,only collecting metrics of storeEndpoints. Make this struct generic.
 type endpointSetNodeCollector struct {
-	mtx             sync.Mutex
-	storeNodes      map[string]map[string]int
-	storePerExtLset map[string]int
+	mtx        sync.Mutex
+	storeNodes endpointStats
 
 	logger          log.Logger
 	connectionsDesc *prometheus.Desc
 	labels          []string
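+	// labelsMap holds the configured label names as a set, so hash can cheaply
+	// check which endpointStat fields to include.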
+	labelsMap       map[string]struct{}
+
+	hasherPool sync.Pool
 }
 
 func newEndpointSetNodeCollector(logger log.Logger, labels ...string) *endpointSetNodeCollector {
 	if len(labels) == 0 {
 		labels = []string{string(ExternalLabels), string(StoreType)}
 	}
+
+	labelsMap := make(map[string]struct{})
+	for _, lbl := range labels {
+		labelsMap[lbl] = struct{}{}
+	}
 	return &endpointSetNodeCollector{
 		logger:     logger,
-		storeNodes: map[string]map[string]int{},
+		storeNodes: endpointStats{},
 		connectionsDesc: prometheus.NewDesc(
 			"thanos_store_nodes_grpc_connections",
 			"Number of gRPC connection to Store APIs. Opened connection means healthy store APIs available for Querier.",
 			labels, nil,
 		),
-		labels: labels,
+		labels:    labels,
+		labelsMap: labelsMap,
+		hasherPool: sync.Pool{
+			New: func() any {
+				return xxhash.New()
+			},
+		},
 	}
 }
 
@@ -155,52 +171,66 @@ func truncateExtLabels(s string, threshold int) string {
 	}
 	return s
 }
-func (c *endpointSetNodeCollector) Update(nodes map[string]map[string]int) {
-	storeNodes := make(map[string]map[string]int, len(nodes))
-	storePerExtLset := map[string]int{}
-
-	for storeType, occurrencesPerExtLset := range nodes {
-		storeNodes[storeType] = make(map[string]int, len(occurrencesPerExtLset))
-		for externalLabels, occurrences := range occurrencesPerExtLset {
-			externalLabels = truncateExtLabels(externalLabels, externalLabelLimit)
-			storePerExtLset[externalLabels] += occurrences
-			storeNodes[storeType][externalLabels] = occurrences
-		}
-	}
-
+func (c *endpointSetNodeCollector) Update(stats endpointStats) {
 	c.mtx.Lock()
 	defer c.mtx.Unlock()
-	c.storeNodes = storeNodes
-	c.storePerExtLset = storePerExtLset
+	c.storeNodes = stats
 }
 
 func (c *endpointSetNodeCollector) Describe(ch chan<- *prometheus.Desc) {
 	ch <- c.connectionsDesc
 }
 
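+// hash returns a stable hash of the stat, computed only over the fields that
+// correspond to the labels this collector was configured with, so stats that
+// share the same label values collapse into a single series.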
+func (c *endpointSetNodeCollector) hash(e endpointStat) uint64 {
+	h := c.hasherPool.Get().(*xxhash.Digest)
+	defer func() {
+		h.Reset()
+		c.hasherPool.Put(h)
+	}()
+
+	if _, ok := c.labelsMap[string(IPPort)]; ok {
+		_, _ = h.Write([]byte(e.ip))
+	}
+	if _, ok := c.labelsMap[string(ExternalLabels)]; ok {
+		_, _ = h.Write([]byte(e.extLset))
+	}
+	if _, ok := c.labelsMap[string(StoreType)]; ok {
+		_, _ = h.Write([]byte(e.component))
+	}
+
+	return h.Sum64()
+}
+
 func (c *endpointSetNodeCollector) Collect(ch chan<- prometheus.Metric) {
 	c.mtx.Lock()
 	defer c.mtx.Unlock()
 
-	for k, occurrencesPerExtLset := range c.storeNodes {
-		for externalLabels, occurrences := range occurrencesPerExtLset {
-			// Select only required labels.
-			lbls := []string{}
-			for _, lbl := range c.labels {
-				switch lbl {
-				case string(ExternalLabels):
-					lbls = append(lbls, externalLabels)
-				case string(StoreType):
-					lbls = append(lbls, k)
-				}
-			}
-			select {
-			case ch <- prometheus.MustNewConstMetric(c.connectionsDesc, prometheus.GaugeValue, float64(occurrences), lbls...):
-			case <-time.After(1 * time.Second):
-				level.Warn(c.logger).Log("msg", "failed to collect endpointset metrics", "timeout", 1*time.Second)
-				return
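+	// First pass: count how many stats share each label combination so the
+	// gauge value reflects the number of connections per series.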
+	var occurrences = make(map[uint64]int)
+	for _, e := range c.storeNodes {
+		h := c.hash(e)
+		occurrences[h]++
+	}
+
+	for _, n := range c.storeNodes {
+		h := c.hash(n)
+		lbls := make([]string, 0, len(c.labels))
+		for _, lbl := range c.labels {
+			switch lbl {
+			case string(ExternalLabels):
+				lbls = append(lbls, n.extLset)
+			case string(StoreType):
+				lbls = append(lbls, n.component)
+			case string(IPPort):
+				lbls = append(lbls, n.ip)
 			}
 		}
+
+		select {
+		case ch <- prometheus.MustNewConstMetric(c.connectionsDesc, prometheus.GaugeValue, float64(occurrences[h]), lbls...):
+		case <-time.After(1 * time.Second):
+			level.Warn(c.logger).Log("msg", "failed to collect endpointset metrics", "timeout", 1*time.Second)
+			return
+		}
 	}
 }
 
@@ -374,9 +404,21 @@ func (e *EndpointSet) Update(ctx context.Context) {
 	}
 	level.Debug(e.logger).Log("msg", "updated endpoints", "activeEndpoints", len(e.endpoints))
 
+	nodes := make(map[string]map[string]int, len(component.All))
+	for _, comp := range component.All {
+		nodes[comp.String()] = map[string]int{}
+	}
+
 	// Update stats.
 	stats := newEndpointAPIStats()
-	for addr, er := range e.endpoints {
+
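+	// Walk the endpoints in sorted address order so the stats are built
+	// deterministically.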
+	endpointIPs := make([]string, 0, len(e.endpoints))
+	for addr := range e.endpoints {
+		endpointIPs = append(endpointIPs, addr)
+	}
+	sort.Strings(endpointIPs)
+	for _, addr := range endpointIPs {
+		er := e.endpoints[addr]
 		if !er.isQueryable() {
 			continue
 		}
@@ -385,12 +427,14 @@ func (e *EndpointSet) Update(ctx context.Context) {
 
 		// All producers that expose StoreAPI should have unique external labels. Check all which connect to our Querier.
 		if er.HasStoreAPI() && (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) &&
-			stats[component.Sidecar.String()][extLset]+stats[component.Rule.String()][extLset] > 0 {
+			nodes[component.Sidecar.String()][extLset]+nodes[component.Rule.String()][extLset] > 0 {
 
 			level.Warn(e.logger).Log("msg", "found duplicate storeEndpoints producer (sidecar or ruler). This is not advised as it will malform data in in the same bucket",
-				"address", addr, "extLset", extLset, "duplicates", fmt.Sprintf("%v", stats[component.Sidecar.String()][extLset]+stats[component.Rule.String()][extLset]+1))
+				"address", addr, "extLset", extLset, "duplicates", fmt.Sprintf("%v", nodes[component.Sidecar.String()][extLset]+nodes[component.Rule.String()][extLset]+1))
 		}
-		stats[er.ComponentType().String()][extLset]++
+		nodes[er.ComponentType().String()][extLset]++
+
+		stats = stats.append(er.addr, extLset, er.ComponentType().String())
 	}
 
 	e.endpointsMetric.Update(stats)
@@ -861,12 +905,44 @@ type endpointMetadata struct {
 	*infopb.InfoResponse
 }
 
-func newEndpointAPIStats() map[string]map[string]int {
-	nodes := make(map[string]map[string]int, len(component.All))
-	for _, comp := range component.All {
-		nodes[comp.String()] = map[string]int{}
-	}
-	return nodes
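+// endpointStat captures the identifying labels of a single endpoint
+// connection: its address, external labels, and component type.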
+type endpointStat struct {
+	ip        string
+	extLset   string
+	component string
+}
+
+func newEndpointAPIStats() endpointStats {
+	return []endpointStat{}
+}
+
+type endpointStats []endpointStat
+
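+// Sort orders the stats by IP, then external labels, then component, and
+// returns the sorted slice.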
+func (s *endpointStats) Sort() endpointStats {
+	sort.Slice(*s, func(i, j int) bool {
+		ipc := strings.Compare((*s)[i].ip, (*s)[j].ip)
+		if ipc != 0 {
+			return ipc < 0
+		}
+
+		extLsetc := strings.Compare((*s)[i].extLset, (*s)[j].extLset)
+		if extLsetc != 0 {
+			return extLsetc < 0
+		}
+
+		return strings.Compare((*s)[i].component, (*s)[j].component) < 0
+	})
+
+	return *s
+}
+
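+// append records a stat for the given endpoint, truncating its external
+// labels to externalLabelLimit, and returns the extended slice.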
+func (es *endpointStats) append(ip, extLset, component string) endpointStats {
+	truncatedExtLabels := truncateExtLabels(extLset, externalLabelLimit)
+
+	return append(*es, endpointStat{
+		ip:        ip,
+		extLset:   truncatedExtLabels,
+		component: component,
+	})
 }
 
 func maxRangeStoreMetadata() *endpointMetadata {