Skip to content

Is a Ternary Search Tree really better? #41

@erikdubbelboer

Description

@erikdubbelboer

I'm curious about the choice for a Ternary Search Tree instead of just a normal map. I wrote a quick benchmark an no matter how I configure it the map always uses 90% less memory and is 10% to 500% faster depending on the hit rate. Do you have some benchmark to show why it is better for your case?

package main

import (
	"container/list"
	"math/rand"
	"runtime"
	"testing"
)

// Basic Implementation for Ternary Search Tree (TST)

// tst returns Ternary Search Tree
type tst interface {
	Set(word []byte, value interface{})
	Get(word []byte) interface{}
	GetString(word string) interface{}
}

// Ternary Search Tree node that holds a single character and value if there is
type tstNode struct {
	lower  *tstNode
	higher *tstNode
	equal  *tstNode
	char   byte
	value  interface{}
}

// newTST returns Ternary Search Tree
func newTST() tst {
	return &tstNode{}
}

// Set adds a value to provided key
func (t *tstNode) Set(key []byte, value interface{}) {
	if len(key) < 1 {
		return
	}

	t.insert(t, key, 0, value)
}

// Get gets the value of provided key if it's existing, otherwise returns nil
func (t *tstNode) Get(key []byte) interface{} {
	length := len(key)
	if length < 1 || t == nil {
		return nil
	}
	lastElm := length - 1

	n := t
	idx := 0
	char := key[idx]
	for n != nil {
		if char < n.char {
			n = n.lower
		} else if char > n.char {
			n = n.higher
		} else {
			if idx == lastElm {
				return n.value
			}

			idx++
			n = n.equal
			char = key[idx]
		}
	}
	return nil
}

// Get gets the value of provided key (string) if it's existing, otherwise returns nil
func (t *tstNode) GetString(key string) interface{} {
	return t.Get([]byte(key))
}

// insert is an internal method for inserting a []byte with value in TST
func (t *tstNode) insert(n *tstNode, key []byte, index int, value interface{}) *tstNode {
	char := key[index]
	lastElm := len(key) - 1

	if n == nil {
		n = &tstNode{char: char}
	}

	if char < n.char {
		n.lower = t.insert(n.lower, key, index, value)
	} else if char > n.char {
		n.higher = t.insert(n.higher, key, index, value)
	} else {
		if index == lastElm {
			n.value = value
		} else {
			n.equal = t.insert(n.equal, key, index+1, value)
		}
	}

	return n
}

// RandBytes generates random string from English letters
func RandBytes() []byte {
	const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	b := make([]byte, rand.Intn(100))
	for i := range b {
		b[i] = letterBytes[rand.Intn(len(letterBytes))]
	}
	return b
}

var (
	keys = make([][]byte, 1000)
	T    tst
	M    = make(map[string]interface{})

	Sink interface{}
)

func init() {
	T = newTST()
	rand.Seed(0)

	for i := 0; i < len(keys); i++ {
		keys[i] = RandBytes()
	}

	rand.Seed(0)
	var b, a runtime.MemStats
	runtime.ReadMemStats(&b)
	for i := 0; i < 500; i++ { // Only fill half of the keys so we get a 50% hit rate.
		T.Set(keys[i], &list.Element{})
	}
	runtime.ReadMemStats(&a)
	println(a.Alloc - b.Alloc)

	rand.Seed(0)
	runtime.ReadMemStats(&b)
	for i := 0; i < 500; i++ { // Only fill half of the keys so we get a 50% hit rate.
		M[string(keys[i])] = &list.Element{}
	}
	runtime.ReadMemStats(&a)
	println(a.Alloc - b.Alloc)
}

func BenchmarkTST(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Sink = T.Get(keys[(n*31)%len(keys)]) // Lookup a semi random key (using rand.Intn is too heavy here compared to the operation we are benchmarking).
	}
}

func BenchmarkMap(b *testing.B) {
	for n := 0; n < b.N; n++ {
		Sink = M[string(keys[(n*31)%len(keys)])] // Lookup a semi random key (using rand.Intn is too heavy here compared to the operation we are benchmarking).
	}
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    questionFurther information is requested

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions