first commit
commit
c0561abb32
|
@ -0,0 +1,4 @@
|
||||||
|
package hyper
|
||||||
|
|
||||||
|
// Package hyper discretizes n-dimensional space and generates hashes,
|
||||||
|
// so that fast approximate search of nearest points in n-space is possible.
|
|
@ -0,0 +1,100 @@
|
||||||
|
package hyper
|
||||||
|
|
||||||
|
// Params returns discretization parameters.
//
// numBuckets is the number of discretization buckets into which all values
// will fall. Ids of those buckets are used to create hashes. It must be
// positive.
// min and max are the minimum and maximum possible values of the discretized
// variable.
// bucketPct is the fraction of bucketWidth allowed as an error of the
// discretized variable (a specific value of the discretized variable may fall
// into 2 buckets simultaneously). It must be less than 0.5.
//
// The returned bucketWidth is the width of one discretization bucket, and eps
// is the absolute width on the discretized variable axis that corresponds to
// bucketPct of bucketWidth.
//
// Params panics if numBuckets is not positive or if bucketPct >= 0.5.
func Params(numBuckets int, min, max, bucketPct float64) (bucketWidth, eps float64) {
	if numBuckets <= 0 {
		// Dividing by a non-positive bucket count would silently produce an
		// infinite, NaN or negative bucket width.
		panic("Error: numBuckets must be a positive number.")
	}
	if bucketPct >= 0.5 {
		panic("Error: bucketPct must be less than 50%. Recommendation: decrease numBuckets instead.")
	}
	bucketWidth = (max - min) / float64(numBuckets)
	eps = bucketPct * bucketWidth
	return bucketWidth, eps
}
|
||||||
|
|
||||||
|
// Buckets generates a set of slices of all possible bucket ids
// as permutations based on n-dimensional space discretization.
//
// point holds the values for each of the n dimensions.
// min and max are the minimum and maximum possible values of the discretized
// point components. The assumption is that min and max are the same for all
// dimensions (in the context of the Buckets function).
// bucketWidth and eps are defined in the Params function.
//
// For every component two candidate bucket ids are computed, one for val-eps
// and one for val+eps, each obtained by truncating the quotient by bucketWidth
// to an int. If val-eps is below min only the right id is used; otherwise, if
// val+eps is above max only the left id is used. When the two ids differ, the
// set of rows doubles: the existing rows receive the left id and a copy of
// every row receives the right id. The copies are appended after the existing
// rows, in the same order.
//
// The returned tree is nil for an empty point. Every returned row has
// len(point) elements and owns its backing array.
//
// NOTE(review): bucket ids are computed from val itself, not from val-min; min
// is only used for the boundary check. Confirm this is intended for min != 0.
func Buckets(point []float64, min, max, bucketWidth, eps float64) (tree [][]int) {
	for _, val := range point {
		// Left and right bucket ids for the current component.
		bL := int((val - eps) / bucketWidth)
		bR := int((val + eps) / bucketWidth)

		if val-eps < min { // No bucket for smaller than min.
			bL = bR
		} else if val+eps > max { // No bucket for larger than max.
			bR = bL
		}

		// First component: start the tree with one or two single-id rows.
		if len(tree) == 0 {
			tree = append(tree, []int{bL})
			if bL != bR {
				tree = append(tree, []int{bR})
			}
			continue
		}

		n := len(tree)
		if bL != bR { // Branching.
			// Give every right-hand row its own backing array before the
			// left id is appended. Copying only the slice headers would let
			// both appends write into the same spare capacity, so the right
			// id would overwrite the left one.
			for i := 0; i < n; i++ {
				right := make([]int, len(tree[i]), len(point))
				copy(right, tree[i])
				tree = append(tree, append(right, bR))
			}
		}
		// The first n rows (all rows when not branching) receive the left id.
		for i := 0; i < n; i++ {
			tree[i] = append(tree[i], bL)
		}
	}

	// Verification that branching works correctly and no buckets are lost.
	// TODO: Disable once whole package got tested on large image sets.
	for _, row := range tree {
		if len(row) != len(point) {
			panic(`Buckets slice length must be equal to len(point).`)
		}
	}

	return tree
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
package hyper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParams(t *testing.T) {
|
||||||
|
numBuckets, min, max, bucketPct := 10, 0.0, 255.0, 0.25
|
||||||
|
bucketWidth, eps := Params(numBuckets, min, max, bucketPct)
|
||||||
|
wantBucketWidth, wantEps := 25.5, 6.375
|
||||||
|
if bucketWidth != wantBucketWidth {
|
||||||
|
t.Errorf(`Got bucketWidth %v, want %v`, bucketWidth, wantBucketWidth)
|
||||||
|
}
|
||||||
|
if eps != wantEps {
|
||||||
|
t.Errorf(`Got eps %v, want %v`, eps, wantEps)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParamsPanic(t *testing.T) {
|
||||||
|
defer func() { recover() }()
|
||||||
|
// Intentionally forbiden value for bucketPct.
|
||||||
|
numBuckets, min, max, bucketPct := 10, 0.0, 255.0, 0.51
|
||||||
|
_, _ = Params(numBuckets, min, max, bucketPct)
|
||||||
|
// Never reaches here if Params panics.
|
||||||
|
t.Errorf("Params did not panic on bucketPct > 0.5")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuckets(t *testing.T) {
|
||||||
|
numBuckets, min, max, bucketPct := 10, 0.0, 255.0, 0.25
|
||||||
|
values := []float64{25.5, 0.01, 210.3, 93.9, 6.6, 9.1, 254.9}
|
||||||
|
bucketWidth, eps := Params(numBuckets, min, max, bucketPct)
|
||||||
|
got := Buckets(values, min, max, bucketWidth, eps)
|
||||||
|
want := [][]int{{0, 0, 7, 3, 0, 0, 9}, {1, 0, 7, 3, 0, 0, 9},
|
||||||
|
{0, 0, 8, 3, 0, 0, 9}, {1, 0, 8, 3, 0, 0, 9}}
|
||||||
|
if !reflect.DeepEqual(got, want) {
|
||||||
|
t.Errorf(`Got %v, want %v. Number of buckets is %v.`, got, want, numBuckets)
|
||||||
|
}
|
||||||
|
|
||||||
|
values = []float64{0.01, bucketWidth * 2 * 0.999, bucketWidth * 2 * 1.001}
|
||||||
|
got = Buckets(values, min, max, bucketWidth, eps)
|
||||||
|
want = [][]int{{0, 1, 1}, {0, 2, 1}, {0, 1, 2}, {0, 2, 2}}
|
||||||
|
if !reflect.DeepEqual(got, want) {
|
||||||
|
t.Errorf(`Got %v, want %v. Number of buckets is %v.`, got, want, numBuckets)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
package hyper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/gob"
|
||||||
|
"hash/fnv"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Hash is the signature of a function that turns a slice of bucket ids into
// a 64-bit hash value, so that a specific hashing function can be (re)defined.
|
||||||
|
type Hash func(buckets []int) uint64
|
||||||
|
|
||||||
|
// Fnva64 is a specific hash implementation: it gob-encodes the slice of
// bucket numbers and returns the 64-bit FNV-1a hash of the encoded bytes.
// Errors from the encoder and from the hash writer are ignored.
//
// NOTE(review): the result depends on the gob wire encoding of []int; confirm
// that this is stable enough for the use case if hashes are persisted.
func Fnva64(buckets []int) uint64 {
	encoded := new(bytes.Buffer)
	encoder := gob.NewEncoder(encoded)
	_ = encoder.Encode(buckets)

	digest := fnv.New64a()
	_, _ = digest.Write(encoded.Bytes())
	return digest.Sum64()
}
|
||||||
|
|
||||||
|
// HashSet returns a slice of hashes for a tree of bucket ids.
|
||||||
|
func HashSet(tree [][]int, hash Hash) (hs []uint64) {
|
||||||
|
for i := 0; i < len(tree); i++ {
|
||||||
|
hs = append(hs, hash(tree[i]))
|
||||||
|
}
|
||||||
|
return hs
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
package hyper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFnva64(t *testing.T) {
|
||||||
|
buckets := []int{5, 59, 255, 9, 7, 12, 22, 31}
|
||||||
|
hash := Fnva64(buckets)
|
||||||
|
want := uint64(13992349377752315208)
|
||||||
|
if hash != want {
|
||||||
|
t.Errorf(`Got %v, want %v`, hash, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHashSet(t *testing.T) {
|
||||||
|
tree := [][]int{
|
||||||
|
{0, 0, 7, 3, 0, 0, 9},
|
||||||
|
{1, 0, 7, 3, 0, 0, 9},
|
||||||
|
{0, 0, 8, 3, 0, 0, 9},
|
||||||
|
{1, 0, 8, 3, 0, 0, 9}}
|
||||||
|
hs := HashSet(tree, Fnva64)
|
||||||
|
want := []uint64{
|
||||||
|
14647827280143437043,
|
||||||
|
17530493565529410009,
|
||||||
|
7065940388079601005,
|
||||||
|
13953051952027146823}
|
||||||
|
if !reflect.DeepEqual(hs, want) {
|
||||||
|
t.Errorf(`Got %v, want %v`, hs, want)
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue