improved comments
parent
d86797a079
commit
fdb7af71e7
2
about.go
2
about.go
|
@ -3,7 +3,7 @@ package hyper
|
||||||
// Package hyper allows fast approximate search of nearest
|
// Package hyper allows fast approximate search of nearest
|
||||||
// neighbour vectors in n-dimensional space.
|
// neighbour vectors in n-dimensional space.
|
||||||
// Package functions discretize a vector and generate a set
|
// Package functions discretize a vector and generate a set
|
||||||
// of fuzzy hashes, as described in the following paper:
|
// of fuzzy hashes, as described in the following document:
|
||||||
// https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html
|
// https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html
|
||||||
|
|
||||||
// A typical sequence of functions when using the package is:
|
// A typical sequence of functions when using the package is:
|
||||||
|
|
|
@ -6,7 +6,7 @@ import (
|
||||||
"hash/fnv"
|
"hash/fnv"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Hash64 can be any function of this kind.
|
// Hash64 can be any function with this signature (user-defined, for example).
|
||||||
type Hash64 func(buckets []int) uint64
|
type Hash64 func(buckets []int) uint64
|
||||||
|
|
||||||
// FVN1a is the default hash in this package.
|
// FVN1a is the default hash in this package.
|
||||||
|
|
|
@ -10,7 +10,7 @@ func TestDefault(t *testing.T) {
|
||||||
hash := FVN1a(buckets)
|
hash := FVN1a(buckets)
|
||||||
want := uint64(13992349377752315208)
|
want := uint64(13992349377752315208)
|
||||||
if hash != want {
|
if hash != want {
|
||||||
t.Errorf(`Got %v, want %v`, hash, want)
|
t.Errorf(`Got %v, want %v.`, hash, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,6 +27,6 @@ func TestHashes64(t *testing.T) {
|
||||||
7065940388079601005,
|
7065940388079601005,
|
||||||
13953051952027146823}
|
13953051952027146823}
|
||||||
if !reflect.DeepEqual(hs, want) {
|
if !reflect.DeepEqual(hs, want) {
|
||||||
t.Errorf(`Got %v, want %v`, hs, want)
|
t.Errorf(`Got %v, want %v.`, hs, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,25 +1,19 @@
|
||||||
package hyper
|
package hyper
|
||||||
|
|
||||||
// Params returns discretization parameters.
|
// Params computes the discretization parameters bucketWidth and eps.
|
||||||
// numBuckets represents number of discretization buckets into
|
// numBuckets is the number of buckets per dimension.
|
||||||
// which all values will fall. Ids of those buckets will be used
|
// min and max are value limits per dimension.
|
||||||
// to create hashes.
|
// epsPercent is the uncertainty interval expressed as a fraction (e.g. 0.25 for 25%)
|
||||||
// min and max are minimum and maximum possible values
|
// of bucketWidth.
|
||||||
// of discretized variable.
|
// eps is the absolute value of the uncertainty interval epsilon.
|
||||||
// bucketWidth is width of the discretization bucket.
|
|
||||||
// bucketPct is percentage of bucketWidth to allow for an error
|
|
||||||
// of discretized variable (a specific value of a discretized
|
|
||||||
// variable may fall into 2 buckets simultaneously).
|
|
||||||
// eps is actual width corresponding to the bucketWidth bucketPct
|
|
||||||
// on the discretized variable axis.
|
|
||||||
func Params(
|
func Params(
|
||||||
numBuckets int, min, max, bucketPct float64) (bucketWidth, eps float64) {
|
numBuckets int, min, max, epsPercent float64) (bucketWidth, eps float64) {
|
||||||
if bucketPct >= 0.5 {
|
if epsPercent >= 0.5 {
|
||||||
panic(`Error: bucketPct must be less than 50%.
|
panic(`Error: epsPercent must be less than 50%.
|
||||||
Recommendation: decrease numBuckets instead.`)
|
Recommendation: decrease numBuckets instead.`)
|
||||||
}
|
}
|
||||||
bucketWidth = (max - min) / float64(numBuckets)
|
bucketWidth = (max - min) / float64(numBuckets)
|
||||||
eps = bucketPct * bucketWidth
|
eps = epsPercent * bucketWidth
|
||||||
return bucketWidth, eps
|
return bucketWidth, eps
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,10 +10,10 @@ func TestParams(t *testing.T) {
|
||||||
bucketWidth, eps := Params(numBuckets, min, max, bucketPct)
|
bucketWidth, eps := Params(numBuckets, min, max, bucketPct)
|
||||||
wantBucketWidth, wantEps := 25.5, 6.375
|
wantBucketWidth, wantEps := 25.5, 6.375
|
||||||
if bucketWidth != wantBucketWidth {
|
if bucketWidth != wantBucketWidth {
|
||||||
t.Errorf(`Got bucketWidth %v, want %v`, bucketWidth, wantBucketWidth)
|
t.Errorf(`Got bucketWidth %v, want %v.`, bucketWidth, wantBucketWidth)
|
||||||
}
|
}
|
||||||
if eps != wantEps {
|
if eps != wantEps {
|
||||||
t.Errorf(`Got eps %v, want %v`, eps, wantEps)
|
t.Errorf(`Got eps %v, want %v.`, eps, wantEps)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue