improved comments
parent
d86797a079
commit
fdb7af71e7
2
about.go
2
about.go
|
@ -3,7 +3,7 @@ package hyper
|
|||
// Package hyper allows fast approximate search of nearest
|
||||
// neighbour vectors in n-dimensional space.
|
||||
// Package functions discretize a vector and generate a set
|
||||
// of fuzzy hashes, as described in the following paper:
|
||||
// of fuzzy hashes, as described in the following document:
|
||||
// https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html
|
||||
|
||||
// A typical sequence of functions when using the package is:
|
||||
|
|
|
@ -6,7 +6,7 @@ import (
|
|||
"hash/fnv"
|
||||
)
|
||||
|
||||
// Hash64 can be any function of this kind.
|
||||
// Hash64 can be any function (user defined, for example).
|
||||
type Hash64 func(buckets []int) uint64
|
||||
|
||||
// FVN1a is the default hash in this package.
|
||||
|
|
|
@ -10,7 +10,7 @@ func TestDefault(t *testing.T) {
|
|||
hash := FVN1a(buckets)
|
||||
want := uint64(13992349377752315208)
|
||||
if hash != want {
|
||||
t.Errorf(`Got %v, want %v`, hash, want)
|
||||
t.Errorf(`Got %v, want %v.`, hash, want)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,6 @@ func TestHashes64(t *testing.T) {
|
|||
7065940388079601005,
|
||||
13953051952027146823}
|
||||
if !reflect.DeepEqual(hs, want) {
|
||||
t.Errorf(`Got %v, want %v`, hs, want)
|
||||
t.Errorf(`Got %v, want %v.`, hs, want)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,25 +1,19 @@
|
|||
package hyper
|
||||
|
||||
// Params returns discretization parameters.
|
||||
// numBuckets represents number of discretization buckets into
|
||||
// which all values will fall. Ids of those buckets will be used
|
||||
// to create hashes.
|
||||
// min and max are minimum and maximum possible values
|
||||
// of discretized variable.
|
||||
// bucketWidth is width of the discretization bucket.
|
||||
// bucketPct is percentage of bucketWidth to allow for an error
|
||||
// of discretized variable (a specific value of a discretized
|
||||
// variable may fall into 2 buckets simultaneously).
|
||||
// eps is actual width corresponding to the bucketWidth bucketPct
|
||||
// on the discretized variable axis.
|
||||
// Params helps with discretization parameters.
|
||||
// numBuckets is number of buckets per dimension.
|
||||
// min and max are value limits per dimension.
|
||||
// epsPercent is the uncertainty interval expressed as a fraction
|
||||
// of bucketWidth.
|
||||
// eps is the absolute value of the uncertainty interval epsilon.
|
||||
func Params(
|
||||
numBuckets int, min, max, bucketPct float64) (bucketWidth, eps float64) {
|
||||
if bucketPct >= 0.5 {
|
||||
panic(`Error: bucketPct must be less than 50%.
|
||||
numBuckets int, min, max, epsPercent float64) (bucketWidth, eps float64) {
|
||||
if epsPercent >= 0.5 {
|
||||
panic(`Error: epsPercent must be less than 50%.
|
||||
Recommendation: decrease numBuckets instead.`)
|
||||
}
|
||||
bucketWidth = (max - min) / float64(numBuckets)
|
||||
eps = bucketPct * bucketWidth
|
||||
eps = epsPercent * bucketWidth
|
||||
return bucketWidth, eps
|
||||
}
|
||||
|
||||
|
|
|
@ -10,10 +10,10 @@ func TestParams(t *testing.T) {
|
|||
bucketWidth, eps := Params(numBuckets, min, max, bucketPct)
|
||||
wantBucketWidth, wantEps := 25.5, 6.375
|
||||
if bucketWidth != wantBucketWidth {
|
||||
t.Errorf(`Got bucketWidth %v, want %v`, bucketWidth, wantBucketWidth)
|
||||
t.Errorf(`Got bucketWidth %v, want %v.`, bucketWidth, wantBucketWidth)
|
||||
}
|
||||
if eps != wantEps {
|
||||
t.Errorf(`Got eps %v, want %v`, eps, wantEps)
|
||||
t.Errorf(`Got eps %v, want %v.`, eps, wantEps)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue