improved comments

master
Vitali Fedulov 2021-12-15 05:58:06 +01:00
parent d86797a079
commit fdb7af71e7
5 changed files with 16 additions and 22 deletions

View File

@ -3,7 +3,7 @@ package hyper
// Package hyper allows fast approximate search of nearest
// neighbour vectors in n-dimensional space.
// Package functions discretize a vector and generate a set
// of fuzzy hashes, as described in the following paper:
// of fuzzy hashes, as described in the following document:
// https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html
// A typical sequence of functions when using the package is:

View File

@ -6,7 +6,7 @@ import (
"hash/fnv"
)
// Hash64 can be any function of this kind.
// Hash64 can be any function with this signature (user-defined, for example).
type Hash64 func(buckets []int) uint64
// FVN1a is the default hash in this package.

View File

@ -10,7 +10,7 @@ func TestDefault(t *testing.T) {
hash := FVN1a(buckets)
want := uint64(13992349377752315208)
if hash != want {
t.Errorf(`Got %v, want %v`, hash, want)
t.Errorf(`Got %v, want %v.`, hash, want)
}
}
@ -27,6 +27,6 @@ func TestHashes64(t *testing.T) {
7065940388079601005,
13953051952027146823}
if !reflect.DeepEqual(hs, want) {
t.Errorf(`Got %v, want %v`, hs, want)
t.Errorf(`Got %v, want %v.`, hs, want)
}
}

View File

@ -1,25 +1,19 @@
package hyper
// Params returns discretization parameters.
// numBuckets represents number of discretization buckets into
// which all values will fall. Ids of those buckets will be used
// to create hashes.
// min and max are minimum and maximum possible values
// of discretized variable.
// bucketWidth is width of the discretization bucket.
// bucketPct is percentage of bucketWidth to allow for an error
// of discretized variable (a specific value of a discretized
// variable may fall into 2 buckets simultaneously).
// eps is actual width corresponding to the bucketWidth bucketPct
// on the discretized variable axis.
// Params computes discretization parameters (bucketWidth and eps).
// numBuckets is the number of buckets per dimension.
// min and max are the value limits per dimension.
// epsPercent is the uncertainty interval expressed as a fraction
// of bucketWidth; it must be less than 0.5.
// eps is the absolute value of the uncertainty interval epsilon.
func Params(
numBuckets int, min, max, bucketPct float64) (bucketWidth, eps float64) {
if bucketPct >= 0.5 {
panic(`Error: bucketPct must be less than 50%.
numBuckets int, min, max, epsPercent float64) (bucketWidth, eps float64) {
if epsPercent >= 0.5 {
panic(`Error: epsPercent must be less than 50%.
Recommendation: decrease numBuckets instead.`)
}
bucketWidth = (max - min) / float64(numBuckets)
eps = bucketPct * bucketWidth
eps = epsPercent * bucketWidth
return bucketWidth, eps
}

View File

@ -10,10 +10,10 @@ func TestParams(t *testing.T) {
bucketWidth, eps := Params(numBuckets, min, max, bucketPct)
wantBucketWidth, wantEps := 25.5, 6.375
if bucketWidth != wantBucketWidth {
t.Errorf(`Got bucketWidth %v, want %v`, bucketWidth, wantBucketWidth)
t.Errorf(`Got bucketWidth %v, want %v.`, bucketWidth, wantBucketWidth)
}
if eps != wantEps {
t.Errorf(`Got eps %v, want %v`, eps, wantEps)
t.Errorf(`Got eps %v, want %v.`, eps, wantEps)
}
}