improved comments

master
Vitali Fedulov 2021-12-15 05:58:06 +01:00
parent d86797a079
commit fdb7af71e7
5 changed files with 16 additions and 22 deletions

View File

@ -3,7 +3,7 @@ package hyper
// Package hyper allows fast approximate search of nearest // Package hyper allows fast approximate search of nearest
// neighbour vectors in n-dimensional space. // neighbour vectors in n-dimensional space.
// Package functions discretize a vector and generate a set // Package functions discretize a vector and generate a set
// of fuzzy hashes, as described in the following paper: // of fuzzy hashes, as described in the following document:
// https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html // https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html
// A typical sequence of functions when using the package is: // A typical sequence of functions when using the package is:

View File

@ -6,7 +6,7 @@ import (
"hash/fnv" "hash/fnv"
) )
// Hash64 can be any function of this kind. // Hash64 can be any function (user defined, for example).
type Hash64 func(buckets []int) uint64 type Hash64 func(buckets []int) uint64
// FVN1a is the default hash in this package. // FVN1a is the default hash in this package.

View File

@ -10,7 +10,7 @@ func TestDefault(t *testing.T) {
hash := FVN1a(buckets) hash := FVN1a(buckets)
want := uint64(13992349377752315208) want := uint64(13992349377752315208)
if hash != want { if hash != want {
t.Errorf(`Got %v, want %v`, hash, want) t.Errorf(`Got %v, want %v.`, hash, want)
} }
} }
@ -27,6 +27,6 @@ func TestHashes64(t *testing.T) {
7065940388079601005, 7065940388079601005,
13953051952027146823} 13953051952027146823}
if !reflect.DeepEqual(hs, want) { if !reflect.DeepEqual(hs, want) {
t.Errorf(`Got %v, want %v`, hs, want) t.Errorf(`Got %v, want %v.`, hs, want)
} }
} }

View File

@ -1,25 +1,19 @@
package hyper package hyper
// Params returns discretization parameters. // Params helps with discretization parameters.
// numBuckets represents number of discretization buckets into // numBuckets is number of buckets per dimension.
// which all values will fall. Ids of those buckets will be used // min and max are value limits per dimension.
// to create hashes. // epsPercent is the uncertainty interval expressed as a fraction
// min and max are minimum and maximum possible values // of bucketWidth.
// of discretized variable. // eps is the absolute value of the uncertainty interval epsilon.
// bucketWidth is width of the discretization bucket.
// bucketPct is percentage of bucketWidth to allow for an error
// of discretized variable (a specific value of a discretized
// variable may fall into 2 buckets simultaneously).
// eps is actual width corresponding to the bucketWidth bucketPct
// on the discretized variable axis.
func Params( func Params(
numBuckets int, min, max, bucketPct float64) (bucketWidth, eps float64) { numBuckets int, min, max, epsPercent float64) (bucketWidth, eps float64) {
if bucketPct >= 0.5 { if epsPercent >= 0.5 {
panic(`Error: bucketPct must be less than 50%. panic(`Error: epsPercent must be less than 50%.
Recommendation: decrease numBuckets instead.`) Recommendation: decrease numBuckets instead.`)
} }
bucketWidth = (max - min) / float64(numBuckets) bucketWidth = (max - min) / float64(numBuckets)
eps = bucketPct * bucketWidth eps = epsPercent * bucketWidth
return bucketWidth, eps return bucketWidth, eps
} }

View File

@ -10,10 +10,10 @@ func TestParams(t *testing.T) {
bucketWidth, eps := Params(numBuckets, min, max, bucketPct) bucketWidth, eps := Params(numBuckets, min, max, bucketPct)
wantBucketWidth, wantEps := 25.5, 6.375 wantBucketWidth, wantEps := 25.5, 6.375
if bucketWidth != wantBucketWidth { if bucketWidth != wantBucketWidth {
t.Errorf(`Got bucketWidth %v, want %v`, bucketWidth, wantBucketWidth) t.Errorf(`Got bucketWidth %v, want %v.`, bucketWidth, wantBucketWidth)
} }
if eps != wantEps { if eps != wantEps {
t.Errorf(`Got eps %v, want %v`, eps, wantEps) t.Errorf(`Got eps %v, want %v.`, eps, wantEps)
} }
} }