diff --git a/about.go b/about.go index 129eb3c..3a663a7 100644 --- a/about.go +++ b/about.go @@ -3,7 +3,7 @@ package hyper // Package hyper allows fast approximate search of nearest // neighbour vectors in n-dimensional space. // Package functions discretize a vector and generate a set -// of fuzzy hashes, as described in the following paper: +// of fuzzy hashes, as described in the following document: // https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html // A typical sequence of functions when using the package is: diff --git a/hashes.go b/hashes.go index f811be3..3054c4e 100644 --- a/hashes.go +++ b/hashes.go @@ -6,7 +6,7 @@ import ( "hash/fnv" ) -// Hash64 can be any function of this kind. +// Hash64 can be any function (user defined, for example). type Hash64 func(buckets []int) uint64 // FVN1a is the default hash in this package. diff --git a/hashes_test.go b/hashes_test.go index 854fc80..6e78550 100644 --- a/hashes_test.go +++ b/hashes_test.go @@ -10,7 +10,7 @@ func TestDefault(t *testing.T) { hash := FVN1a(buckets) want := uint64(13992349377752315208) if hash != want { - t.Errorf(`Got %v, want %v`, hash, want) + t.Errorf(`Got %v, want %v.`, hash, want) } } @@ -27,6 +27,6 @@ func TestHashes64(t *testing.T) { 7065940388079601005, 13953051952027146823} if !reflect.DeepEqual(hs, want) { - t.Errorf(`Got %v, want %v`, hs, want) + t.Errorf(`Got %v, want %v.`, hs, want) } } diff --git a/hypercubes.go b/hypercubes.go index e5808fc..1268e4d 100644 --- a/hypercubes.go +++ b/hypercubes.go @@ -1,25 +1,19 @@ package hyper -// Params returns discretization parameters. -// numBuckets represents number of discretization buckets into -// which all values will fall. Ids of those buckets will be used -// to create hashes. -// min and max are minimum and maximum possible values -// of discretized variable. -// bucketWidth is width of the discretization bucket. 
-// bucketPct is percentage of bucketWidth to allow for an error -// of discretized variable (a specific value of a discretized -// variable may fall into 2 buckets simultaneosly). -// eps is actual width corresponding to the bucketWidth bucketPct -// on the discretized variable axis. +// Params computes the discretization parameters bucketWidth and eps. +// numBuckets is the number of buckets per dimension. +// min and max are the value limits per dimension. +// epsPercent is the uncertainty interval expressed as a fraction +// of bucketWidth (must be less than 0.5). +// eps is the absolute value of the uncertainty interval epsilon. func Params( - numBuckets int, min, max, bucketPct float64) (bucketWidth, eps float64) { - if bucketPct >= 0.5 { - panic(`Error: bucketPct must be less than 50%. + numBuckets int, min, max, epsPercent float64) (bucketWidth, eps float64) { + if epsPercent >= 0.5 { + panic(`Error: epsPercent must be less than 50%. Recommendation: decrease numBuckets instead.`) } bucketWidth = (max - min) / float64(numBuckets) - eps = bucketPct * bucketWidth + eps = epsPercent * bucketWidth return bucketWidth, eps } diff --git a/hypercubes_test.go b/hypercubes_test.go index 089724c..559d45e 100644 --- a/hypercubes_test.go +++ b/hypercubes_test.go @@ -10,10 +10,10 @@ func TestParams(t *testing.T) { bucketWidth, eps := Params(numBuckets, min, max, bucketPct) wantBucketWidth, wantEps := 25.5, 6.375 if bucketWidth != wantBucketWidth { - t.Errorf(`Got bucketWidth %v, want %v`, bucketWidth, wantBucketWidth) + t.Errorf(`Got bucketWidth %v, want %v.`, bucketWidth, wantBucketWidth) } if eps != wantEps { - t.Errorf(`Got eps %v, want %v`, eps, wantEps) + t.Errorf(`Got eps %v, want %v.`, eps, wantEps) } }