commit c0561abb32fdf20cbb8fca3199a8c4384afbf21d
Author: Vitali Fedulov <fedulov.vitali@gmail.com>
Date:   Mon Oct 25 15:50:52 2021 +0200

    first commit

diff --git a/about.go b/about.go
new file mode 100644
index 0000000..fdd2c59
--- /dev/null
+++ b/about.go
@@ -0,0 +1,4 @@
+// Package hyper discretizes n-dimensional space and generates hashes,
+// so that fast approximate search of nearest points in n-space is
+// possible.
+package hyper
diff --git a/buckets.go b/buckets.go
new file mode 100644
index 0000000..6229181
--- /dev/null
+++ b/buckets.go
@@ -0,0 +1,100 @@
+package hyper
+
+// Params returns discretization parameters. numBuckets is the number of
+// discretization buckets into which all values will fall; ids of those buckets
+// will be used to create hashes. min and max are minimum and maximum possible
+// values of the discretized variable. bucketPct is the fraction of bucketWidth
+// allowed as an error of the variable (a value near a bucket border may fall
+// into 2 buckets simultaneously). bucketWidth is the width of one bucket, eps
+// is that allowance on the variable axis. Params panics on invalid arguments.
+func Params(numBuckets int, min, max, bucketPct float64) (bucketWidth, eps float64) {
+	if bucketPct >= 0.5 {
+		panic("Error: bucketPct must be less than 50%. Recommendation: decrease numBuckets instead.")
+	}
+	// Negated comparisons also reject NaN arguments.
+	if numBuckets <= 0 || !(max > min) || !(bucketPct >= 0) {
+		panic("Error: numBuckets must be positive, max greater than min and bucketPct not negative.")
+	}
+	bucketWidth = (max - min) / float64(numBuckets)
+	return bucketWidth, bucketPct * bucketWidth
+}
+
+// Buckets generates a set of slices of all possible bucket ids
+// as permutations based on n-dimensional space discretization.
+// point are values for each of those n dimensions.
+// min and max are minimum and maximum possible values of discretized
+// point components. The assumption is that min and max are the same for all
+// dimensions (in the context of the Buckets function).
+// bucketWidth and eps are defined in the Params function.
+//
+// Every returned slice holds len(point) ids, one per dimension. A component
+// whose [val-eps, val+eps] interval crosses a bucket border contributes both
+// neighbouring ids and doubles the number of slices, so the result can hold up
+// to 2^len(point) slices. An empty point yields a nil tree.
+//
+// Example: with min 0, max 100, bucketWidth 10 and eps 1 the point {5, 19.5}
+// gives [[0 1] [0 2]], because 19.5 is within eps of the border between
+// buckets 1 and 2 while 5 is not near any border.
+//
+// NOTE(review): ids are computed from the raw value, not from its offset to
+// min, and the conversion to int truncates toward zero, so quotients in
+// (-1, 1) all map to id 0. This matters when min is negative.
+func Buckets(point []float64, min, max, bucketWidth, eps float64) (tree [][]int) {
+	// Every row ends up with one id per dimension; reserving that capacity
+	// when a row is created keeps all later appends allocation-free.
+	dims := len(point)
+
+	// For each component of the point.
+	for _, val := range point {
+		// Ids of the buckets holding the ends of [val-eps, val+eps].
+		bL := int((val - eps) / bucketWidth)
+		bR := int((val + eps) / bucketWidth)
+
+		if val-eps < min { // No bucket for smaller than min.
+			bL = bR
+		} else if val+eps > max { // No bucket for larger than max.
+			bR = bL
+		}
+
+		// First component: start the tree with one row per bucket id.
+		if len(tree) == 0 {
+			tree = append(tree, append(make([]int, 0, dims), bL))
+			if bR != bL {
+				tree = append(tree, append(make([]int, 0, dims), bR))
+			}
+			continue
+		}
+
+		// No branching: the single id extends every row.
+		if bL == bR {
+			for i := range tree {
+				tree[i] = append(tree[i], bL)
+			}
+			continue
+		}
+
+		// Branching: every existing row is duplicated for the right bucket.
+		// The duplicate must own its backing array. Copying only the slice
+		// headers lets the appends of bL and bR write into the same spare
+		// capacity, so the right id silently overwrites the left one.
+		rows := len(tree)
+		for i := 0; i < rows; i++ {
+			right := make([]int, len(tree[i]), dims)
+			copy(right, tree[i])
+			tree = append(tree, append(right, bR))
+		}
+		for i := 0; i < rows; i++ {
+			tree[i] = append(tree[i], bL)
+		}
+	}
+
+	// Verification that branching works correctly and no buckets are lost.
+	// TODO: Disable once whole package got tested on large image sets.
+	for i := range tree {
+		if len(tree[i]) != dims {
+			panic(`Buckets slice length must be equal to len(point).`)
+		}
+	}
+
+	return tree
+}
diff --git a/buckets_test.go b/buckets_test.go
new file mode 100644
index 0000000..571ab27
--- /dev/null
+++ b/buckets_test.go
@@ -0,0 +1,46 @@
+package hyper
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestParams(t *testing.T) {
+	// 10 buckets over [0, 255] with a quarter of a bucket as tolerance.
+	gotWidth, gotEps := Params(10, 0.0, 255.0, 0.25)
+	const wantBucketWidth, wantEps = 25.5, 6.375
+	if gotWidth != wantBucketWidth {
+		t.Errorf(`Got bucketWidth %v, want %v`, gotWidth, wantBucketWidth)
+	}
+	if gotEps != wantEps {
+		t.Errorf(`Got eps %v, want %v`, gotEps, wantEps)
+	}
+}
+
+func TestParamsPanic(t *testing.T) {
+	// A recovered panic ends the test silently, which is the passing outcome.
+	defer func() { recover() }()
+	// bucketPct of 0.51 is intentionally outside the allowed range.
+	Params(10, 0.0, 255.0, 0.51)
+	// Only reached when Params returned normally instead of panicking.
+	t.Errorf("Params did not panic on bucketPct > 0.5")
+}
+
+func TestBuckets(t *testing.T) {
+	numBuckets, min, max := 10, 0.0, 255.0
+	bucketWidth, eps := Params(numBuckets, min, max, 0.25)
+	// check compares the bucket tree generated for values against want.
+	check := func(values []float64, want [][]int) {
+		got := Buckets(values, min, max, bucketWidth, eps)
+		if !reflect.DeepEqual(got, want) {
+			t.Errorf(`Got %v, want %v. Number of buckets is %v.`, got, want, numBuckets)
+		}
+	}
+
+	// Components 25.5 and 210.3 are near bucket borders, so each one branches.
+	check([]float64{25.5, 0.01, 210.3, 93.9, 6.6, 9.1, 254.9}, [][]int{
+		{0, 0, 7, 3, 0, 0, 9}, {1, 0, 7, 3, 0, 0, 9},
+		{0, 0, 8, 3, 0, 0, 9}, {1, 0, 8, 3, 0, 0, 9}})
+	check([]float64{0.01, bucketWidth * 2 * 0.999, bucketWidth * 2 * 1.001},
+		[][]int{{0, 1, 1}, {0, 2, 1}, {0, 1, 2}, {0, 2, 2}})
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..97b44e2
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/vitali-fedulov/hyper
+
+go 1.16
diff --git a/hashes.go b/hashes.go
new file mode 100644
index 0000000..8f72209
--- /dev/null
+++ b/hashes.go
@@ -0,0 +1,28 @@
+package hyper
+
+import (
+	"bytes"
+	"encoding/gob"
+	"hash/fnv"
+)
+
+// Hash is the type of a hashing function; a specific one can be (re)defined.
+type Hash func(buckets []int) uint64
+
+// Fnva64 is a Hash that gob-encodes the bucket ids and returns the 64-bit
+// FNV-1a digest of the encoded bytes. Encoding errors are not checked.
+func Fnva64(buckets []int) uint64 {
+	encoded := new(bytes.Buffer)
+	gob.NewEncoder(encoded).Encode(buckets)
+	digest := fnv.New64a()
+	encoded.WriteTo(digest)
+	return digest.Sum64()
+}
+
+// HashSet returns the hashes of all rows of tree, computed by hash, in row order.
+func HashSet(tree [][]int, hash Hash) (hs []uint64) {
+	for _, buckets := range tree {
+		hs = append(hs, hash(buckets))
+	}
+	return hs
+}
diff --git a/hashes_test.go b/hashes_test.go
new file mode 100644
index 0000000..e9a0311
--- /dev/null
+++ b/hashes_test.go
@@ -0,0 +1,32 @@
+package hyper
+
+import (
+	"reflect"
+	"testing"
+)
+
+func TestFnva64(t *testing.T) {
+	// Reference hash expected for the bucket ids passed below.
+	const want uint64 = 13992349377752315208
+	got := Fnva64([]int{5, 59, 255, 9, 7, 12, 22, 31})
+	if got != want {
+		t.Errorf(`Got %v, want %v`, got, want)
+	}
+}
+
+func TestHashSet(t *testing.T) {
+	// Each row of the tree is paired with the hash expected at the same index.
+	tree := [][]int{
+		{0, 0, 7, 3, 0, 0, 9},
+		{1, 0, 7, 3, 0, 0, 9},
+		{0, 0, 8, 3, 0, 0, 9},
+		{1, 0, 8, 3, 0, 0, 9}}
+	want := []uint64{
+		14647827280143437043,
+		17530493565529410009,
+		7065940388079601005,
+		13953051952027146823}
+	if got := HashSet(tree, Fnva64); !reflect.DeepEqual(got, want) {
+		t.Errorf(`Got %v, want %v`, got, want)
+	}
+}