177 lines
4.4 KiB
Go
177 lines
4.4 KiB
Go
package hyper
|
|
|
|
type (
	// Cube is a hypercube, represented by a slice of its coordinates
	// (one bucket number per dimension).
	Cube []int

	// Cubes is a collection of hypercubes.
	Cubes []Cube
)
|
|
|
|
// Params holds the parameters of space discretization.
type Params struct {
	// Min is the lower value limit per dimension
	// (for example 0 for pixel values).
	Min float64

	// Max is the upper value limit per dimension
	// (for example 255 for pixel values).
	Max float64

	// EpsPercent is the uncertainty interval expressed as a fraction
	// of the bucket width (for example 0.25 for eps = 1/4 of the
	// bucket width).
	EpsPercent float64

	// NumBuckets is the number of buckets per dimension.
	NumBuckets int
}
|
|
|
|
// CubeSet returns a set of hypercubes, which represent
|
|
// fuzzy discretization of one n-dimensional vector,
|
|
// as described in
|
|
// https://vitali-fedulov.github.io/algorithm-for-hashing-high-dimensional-float-vectors.html
|
|
// One hupercube is defined by bucket numbers in each dimension.
|
|
// min and max are minimum and maximum possible values of
|
|
// the vector components. The assumption is that min and max
|
|
// are the same for all dimensions.
|
|
func CubeSet(vector []float64, params Params) (set Cubes) {
|
|
|
|
if params.EpsPercent >= 0.5 {
|
|
panic(`Error: EpsPercent must be less than 0.5.`)
|
|
}
|
|
|
|
var (
|
|
bC int // Central bucket number.
|
|
bL, bR int // Left and right bucket number.
|
|
setL, setR Cubes // Set clones (for Left and Right).
|
|
branching bool // Branching flag.
|
|
)
|
|
|
|
// Rescaling vector to avoid potential mistakes with
|
|
// divisions and offsets later on.
|
|
rescaled := rescale(vector, params)
|
|
// After the rescale value range of the vector are
|
|
// [0, numBuckets], and not [min, max].
|
|
|
|
// min = 0.0 from now on.
|
|
max := float64(params.NumBuckets)
|
|
|
|
for _, val := range rescaled {
|
|
|
|
branching = false
|
|
|
|
bL = int(val - params.EpsPercent)
|
|
bR = int(val + params.EpsPercent)
|
|
|
|
// Get extreme values out of the way.
|
|
if val-params.EpsPercent <= 0.0 { // This means that val >= 0.
|
|
bC = bR
|
|
goto branchingCheck // No branching.
|
|
}
|
|
|
|
// Get extreme values out of the way.
|
|
if val+params.EpsPercent >= max { // This means that val =< max.
|
|
// Above max = numBuckets.
|
|
bC = bL
|
|
goto branchingCheck // No branching.
|
|
}
|
|
|
|
if bL == bR {
|
|
bC = bL
|
|
goto branchingCheck // No branching.
|
|
} else { // Meaning bL != bR and not any condition above.
|
|
branching = true
|
|
}
|
|
|
|
branchingCheck:
|
|
|
|
if branching {
|
|
|
|
setL = clone(set)
|
|
setR = clone(set)
|
|
|
|
if len(setL) == 0 {
|
|
setL = append(setL, []int{bL})
|
|
} else {
|
|
for i := range setL {
|
|
setL[i] = append(setL[i], bL)
|
|
}
|
|
}
|
|
|
|
if len(setR) == 0 {
|
|
setR = append(setR, []int{bR})
|
|
} else {
|
|
for i := range setR {
|
|
setR[i] = append(setR[i], bR)
|
|
}
|
|
}
|
|
|
|
set = append(setL, setR...)
|
|
|
|
} else { // No branching.
|
|
if len(set) == 0 {
|
|
set = append(set, []int{bC})
|
|
} else {
|
|
for i := range set {
|
|
set[i] = append(set[i], bC)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Real use case verification that branching works correctly
|
|
// and no buckets are lost for a very large number of vectors.
|
|
// TODO: Remove once tested.
|
|
for i := 0; i < len(set); i++ {
|
|
if len(set[i]) != len(vector) {
|
|
panic(`Number of hypercube coordinates must equal
|
|
to len(vector).`)
|
|
}
|
|
}
|
|
|
|
return set
|
|
}
|
|
|
|
// CentralCube returns the hypercube containing the vector end.
|
|
// Arguments are the same as for the CubeSet function.
|
|
func CentralCube(vector []float64, params Params) (central Cube) {
|
|
|
|
if params.EpsPercent >= 0.5 {
|
|
panic(`Error: EpsPercent must be less than 0.5.`)
|
|
}
|
|
|
|
var bC int // Central bucket numbers.
|
|
|
|
// Rescaling vector to avoid potential mistakes with
|
|
// divisions and offsets later on.
|
|
rescaled := rescale(vector, params)
|
|
// After the rescale value range of the vector are
|
|
// [0, numBuckets], and not [min, max].
|
|
|
|
// min = 0.0 from now on.
|
|
max := float64(params.NumBuckets)
|
|
|
|
for _, val := range rescaled {
|
|
bC = int(val)
|
|
if val-params.EpsPercent <= 0.0 { // This means that val >= 0.
|
|
bC = int(val + params.EpsPercent)
|
|
}
|
|
if val+params.EpsPercent >= max { // Meaning val =< max.
|
|
bC = int(val - params.EpsPercent)
|
|
}
|
|
central = append(central, bC)
|
|
}
|
|
return central
|
|
}
|
|
|
|
// rescale is a helper function to offset and rescale all values
|
|
// to [0, numBuckets] range.
|
|
func rescale(vector []float64, params Params) []float64 {
|
|
rescaled := make([]float64, len(vector))
|
|
amp := params.Max - params.Min
|
|
for i := range vector {
|
|
// Offset to zero and rescale to [0, numBuckets] range.
|
|
rescaled[i] =
|
|
(vector[i] - params.Min) * float64(params.NumBuckets) / amp
|
|
}
|
|
return rescaled
|
|
}
|
|
|
|
// clone makes an unlinked copy of a 2D slice.
|
|
func clone(src Cubes) (dst Cubes) {
|
|
dst = make(Cubes, len(src))
|
|
for i := range src {
|
|
dst[i] = append(Cube{}, src[i]...)
|
|
}
|
|
return dst
|
|
}
|