mirror of
https://github.com/qwertyforce/scenery.git
synced 2025-05-31 11:42:35 +00:00
53 lines
1.6 KiB
TypeScript
53 lines
1.6 KiB
TypeScript
/* eslint-disable @typescript-eslint/no-var-requires */
|
|
import fs from 'fs'
|
|
import path from 'path'
|
|
import image_ops from "../helpers/image_ops"
|
|
|
|
// const PATH_TO_IMAGES = path.join("..", "..", "..", 'import_from_derpi', 'images')
|
|
const PATH_TO_IMAGES = path.join("D:", "derpi_downloader", 'Downloads', 'derpi2')
|
|
const IMAGES = fs.readdirSync(PATH_TO_IMAGES)
|
|
|
|
|
|
|
|
|
|
function hamming_distance(str1: string, str2: string) {
|
|
// if (str1.length !== str2.length) {
|
|
// console.log("Length must be the same")
|
|
// return
|
|
// }
|
|
let distance = 0;
|
|
for (let i = 0; i < str1.length; i += 1) {
|
|
if (str1[i] !== str2[i]) {
|
|
distance += 1;
|
|
}
|
|
}
|
|
return distance;
|
|
}
|
|
async function find_duplicates() {
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
const hashes: { [key: string]: any } = {};
|
|
for (const image_file_name of IMAGES) {
|
|
// console.log(IMAGES.indexOf(image_file_name))
|
|
|
|
try {
|
|
const phash = await image_ops.get_phash(`${PATH_TO_IMAGES}/${image_file_name}`);
|
|
for (const hash of Object.keys(hashes)) {
|
|
const distance = hamming_distance(hash, phash)
|
|
if (distance < 20) {
|
|
console.log("===========")
|
|
console.log(distance)
|
|
console.log(`${image_file_name} is a duplicate of ${hashes[hash]}`)
|
|
console.log("===========")
|
|
continue
|
|
}
|
|
}
|
|
hashes[phash] = image_file_name
|
|
} catch (err) {
|
|
console.log(err)
|
|
console.log(image_file_name)
|
|
}
|
|
}
|
|
}
|
|
find_duplicates()
|
|
|