-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfindDups.js
142 lines (125 loc) · 4.75 KB
/
findDups.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
const sharp = require('sharp');
const blockhash = require('blockhash');
const uri2path = require('file-uri-to-path');
const path = require('path');
const fs = require('fs');
const trash = require('trash');
// Switch off sharp's cache for this worker process.
sharp.cache(false);

// Every image is resized to SIZE x SIZE pixels before it is hashed.
const SIZE = 256;

// Upper bound (exclusive) on the distance for two images to be grouped by findDups();
// overwritten by the 'go' message.
let threshold = 1;

// True when hashes and distances must be (re)computed; cleared by calculateDistances()
// and set again by the 'newhashes' message.
let newHashes = true;

// IMAGE entries (path + hash) from the latest hashing run; set to null once the
// distances have been computed.
let currentFolderHashes = [];

// Candidate pairs as [distance, i_path, j_path].
let distances = [];
// Constructor for one hashed image: remembers where the file lives and its hash.
// Intended to be called with `new`.
function IMAGE(path, hash) {
  Object.assign(this, { path, hash });
}
// Entry point for commands coming from the main process.
//   'go'              -> store the requested threshold, then start hashing m.files
//   'newhashes'       -> force hashes to be regenerated on the next 'go'
//   'delete-selected' -> delete the files listed in m.paths
// Any other message is ignored.
process.on('message', (m) => {
  const command = m.message;

  if (command === 'go') {
    threshold = m.threshold;
    generateHashes(m.files);
  } else if (command === 'newhashes') {
    newHashes = true;
  } else if (command === 'delete-selected') {
    deleteSelected(m.paths);
  }
});
// Hamming distance between a and b: the number of indices (over the length of a)
// at which the two sequences hold different elements. Works on strings and arrays.
// Positions of a that b does not have count as differences; surplus elements of b are ignored.
function hd(a, b) {
  let mismatches = 0;
  let pos = 0;
  while (pos < a.length) {
    if (a[pos] !== b[pos]) {
      mismatches += 1;
    }
    pos += 1;
  }
  return mismatches;
}
// Generates a hash for every file in `files` and stores the results in currentFolderHashes
// as IMAGE entries (path + hash), then continues with calculateDistances().
//
// Hashing only happens while `newHashes` is set; otherwise the previously computed
// distances are reused and only calculateDistances() runs.
//
// A file that cannot be read or hashed is logged and left out of currentFolderHashes,
// so one broken image never leaves an entry without a hash for the comparison step.
//
// files: array of image paths (anything that is not an array is treated as empty).
// Returns a promise that settles after calculateDistances() has been called.
async function generateHashes(files) {
  if (newHashes) {
    process.send({ message: 'display-status', status: 'Generating Hashes...' });

    const fileList = Array.isArray(files) ? files : [];

    // Resolves to the hash of one file, or null when the file could not be processed.
    const hashFile = async (file) => {
      try {
        // Scale to a fixed SIZE x SIZE raw pixel buffer so every image is hashed
        // from the same number of pixels.
        const pixels = await sharp(file)
          .resize(SIZE, SIZE, { interpolator: sharp.interpolator.bilinear })
          .raw()
          .toBuffer();
        // NOTE(review): 16 and 1 are presumably blockhash's `bits` and `method`
        // arguments - confirm against the blockhash version in use.
        return blockhash.blockhashData({ width: SIZE, height: SIZE, data: pixels }, 16, 1);
      } catch (reason) {
        console.log(reason);
        return null;
      }
    };

    // hashFile never rejects, so Promise.all always yields one slot per input file.
    const hashes = await Promise.all(fileList.map(hashFile));

    const hashed = [];
    hashes.forEach((hash, index) => {
      if (hash != null) {
        hashed.push(new IMAGE(fileList[index], hash));
      }
    });
    currentFolderHashes = hashed;
  }
  calculateDistances();
}
// Runs after generateHashes. When new hashes are available it compares every pair of
// entries in currentFolderHashes (N * (N - 1) / 2 comparisons) and keeps only the pairs
// whose hamming distance is below MAX_STORED_DISTANCE in the array `distances`:
// [ [distance, i_path, j_path], ... ]
// Entries that carry no hash are skipped so they cannot break the comparison.
// Afterwards the hashes are released, `newHashes` is cleared and findDups() is called.
// When `newHashes` is not set, the stored distances are reused and only findDups() runs.
function calculateDistances() {
  process.send({ message: 'display-status', status: 'Finding Duplicates...' });

  if (newHashes) {
    // Pairs at or above this distance are never stored, so findDups() can only
    // report pairs closer than this regardless of the requested threshold.
    const MAX_STORED_DISTANCE = 11;

    const images = (currentFolderHashes || []).filter(
      (image) => image != null && image.hash != null,
    );

    const found = [];
    for (let i = 0; i < images.length; i++) {
      const { hash: hashI, path: pathI } = images[i];
      for (let j = i + 1; j < images.length; j++) {
        const distance = hd(hashI, images[j].hash);
        if (distance < MAX_STORED_DISTANCE) {
          found.push([distance, pathI, images[j].path]);
        }
      }
    }

    distances = found;
    newHashes = false;
    // The hashes are no longer needed once the distances are stored.
    currentFolderHashes = null;
  }

  findDups();
}
// Filters the array `distances` with the current threshold (strict or relaxed) and
// regroups the surviving pairs into an array of arrays, each subarray being a "group".
// A group is made of similar images, which when displayed becomes a row on the table.
//
// Grouping rules:
//  - consecutive entries that share the same i_path feed the same group;
//  - a pair is used only when its distance is strictly below `threshold`;
//  - every path is placed in at most one group (the first one that reaches it);
//  - only groups holding 2 to 4 paths are reported.
//
// Sends the results to the main process as { message: 'final-results', results },
// which sends them on to the renderer.
function findDups() {
  if (distances.length === 0) {
    process.send({ message: 'final-results', results: [] });
    return;
  }

  const groups = [];
  // Paths that already belong to some group.
  const grouped = new Set();

  let currentIPath = distances[0][1];
  let group = [];
  groups.push(group);

  for (const [distance, iPath, jPath] of distances) {
    // A change of i_path starts a new group.
    if (iPath !== currentIPath) {
      currentIPath = iPath;
      group = [];
      groups.push(group);
    }

    if (distance < threshold) {
      if (!grouped.has(iPath)) {
        group.push(iPath);
        grouped.add(iPath);
      }
      if (!grouped.has(jPath)) {
        group.push(jPath);
        grouped.add(jPath);
      }
    }
  }

  const results = groups.filter((members) => members.length > 1 && members.length < 5);
  process.send({ message: 'final-results', results });
}
// Receives an array of file URIs to delete, deletes the files from the computer and
// removes every stored pair that involves one of them from `distances`.
//
// The files are removed with fs.unlinkSync, i.e. permanently. A file that cannot be
// removed is logged and skipped; its pairs are still dropped from `distances`.
//
// paths: array of file URIs, each converted to a filesystem path with uri2path.
function deleteSelected(paths) {
  const targets = paths.map((uri) => uri2path(uri));

  for (const target of targets) {
    try {
      fs.unlinkSync(target);
    } catch (error) {
      console.log(error);
    }
  }

  // Compare normalised paths on both sides so that equivalent spellings still match.
  const removed = new Set(targets);
  for (const target of targets) {
    removed.add(path.normalize(target));
  }

  distances = distances.filter(([, iPath, jPath]) => {
    return !removed.has(path.normalize(iPath)) && !removed.has(path.normalize(jPath));
  });
}