-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFolderComparer.cs
More file actions
217 lines (182 loc) · 8.39 KB
/
FolderComparer.cs
File metadata and controls
217 lines (182 loc) · 8.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
using K4os.Hash.xxHash;
using MediaManager.Logging;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
namespace MediaManager
{
internal class FolderComparer(ILogger logger)
{
// Logger used for all progress and error output; construction fails with ArgumentNullException when none is supplied.
private readonly ILogger log = logger ?? throw new ArgumentNullException(nameof(logger));
// First folder to compare (the "A" side). Empty until Run() assigns it.
private string _sourceFolderA = "";
// Second folder to compare (the "B" side). Empty until Run() assigns it.
private string _sourceFolderB = "";
// Directory under which the "MediaManager_..." output folders are created.
private string _destinationFolder = "";
// Label for the A side; used in log messages and in the "MediaManager_Unique_<label>" folder name.
private string _labelA = "A";
// Label for the B side; used in log messages and in the "MediaManager_Unique_<label>" folder name.
private string _labelB = "B";
// Suffix of the "MediaManager_<label>" folder that receives files found on both sides.
private string _labelResults = "Results";
/// <summary>
/// Points the comparer at a fixed set of hard-coded folders and runs the comparison.
/// </summary>
/// <exception cref="DirectoryNotFoundException">
/// Propagated from <see cref="CompareFolders"/> when either hard-coded source folder is missing.
/// </exception>
public void Run()
{
    // Hard-coded locations: the two folders to compare and the directory that receives the output folders.
    _sourceFolderA = @"C:\Users\User\Downloads\test folder one";
    _sourceFolderB = @"C:\Users\User\Downloads\test folder two";
    _destinationFolder = @"C:\Users\User\Downloads";

    CompareFolders();
}
/// <summary>
/// Computes the SHA-256 digest of a file's contents (about 0.5 GB/s according to the original author's note).
/// </summary>
/// <remarks>
/// Collision risk is negligible. A repeated hash inside one folder therefore practically always means
/// the same content stored under a different file name.
/// </remarks>
/// <param name="filePath">Path of the file to hash.</param>
/// <returns>The digest as 64 uppercase hexadecimal characters with no separators.</returns>
private string ComputeHashSHA256(string filePath)
{
    using FileStream input = File.OpenRead(filePath);
    using SHA256 hasher = SHA256.Create();

    // The stream is consumed by the hasher, so the file is never held in memory as a whole.
    byte[] digest = hasher.ComputeHash(input);
    return Convert.ToHexString(digest);
}
/// <summary>
/// Computes the 64-bit xxHash (XXH64) of a file's contents.
/// </summary>
/// <remarks>
/// The file is fed to the hasher in fixed-size chunks instead of being loaded with
/// <c>File.ReadAllBytes</c>, so memory use stays constant and files of 2 GB or more (which
/// <c>ReadAllBytes</c> cannot load) can be hashed. The original note quoted about 19.4 GB/s for
/// the hash itself; in practice throughput is bounded by how fast the file can be read.
/// </remarks>
/// <param name="filePath">Path of the file to hash.</param>
/// <returns>The hash as 16 lowercase hexadecimal characters.</returns>
private string ComputeHashXXH64(string filePath)
{
    // 80 KB keeps the buffer below the large-object-heap threshold.
    const int bufferSize = 80 * 1024;

    using FileStream stream = File.OpenRead(filePath);
    byte[] buffer = new byte[bufferSize];
    var hasher = new XXH64();

    int bytesRead;
    while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0)
    {
        // Only the bytes actually read belong to the file; the rest of the buffer is stale.
        hasher.Update(buffer, 0, bytesRead);
    }

    return hasher.Digest().ToString("x16");
}
/// <summary>
/// Entry point of the comparison: checks that both configured source folders exist and then
/// compares them recursively, starting at their roots.
/// </summary>
/// <exception cref="DirectoryNotFoundException">Either source folder does not exist.</exception>
public void CompareFolders()
{
    bool bothFoldersExist = Directory.Exists(_sourceFolderA) && Directory.Exists(_sourceFolderB);
    if (!bothFoldersExist)
    {
        throw new DirectoryNotFoundException("One of the folders does not exist.");
    }

    // At the top level the current folders and the roots are the same paths.
    CompareFoldersRecursive(_sourceFolderA, _sourceFolderB, _sourceFolderA, _sourceFolderB);
}
/// <summary>
/// Compares the files directly inside two corresponding folders, then handles their subfolders:
/// subfolders present on both sides are compared recursively, subfolders present on one side only
/// are copied whole into that side's "unique" output folder.
/// </summary>
/// <remarks>
/// Subfolders are paired by asking the file system whether the same name exists on the other side,
/// rather than by merging the two name lists in a case-sensitive set. On a case-insensitive file
/// system, names that differ only in case (for example "Photos" and "photos") are therefore treated
/// as one pair and processed once instead of twice.
/// </remarks>
/// <param name="currentFolder1">Folder currently being examined on the A side.</param>
/// <param name="currentFolder2">Folder currently being examined on the B side.</param>
/// <param name="root1">Root of the A side; output paths are made relative to it.</param>
/// <param name="root2">Root of the B side; output paths are made relative to it.</param>
private void CompareFoldersRecursive(string currentFolder1, string currentFolder2, string root1, string root2)
{
    log.Log($"Hashing folder1: {currentFolder1}", LogLevel.Verbose);
    Dictionary<string, string> folder1Hashes = BuildFileHashes(currentFolder1);
    log.Log($"Hashing folder2: {currentFolder2}", LogLevel.Verbose);
    Dictionary<string, string> folder2Hashes = BuildFileHashes(currentFolder2);

    // Compare in both directions so files unique to either side are found.
    // Only the first pass copies matching files to the results folder, so matches are not copied twice.
    CompareFileSets(folder1Hashes, folder2Hashes, root1, _labelA, $"MediaManager_Unique_{_labelA}", $"MediaManager_{_labelResults}", true);
    CompareFileSets(folder2Hashes, folder1Hashes, root2, _labelB, $"MediaManager_Unique_{_labelB}", null, false);

    // Take both listings before anything is copied or recursed into.
    string[] dirs1 = Directory.GetDirectories(currentFolder1);
    string[] dirs2 = Directory.GetDirectories(currentFolder2);

    // Pass 1: every subfolder of A is either unique to A or paired with its counterpart in B.
    foreach (string dir1 in dirs1)
    {
        string dirName = Path.GetFileName(dir1);
        string sub1 = Path.Combine(currentFolder1, dirName);
        string sub2 = Path.Combine(currentFolder2, dirName);

        if (!Directory.Exists(sub2))
        {
            // Folder only in A: copy to Unique_A.
            CopyUniqueFolder(sub1, root1, _labelA, "Folder2");
            continue;
        }

        CompareFoldersRecursive(sub1, sub2, root1, root2);
    }

    // Pass 2: subfolders of B with a counterpart in A were handled above; only B-only folders remain.
    foreach (string dir2 in dirs2)
    {
        string dirName = Path.GetFileName(dir2);
        string sub1 = Path.Combine(currentFolder1, dirName);
        string sub2 = Path.Combine(currentFolder2, dirName);

        if (!Directory.Exists(sub1))
        {
            // Folder only in B: copy to Unique_B.
            CopyUniqueFolder(sub2, root2, _labelB, "Folder1");
        }
    }

    // Copies a one-sided folder, keeping its path relative to its root, and logs which side lacked it.
    void CopyUniqueFolder(string folder, string root, string label, string missingSide)
    {
        string relativePath = Path.GetRelativePath(root, folder);
        string destPath = Path.Combine(_destinationFolder, $"MediaManager_Unique_{label}", relativePath);

        // Created explicitly so an empty source folder still appears in the output.
        Directory.CreateDirectory(destPath);
        CopyDirectory(folder, destPath);
        log.Log($"Folder missing in {missingSide}: {folder}. Copied to Unique_{label}.", LogLevel.Info);
    }
}
//
// Helpers
//
/// <summary>
/// Hashes every file located directly in <paramref name="folder"/>; subfolders are not searched.
/// </summary>
/// <param name="folder">Folder whose files are hashed.</param>
/// <returns>
/// A map whose key is the file hash and whose value is the file path. When several files share
/// a hash, only the first one encountered is kept; each later one is logged and left out.
/// </returns>
private Dictionary<string, string> BuildFileHashes(string folder)
{
    string[] filePaths = Directory.GetFiles(folder);
    var hashToPath = new Dictionary<string, string>();

    foreach (string filePath in filePaths)
    {
        string hash = ComputeHashXXH64(filePath);

        if (hashToPath.TryGetValue(hash, out string? keptPath))
        {
            // Same content already registered: report it and keep the earlier file.
            log.Log($"Duplicate in the same folder detected: \"{filePath}\" discarded. Keeping the first file: \"{keptPath}\"", LogLevel.Error);
            continue;
        }

        hashToPath.Add(hash, filePath);
    }

    return hashToPath;
}
/// <summary>
/// Walks one side's hashed files and sorts each into "unique" or "duplicate" depending on whether
/// the other side contains a file with the same hash, copying it to the matching output folder.
/// </summary>
/// <param name="sourceHashes">Hash-to-path map of the side being walked.</param>
/// <param name="targetHashes">Hash-to-path map of the side being checked against.</param>
/// <param name="sourceRoot">Root of the walked side; copied files keep their path relative to it.</param>
/// <param name="label">Name of the walked side, used in the "unique" log message.</param>
/// <param name="uniqueFolder">Output folder name (under the destination folder) for unique files.</param>
/// <param name="duplicateFolder">Output folder name for files found on both sides, or null for none.</param>
/// <param name="checkDuplicates">
/// When false, files found on both sides are skipped even if <paramref name="duplicateFolder"/> is set.
/// </param>
private void CompareFileSets(
    Dictionary<string, string> sourceHashes,
    Dictionary<string, string> targetHashes,
    string sourceRoot,
    string label,
    string uniqueFolder,
    string? duplicateFolder,
    bool checkDuplicates)
{
    foreach ((string hash, string filePath) in sourceHashes)
    {
        string relativePath = Path.GetRelativePath(sourceRoot, filePath);

        if (!targetHashes.ContainsKey(hash))
        {
            log.Log($"Unique in {label}: {relativePath}", LogLevel.Info);
            CopyWithStructure(filePath, Path.Combine(_destinationFolder, uniqueFolder), relativePath);
            continue;
        }

        // The file exists on both sides; it is only copied when duplicate handling is fully enabled.
        if (!checkDuplicates || duplicateFolder is null)
        {
            continue;
        }

        log.Log($"Duplicate found: {relativePath}", LogLevel.Verbose);
        CopyWithStructure(filePath, Path.Combine(_destinationFolder, duplicateFolder), relativePath);
    }
}
/// <summary>
/// Copies one file to <paramref name="destRoot"/> while keeping its relative folder structure,
/// creating missing folders and overwriting an existing file at the target.
/// </summary>
/// <param name="sourcePath">File to copy.</param>
/// <param name="destRoot">Root folder of the copy.</param>
/// <param name="relativePath">Path of the file below <paramref name="destRoot"/>.</param>
/// <remarks>
/// A failed copy is logged and does not stop the caller; a failure to create the target folder is
/// not caught here and propagates.
/// </remarks>
private void CopyWithStructure(string sourcePath, string destRoot, string relativePath)
{
    string targetPath = Path.Combine(destRoot, relativePath);
    string targetFolder = Path.GetDirectoryName(targetPath)!;
    Directory.CreateDirectory(targetFolder);

    try
    {
        File.Copy(sourcePath, targetPath, overwrite: true);
    }
    catch (Exception copyError)
    {
        log.Log($"Failed to copy \"{sourcePath}\" → \"{targetPath}\": {copyError.Message}", LogLevel.Error);
    }
}
/// <summary>
/// Recursively copies the contents of <paramref name="sourceDir"/> (all subfolders and files)
/// into <paramref name="destDir"/>, overwriting files that already exist there.
/// </summary>
/// <param name="sourceDir">Folder whose contents are copied.</param>
/// <param name="destDir">Folder that receives the copy.</param>
/// <remarks>
/// Target paths are built from each item's path relative to <paramref name="sourceDir"/>. A plain
/// text replacement of the source prefix is not used because it also rewrites later occurrences of
/// the same text inside the path and breaks when <paramref name="sourceDir"/> ends with a separator.
/// A file that cannot be copied is logged and skipped; the remaining files are still processed.
/// </remarks>
private void CopyDirectory(string sourceDir, string destDir)
{
    // Recreate the folder tree first so that empty folders are preserved as well.
    foreach (string dir in Directory.GetDirectories(sourceDir, "*", SearchOption.AllDirectories))
    {
        Directory.CreateDirectory(MapToDestination(dir));
    }

    foreach (string file in Directory.GetFiles(sourceDir, "*", SearchOption.AllDirectories))
    {
        string destFile = MapToDestination(file);
        Directory.CreateDirectory(Path.GetDirectoryName(destFile)!);
        try
        {
            File.Copy(file, destFile, true);
        }
        catch (Exception ex)
        {
            log.Log($"Failed to copy \"{file}\" → \"{destFile}\": {ex.Message}", LogLevel.Error);
        }
    }

    // Translates a path below sourceDir into the corresponding path below destDir.
    string MapToDestination(string sourcePath) =>
        Path.Combine(destDir, Path.GetRelativePath(sourceDir, sourcePath));
}
}
}