Duplicate file finder (logging only)

C Sharp - C# (programming language)
Post Reply
Posts: 94
Joined: Sun Mar 01, 2015 4:36 am

Duplicate file finder (logging only)

Post by dzcadii » Sun Mar 01, 2015 10:52 pm

I've been searching the internet for a program or script that would find duplicate files on my servers. I had no luck (minus a few) so I came up with this simple console application in C#.
Like I said, this is a simple program; use it at your own risk. It does not move, rename, or delete files. It simply scans for duplicates and logs them.

System requirements:
  1. Windows XP and above
  2. .NET framework 4 or above
  3. At least 1GB in RAM
  4. Microsoft Visual Studio (or the Express C# edition) 2010 (version 10) or above (Visual Studio 2008 does not support .NET 4 and above)
What does it do:
  1. Creates 2 log directories and 2 log files. 1 for the total list of files found and 1 for just the file name and MD5 hash of the file
  2. Scans either all directories or just one (see the notes for options)

Notes:
  1. You will have to edit the target directory and the target log directory
  2. If you look in the code at about line 67 - 69 (the two `foreach` lines in DirCrawler) you can comment out one line and uncomment the other to get the program to run through all directories and sub directories or just one specific one.
  3. The solution files are not provided. You can simply create your own console application solution and copy and paste the code below into it.
This program is provided without warranty and you accept all responsibility!

Names and NameSpace:
  1. File Name: Program.cs
  2. NameSpace: DupeFinder

Code: Select all

//The purpose of this program is to run through a single directory or all and log path, filename, md5 hash, and file size
//We then look for duplicate files based from filename and md5 hash of the file
//::::Still need to decide what to do once we find duplicates::::
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Security.Cryptography;
using System.Text.RegularExpressions;
using System.Diagnostics;
using System.Threading;
//using System.Net.Mail;

namespace DupeFinder
{
    /// <summary>
    /// Console tool that walks a directory tree, logs every file's name, MD5 hash,
    /// full path and size, and then writes a sorted list of "name + hash" pairs to a
    /// second log so that duplicate files end up on adjacent lines.
    /// It never moves, renames, or deletes the scanned files.
    /// </summary>
    class Program
    {
        //Marker wrapped around the "filename::<>::hash" part of every log line
        private const string EntryMarker = "<|>";
        //Separator between the "filename/hash" part and the "path/size" part of a log line
        private const string FieldSeparator = "::::";

        /// <summary>
        /// Entry point: sets up the log locations, crawls the target directory and
        /// reports the total run time (which includes the time spent waiting at prompts).
        /// </summary>
        /// <param name="args">Command line arguments (currently unused).</param>
        static void Main(string[] args)
        {
            //StartNew so the stopwatch is actually running; a stopwatch that is never started always reports zero
            Stopwatch stopwatch = Stopwatch.StartNew();
            //Current working directory (future use)
            //string cwdir = Directory.GetCurrentDirectory();
            //Target directory to search :: Long path (used for development)
            string targetdir = @"U:\inetpub\wwwroot";
            //Log file directory
            string logdir = @"U:\inetpub\DupeFinderLogs";
            //Hash table directory path
            string hashdir = Path.Combine(logdir, "FileHashTableLog");
            //Take the timestamp once so both log files are guaranteed to share the same prefix
            string stamp = DateTime.Now.ToString("MM_dd_yyyy_hh_mm_ss_tt", System.Globalization.CultureInfo.InvariantCulture);
            //Log file path
            string logfile = Path.Combine(logdir, stamp + "_DupeFinderLog.log");
            //Hash table log file path
            string hashtablefile = Path.Combine(hashdir, stamp + "_DupeFinderHashTableLog.log");

            //This is where we start doing the work
            //Check for log dir and file etc.
            CreateLogDirAndFile(logdir, hashdir, logfile, hashtablefile);

            //Crawl the dir(s)
            DirCrawler(targetdir, logfile, hashtablefile);

            TimeSpan ts = stopwatch.Elapsed;
            string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds / 10);

            //If we make it here then everything worked out
            Console.WriteLine("All operations have completed in " + elapsedTime + "!\r\nPress Enter to exit!");
            //Honor the prompt: keep the console window open until the user presses Enter
            Console.ReadLine();
        }

        /// <summary>
        /// Crawls <paramref name="targetdir"/>, writes one log line per readable file to
        /// <paramref name="logfile"/>, then hands over to <see cref="FindDupes"/>.
        /// </summary>
        /// <param name="targetdir">Directory to scan.</param>
        /// <param name="logfile">Path of the full log (name, hash, path, size).</param>
        /// <param name="hashtablefile">Path of the "name + hash" log filled by <see cref="FindDupes"/>.</param>
        static void DirCrawler(string targetdir, string logfile, string hashtablefile)
        {
            // string cwd = Directory.GetCurrentDirectory();
            Console.WriteLine("This will show all files in\r\n" + targetdir + "\r\nand log to\r\n" + logfile + " and " + hashtablefile + "\r\nPress Enter to begin");
            Console.ReadLine();
            //Counter for found files
            int found = 0;
            //Loop through the dir and read the files
            using (var writer = new StreamWriter(logfile))
            {
                //Runs through all directories and subdirectories.
                //EnumerateFiles streams the results instead of building the complete list up front.
                //NOTE: a directory that cannot be listed still aborts the enumeration with an exception.
                foreach (string file in Directory.EnumerateFiles(targetdir, "*.*", SearchOption.AllDirectories))
                //Run ONLY through this directory
                //foreach (string file in Directory.EnumerateFiles(targetdir))
                {
                    string m5;
                    long length;
                    try
                    {
                        //Get MD5 of file
                        m5 = GetMD5(file);
                        //File size (not really using it right now but there)
                        length = new FileInfo(file).Length;
                    }
                    catch (IOException ex)
                    {
                        //Locked or vanished file: report it and keep crawling instead of aborting the whole run
                        Console.WriteLine("Skipping " + file + " (" + ex.Message + ")");
                        continue;
                    }
                    catch (UnauthorizedAccessException ex)
                    {
                        Console.WriteLine("Skipping " + file + " (" + ex.Message + ")");
                        continue;
                    }

                    string name = Path.GetFileName(file);
                    writer.Write("<|>" + name + "::<>::" + m5 + "<|>::::" + file + "::<>::" + length + "\r\n");
                    //Write to console for debug
                    Console.WriteLine(name + "::<>::" + m5 + "::::<|>" + file + "<|>" + length + "\r\n");
                    found++;
                }
            }

            //Stop after everything and wait for user input to continue
            Console.WriteLine("\r\n\r\n I have completed logging to " + logfile + "!\r\nWith " + found + " files found!\r\nPress enter to search for duplicates!");
            Console.ReadLine();
            //Find duplicates from logfile
            FindDupes(logfile, hashtablefile);
        }

        /// <summary>
        /// Computes the MD5 hash of a file's contents.
        /// </summary>
        /// <param name="target">Path of the file to hash.</param>
        /// <returns>The hash as a lowercase hexadecimal string (two characters per byte).</returns>
        private static string GetMD5(string target)
        {
            //Both the stream and the hash object hold native resources; dispose them
            //so a crawl over many files does not leak handles
            using (FileStream fs = new FileStream(target, FileMode.Open, FileAccess.Read))
            using (MD5 md5 = MD5.Create())
            {
                byte[] bytes = md5.ComputeHash(fs);
                StringBuilder sBuilder = new StringBuilder(bytes.Length * 2);
                for (int i = 0; i < bytes.Length; i++)
                {
                    sBuilder.Append(bytes[i].ToString("x2"));
                }
                return sBuilder.ToString();
            }
        }

        /// <summary>
        /// Reads the full log, extracts the "filename::&lt;&gt;::hash" part of every line,
        /// sorts those pairs and appends them to <paramref name="hashtablefile"/>.
        /// After sorting, files with the same name and hash sit on adjacent lines.
        /// Does nothing when <paramref name="logfile"/> does not exist.
        /// </summary>
        /// <param name="logfile">Path of the log written by <see cref="DirCrawler"/>.</param>
        /// <param name="hashtablefile">Path of the file the sorted pairs are appended to.</param>
        private static void FindDupes(string logfile, string hashtablefile)
        {
            //Check for the logfile
            if (!File.Exists(logfile))
            {
                return;
            }

            //The delimiter is a plain literal, so string.Split is enough (no regex needed)
            string[] separators = { FieldSeparator };
            int markerLength = EntryMarker.Length;
            //Create list for filename/hash values only
            List<string> filesFound = new List<string>();
            //Run through the file and read line by line
            foreach (string line in File.ReadLines(logfile))
            {
                //Break the lines into pieces IAW pattern
                foreach (string piece in line.Split(separators, StringSplitOptions.None))
                {
                    //Only the filename/hash piece is wrapped in <|> ... <|>.
                    //The length check keeps a truncated line from throwing when the markers are stripped.
                    if (piece.Length >= markerLength * 2
                        && piece.StartsWith(EntryMarker, StringComparison.Ordinal)
                        && piece.EndsWith(EntryMarker, StringComparison.Ordinal))
                    {
                        //Remove <|> from beginning and end of string and add to list
                        filesFound.Add(piece.Substring(markerLength, piece.Length - markerLength * 2));
                    }
                }
            }

            //Sort list (ordinal: these are machine-readable keys, not text for display)
            filesFound.Sort(StringComparer.Ordinal);

            //Open the hash table log once and append every pair, instead of reopening the file per entry
            using (StreamWriter writer = File.AppendText(hashtablefile))
            {
                foreach (string fhPair in filesFound)
                {
                    writer.Write(fhPair + "\r\n");
                }
            }
        }

        /// <summary>
        /// Makes sure both log directories and both log files exist, creating whatever is missing.
        /// Files that already exist are left untouched.
        /// </summary>
        /// <param name="logdir">Directory for the full log.</param>
        /// <param name="hashdir">Directory for the hash table log.</param>
        /// <param name="logfile">Path of the full log file.</param>
        /// <param name="hashtablefile">Path of the hash table log file.</param>
        private static void CreateLogDirAndFile(string logdir, string hashdir, string logfile, string hashtablefile)
        {
            if (!Directory.Exists(logdir) || !Directory.Exists(hashdir))
            {
                //CreateDirectory creates missing parents, is a no-op for existing directories
                //and throws on failure, so reaching the message means both directories exist
                Directory.CreateDirectory(logdir);
                Directory.CreateDirectory(hashdir);
                Console.WriteLine("Log directory and hash directory created:\r\n" + logdir + "\r\n" + hashdir);
            }

            //Check for the log file(s) etc.
            //Each file is checked on its own so an existing log is never truncated
            if (!File.Exists(logfile))
            {
                //Dispose the stream right away; only the empty file is needed
                using (FileStream fs = File.Create(logfile))
                {
                }
                Console.WriteLine("Log file created:\r\n" + logfile + "\r\n");
            }

            if (!File.Exists(hashtablefile))
            {
                using (FileStream fs = File.Create(hashtablefile))
                {
                }
                Console.WriteLine("Hash Table Log file created:\r\n" + hashtablefile + "\r\n");
            }
        }
    }
}


Post Reply