diff --git a/.gitignore b/.gitignore index e69de29..2b412f2 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +dedup diff --git a/README.md b/README.md index a2d59bc..5c3c1f2 100644 --- a/README.md +++ b/README.md @@ -11,20 +11,43 @@ go install github.com/datumbrain/dedup@latest ## Usage ```bash -# Scan current directory +# Scan current directory (shows what would be deleted) dedup # Scan specific directory dedup /path/to/folder dedup "C:\Users\Username\Downloads" + +# Delete duplicates with confirmation +dedup -d /path/to/folder + +# Delete duplicates without confirmation (force) +dedup -d -f /path/to/folder + +# Verbose output with detailed information +dedup -v /path/to/folder + +# Combine flags +dedup -d -f -v ~/Downloads ``` +### Flags + +- **`-d`**: Delete duplicate files (asks for confirmation before deleting) +- **`-f`**: Force deletion without confirmation (**must be combined with `-d`**) +- **`-v`**: Verbose output showing detailed priority information and deletion commands + +**Note**: The `-f` flag only works when combined with `-d`. Using `-f` alone will not delete files. 
+ ## Features - **Smart Detection**: Uses SHA-256 checksums for accurate duplicate detection - **Intelligent Recommendations**: Automatically identifies which files to keep vs delete -- **Platform-Specific Commands**: Generates deletion commands for your OS (Windows/macOS/Linux) -- **Safe by Default**: Only scans files, never deletes anything automatically +- **Simple & Clean UI**: Minimal output by default, verbose mode available with `-v` flag +- **Safe Deletion**: Delete duplicates with `-d` flag, confirmation prompt by default +- **Force Mode**: Skip confirmation with `-f` flag for automated workflows +- **Platform-Specific Commands**: Generates deletion commands in verbose mode for your OS +- **Safe by Default**: Scan-only mode unless `-d` flag is specified - **Non-Recursive**: Only scans the specified folder (doesn't go into subdirectories) ## How It Works @@ -47,38 +70,69 @@ The tool prioritizes files based on common naming patterns: ## Example Output -```raw -============================================================ -DUPLICATE FILES REPORT -============================================================ +### Default Mode (Simple & Clean) -Duplicate Group #1 (Checksum: a665a45920422f9d...) -File Size: 1024 bytes -Files: - ✓ KEEP: /Users/john/Downloads/invoice.pdf (Priority: 0) - ✗ DELETE: /Users/john/Downloads/invoice (1).pdf (Priority: 1001) +```bash +$ dedup ~/Downloads -============================================================ -DELETION RECOMMENDATIONS -============================================================ +📁 Scanned 15 files in /Users/john/Downloads + +🔍 Found 3 duplicate group(s) +💾 Can free 5.42 MB by deleting 8 file(s) + +💡 Use -d to delete files (with confirmation) +💡 Use -d -f to delete without confirmation +💡 Use -v for detailed output +``` + +### Delete Mode with Confirmation + +```bash +$ dedup -d ~/Downloads -Files recommended for deletion: -1. 
/Users/john/Downloads/invoice (1).pdf +📁 Scanned 15 files in /Users/john/Downloads -Summary: -- Total duplicate groups: 1 -- Files recommended for deletion: 1 -- Disk space that can be freed: 1.00 MB (1048576 bytes) +🔍 Found 3 duplicate group(s) +💾 Can free 5.42 MB by deleting 8 file(s) + +Files to delete: + 1. invoice (1).pdf + 2. report-2.xlsx + 3. image_copy.jpg + ... + +⚠️  About to delete 8 file(s). Continue? [y/N]: y + +🗑️  Deleting files... + +✅ Deleted 8 file(s), freed 5.42 MB +``` + +### Verbose Mode + +```bash +$ dedup -v ~/Downloads + +Scanning files in: /Users/john/Downloads +Calculating checksums... +Processing: invoice.pdf +Processing: invoice (1).pdf +... ============================================================ -DELETION COMMANDS FOR DARWIN +DUPLICATE FILES REPORT ============================================================ -# Terminal (recommended): -rm "/Users/john/Downloads/invoice (1).pdf" +Duplicate Group #1 (Checksum: a665a45920422f9d...) +File Size: 1024 bytes +Files with priorities: + Priority 0: invoice.pdf + Priority 1001: invoice (1).pdf +Decision: + ✓ KEEP: /Users/john/Downloads/invoice.pdf (Priority: 0) + ✗ DELETE: /Users/john/Downloads/invoice (1).pdf (Priority: 1001) -# Move to Trash (safer option): -osascript -e "tell application \"Finder\" to delete POSIX file \"/Users/john/Downloads/invoice (1).pdf\"" +... ``` ## Safety Features @@ -103,6 +157,20 @@ cd dedup go build -o dedup ``` +## Testing + +Run the unit tests: + +```bash +go test -v +``` + +Run tests with coverage: + +```bash +go test -v -cover +``` + ## License MIT License - Feel free to use, modify, and distribute. 
diff --git a/main.go b/main.go index b9c0197..c56fea6 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,9 @@ package main import ( + "bufio" "crypto/sha256" + "flag" "fmt" "io" "os" @@ -231,8 +233,25 @@ func generateDeletionCommands(filesToDelete []*FileInfo) { fmt.Printf("\n• Consider creating a backup of important files first\n") } +// deleteFile removes the file at filePath and returns the error from os.Remove, if any +func deleteFile(filePath string) error { + return os.Remove(filePath) +} + +// confirmDeletion prompts on stdin and reports whether the user answered y or yes (case-insensitive); an answer that ends at EOF without a newline is still honoured, an empty read counts as "no" +func confirmDeletion(filesToDelete []*FileInfo) bool { + fmt.Printf("\n⚠️  About to delete %d file(s). Continue? [y/N]: ", len(filesToDelete)) + reader := bufio.NewReader(os.Stdin) + response, err := reader.ReadString('\n') + if err != nil && response == "" { + return false + } + response = strings.TrimSpace(strings.ToLower(response)) + return response == "y" || response == "yes" +} + // findDuplicates finds all duplicate files in the specified folder -func findDuplicates(folderPath string) error { +func findDuplicates(folderPath string, deleteMode bool, forceDelete bool, verbose bool) error { // Map to store checksum -> list of files with that checksum checksumMap := make(map[string][]*FileInfo) @@ -242,8 +261,10 @@ func findDuplicates(folderPath string) error { return fmt.Errorf("error reading directory: %v", err) } - fmt.Printf("Scanning files in: %s\n", folderPath) - fmt.Println("Calculating checksums...") + if verbose { + fmt.Printf("Scanning files in: %s\n", folderPath) + fmt.Println("Calculating checksums...") + } // Process each file (skip directories) for _, entry := range entries { @@ -253,7 +274,9 @@ func findDuplicates(folderPath string) error { filePath := filepath.Join(folderPath, entry.Name()) - fmt.Printf("Processing: %s\n", entry.Name()) + if verbose { + fmt.Printf("Processing: %s\n", entry.Name()) + } fileInfo, err := getFileInfo(filePath) if err != nil { @@ -266,9 +289,13 @@ func findDuplicates(folderPath string) error { } // Find and display 
duplicates - fmt.Println("\n" + strings.Repeat("=", 60)) - fmt.Println("DUPLICATE FILES REPORT") - fmt.Println(strings.Repeat("=", 60)) + if !verbose { + fmt.Printf("\n📁 Scanned %d files in %s\n", len(checksumMap), folderPath) + } else { + fmt.Println("\n" + strings.Repeat("=", 60)) + fmt.Println("DUPLICATE FILES REPORT") + fmt.Println(strings.Repeat("=", 60)) + } duplicateGroups := 0 var filesToDelete []*FileInfo @@ -283,24 +310,31 @@ func findDuplicates(folderPath string) error { return files[i].Priority < files[j].Priority }) - fmt.Printf("\nDuplicate Group #%d (Checksum: %s)\n", duplicateGroups, checksum[:16]+"...") - fmt.Printf("File Size: %d bytes\n", files[0].Size) - fmt.Println("Files with priorities:") + if verbose { + fmt.Printf("\nDuplicate Group #%d (Checksum: %s)\n", duplicateGroups, checksum[:16]+"...") + fmt.Printf("File Size: %d bytes\n", files[0].Size) + fmt.Println("Files with priorities:") + + // Show all files with their priorities for debugging + for _, file := range files { + fmt.Printf(" Priority %d: %s\n", file.Priority, filepath.Base(file.Path)) + } - // Show all files with their priorities for debugging - for _, file := range files { - fmt.Printf(" Priority %d: %s\n", file.Priority, filepath.Base(file.Path)) + fmt.Println("Decision:") } - fmt.Println("Decision:") // First file (lowest priority number) should be kept keepFile := files[0] - fmt.Printf(" ✓ KEEP: %s (Priority: %d)\n", keepFile.Path, keepFile.Priority) + if verbose { + fmt.Printf(" ✓ KEEP: %s (Priority: %d)\n", keepFile.Path, keepFile.Priority) + } // Rest should be deleted for i := 1; i < len(files); i++ { file := files[i] - fmt.Printf(" ✗ DELETE: %s (Priority: %d)\n", file.Path, file.Priority) + if verbose { + fmt.Printf(" ✗ DELETE: %s (Priority: %d)\n", file.Path, file.Priority) + } filesToDelete = append(filesToDelete, file) totalSizeToSave += file.Size } @@ -308,13 +342,23 @@ func findDuplicates(folderPath string) error { } if duplicateGroups == 0 { - 
fmt.Println("\nNo duplicate files found!") - } else { + fmt.Println("\n✅ No duplicate files found!") + return nil + } + + // Summary output + if verbose { fmt.Printf("\n" + strings.Repeat("=", 60)) fmt.Printf("\nDELETION RECOMMENDATIONS") fmt.Printf("\n" + strings.Repeat("=", 60)) + } - if len(filesToDelete) > 0 { + if len(filesToDelete) > 0 { + if !verbose { + fmt.Printf("\n🔍 Found %d duplicate group(s)\n", duplicateGroups) + fmt.Printf("💾 Can free %.2f MB by deleting %d file(s)\n\n", + float64(totalSizeToSave)/(1024*1024), len(filesToDelete)) + } else { fmt.Printf("\nFiles recommended for deletion:\n") for i, file := range filesToDelete { fmt.Printf("%d. %s\n", i+1, file.Path) @@ -325,9 +369,49 @@ func findDuplicates(folderPath string) error { fmt.Printf("- Files recommended for deletion: %d\n", len(filesToDelete)) fmt.Printf("- Disk space that can be freed: %.2f MB (%.0f bytes)\n", float64(totalSizeToSave)/(1024*1024), float64(totalSizeToSave)) + } - // Generate platform-specific deletion commands - generateDeletionCommands(filesToDelete) + // Delete mode + if deleteMode { + // Ask for confirmation unless force flag is set + if !forceDelete { + if !verbose { + fmt.Println("Files to delete:") + for i, file := range filesToDelete { + fmt.Printf(" %d. 
%s\n", i+1, filepath.Base(file.Path)) + } + } + if !confirmDeletion(filesToDelete) { + fmt.Println("\n❌ Deletion cancelled.") + return nil + } + } + + // Perform deletion + fmt.Println("\n🗑️  Deleting files...") + deleted := 0 + var deletedSize int64 + for _, file := range filesToDelete { + if err := deleteFile(file.Path); err != nil { + fmt.Printf(" ❌ Failed to delete %s: %v\n", filepath.Base(file.Path), err) + } else { + deleted++ + deletedSize += file.Size + if verbose { + fmt.Printf(" ✓ Deleted: %s\n", file.Path) + } + } + } + fmt.Printf("\n✅ Deleted %d file(s), freed %.2f MB\n", deleted, float64(deletedSize)/(1024*1024)) + } else { + // Show deletion commands only in verbose mode + if verbose { + generateDeletionCommands(filesToDelete) + } else { + fmt.Println("💡 Use -d to delete files (with confirmation)") + fmt.Println("💡 Use -d -f to delete without confirmation") + fmt.Println("💡 Use -v for detailed output") + } } } @@ -335,10 +419,16 @@ func findDuplicates(folderPath string) error { } func main() { - // Get folder path from command line argument or use current directory + // Define flags + deleteFlag := flag.Bool("d", false, "Delete duplicate files") + forceFlag := flag.Bool("f", false, "Force deletion without confirmation (use with -d)") + verboseFlag := flag.Bool("v", false, "Verbose output with detailed information") + flag.Parse() + + // Get folder path from remaining arguments or use current directory folderPath := "." 
- if len(os.Args) > 1 { - folderPath = os.Args[1] + if flag.NArg() > 0 { + folderPath = flag.Arg(0) } // Verify the path exists and is a directory @@ -354,7 +444,7 @@ func main() { } // Find duplicates - if err := findDuplicates(folderPath); err != nil { + if err := findDuplicates(folderPath, *deleteFlag, *forceFlag, *verboseFlag); err != nil { fmt.Printf("Error: %v\n", err) os.Exit(1) } diff --git a/main_test.go b/main_test.go new file mode 100644 index 0000000..c992e61 --- /dev/null +++ b/main_test.go @@ -0,0 +1,262 @@ +package main + +import ( + "os" + "path/filepath" + "testing" +) + +func TestCalculateFilePriority(t *testing.T) { + tests := []struct { + filename string + expected int + }{ + // Original files (priority 0) + {"document.pdf", 0}, + {"image.jpg", 0}, + {"report.xlsx", 0}, + {"file.txt", 0}, + + // Numbered copies (priority 1000+) + {"document (1).pdf", 1001}, + {"document (2).pdf", 1002}, + {"image-1.jpg", 1001}, + {"image-2.jpg", 1002}, + {"file_1.txt", 1001}, + {"file_10.txt", 1010}, + + // Copy indicators (priority 500) + {"document_copy.pdf", 500}, + {"image copy.jpg", 500}, + {"file_backup.txt", 500}, + {"report_duplicate.xlsx", 500}, + {"temp_file.txt", 500}, + + // Download suffixes (priority 300) + {"document_final.pdf", 300}, + {"image_latest.jpg", 300}, + {"report_v2.xlsx", 300}, + {"file_updated.txt", 300}, + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + result := calculateFilePriority(tt.filename) + if result != tt.expected { + t.Errorf("calculateFilePriority(%q) = %d, want %d", tt.filename, result, tt.expected) + } + }) + } +} + +func TestCalculateChecksum(t *testing.T) { + // Create a temporary file + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "test.txt") + content := []byte("Hello, World!") + + if err := os.WriteFile(testFile, content, 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Calculate checksum + checksum1, err := calculateChecksum(testFile) + if 
err != nil { + t.Fatalf("calculateChecksum failed: %v", err) + } + + // Verify checksum is not empty + if checksum1 == "" { + t.Error("Checksum should not be empty") + } + + // Verify same content produces same checksum + checksum2, err := calculateChecksum(testFile) + if err != nil { + t.Fatalf("calculateChecksum failed on second call: %v", err) + } + + if checksum1 != checksum2 { + t.Errorf("Same file should produce same checksum: %s != %s", checksum1, checksum2) + } + + // Create another file with different content + testFile2 := filepath.Join(tmpDir, "test2.txt") + content2 := []byte("Different content") + if err := os.WriteFile(testFile2, content2, 0644); err != nil { + t.Fatalf("Failed to create second test file: %v", err) + } + + checksum3, err := calculateChecksum(testFile2) + if err != nil { + t.Fatalf("calculateChecksum failed for second file: %v", err) + } + + if checksum1 == checksum3 { + t.Error("Different files should produce different checksums") + } +} + +func TestGetFileInfo(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "document (1).pdf") + content := []byte("Test content") + + if err := os.WriteFile(testFile, content, 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + fileInfo, err := getFileInfo(testFile) + if err != nil { + t.Fatalf("getFileInfo failed: %v", err) + } + + // Verify path + if fileInfo.Path != testFile { + t.Errorf("Path = %s, want %s", fileInfo.Path, testFile) + } + + // Verify size + if fileInfo.Size != int64(len(content)) { + t.Errorf("Size = %d, want %d", fileInfo.Size, len(content)) + } + + // Verify checksum is not empty + if fileInfo.Checksum == "" { + t.Error("Checksum should not be empty") + } + + // Verify priority (document (1).pdf should have priority 1001) + if fileInfo.Priority != 1001 { + t.Errorf("Priority = %d, want 1001", fileInfo.Priority) + } +} + +func TestDeleteFile(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "test.txt") 
+ + // Create a test file + if err := os.WriteFile(testFile, []byte("test"), 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + // Verify file exists + if _, err := os.Stat(testFile); os.IsNotExist(err) { + t.Fatal("Test file should exist") + } + + // Delete the file + if err := deleteFile(testFile); err != nil { + t.Fatalf("deleteFile failed: %v", err) + } + + // Verify file no longer exists + if _, err := os.Stat(testFile); !os.IsNotExist(err) { + t.Error("File should not exist after deletion") + } +} + +func TestFindDuplicates(t *testing.T) { + tmpDir := t.TempDir() + + // Create test files + content1 := []byte("Same content") + content2 := []byte("Different content") + + // Create duplicates with same content + file1 := filepath.Join(tmpDir, "original.txt") + file2 := filepath.Join(tmpDir, "original (1).txt") + file3 := filepath.Join(tmpDir, "original (2).txt") + + // Create a unique file + file4 := filepath.Join(tmpDir, "unique.txt") + + if err := os.WriteFile(file1, content1, 0644); err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + if err := os.WriteFile(file2, content1, 0644); err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + if err := os.WriteFile(file3, content1, 0644); err != nil { + t.Fatalf("Failed to create file3: %v", err) + } + if err := os.WriteFile(file4, content2, 0644); err != nil { + t.Fatalf("Failed to create file4: %v", err) + } + + // Run findDuplicates in non-delete mode + err := findDuplicates(tmpDir, false, false, false) + if err != nil { + t.Fatalf("findDuplicates failed: %v", err) + } + + // Verify all files still exist (no deletion in scan mode) + for _, file := range []string{file1, file2, file3, file4} { + if _, err := os.Stat(file); os.IsNotExist(err) { + t.Errorf("File %s should still exist in scan mode", file) + } + } +} + +func TestFindDuplicatesWithDeletion(t *testing.T) { + tmpDir := t.TempDir() + + // Create test files + content := []byte("Duplicate content") + + file1 := 
filepath.Join(tmpDir, "keep.txt") + file2 := filepath.Join(tmpDir, "delete (1).txt") + + if err := os.WriteFile(file1, content, 0644); err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + if err := os.WriteFile(file2, content, 0644); err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + // Run findDuplicates with delete and force flags + err := findDuplicates(tmpDir, true, true, false) + if err != nil { + t.Fatalf("findDuplicates with deletion failed: %v", err) + } + + // Verify original file still exists + if _, err := os.Stat(file1); os.IsNotExist(err) { + t.Error("Original file should still exist") + } + + // Verify duplicate file was deleted + if _, err := os.Stat(file2); !os.IsNotExist(err) { + t.Error("Duplicate file should have been deleted") + } +} + +func TestNoDuplicates(t *testing.T) { + tmpDir := t.TempDir() + + // Create unique files + file1 := filepath.Join(tmpDir, "file1.txt") + file2 := filepath.Join(tmpDir, "file2.txt") + + if err := os.WriteFile(file1, []byte("Content 1"), 0644); err != nil { + t.Fatalf("Failed to create file1: %v", err) + } + if err := os.WriteFile(file2, []byte("Content 2"), 0644); err != nil { + t.Fatalf("Failed to create file2: %v", err) + } + + // Run findDuplicates + err := findDuplicates(tmpDir, false, false, false) + if err != nil { + t.Fatalf("findDuplicates failed: %v", err) + } + + // Both files should still exist + if _, err := os.Stat(file1); os.IsNotExist(err) { + t.Error("file1 should exist") + } + if _, err := os.Stat(file2); os.IsNotExist(err) { + t.Error("file2 should exist") + } +}