#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Created on Wed Jun 21 13:58:42 2023

@author: michael
'''


def deDupe(inFile: str, outFile: str) -> None:
    """Remove duplicate lines from a text file, keeping first occurrences.

    The input is read completely before the output is opened, so ``inFile``
    and ``outFile`` may refer to the same path. The relative order of the
    remaining lines is preserved. A summary with the number of distinct lines
    that occurred more than once is printed to stdout.

    Args:
        inFile (string): Path to file that shall be deduplicated.
        outFile (string): Path to output-file.
    """
    with open(inFile) as source:
        lines = source.readlines()

    seen = set()
    duplicated = set()
    unique_lines = []
    for line in lines:
        # Compare without the line terminator so that an unterminated final
        # line is still recognised as a duplicate of an earlier line.
        key = line.rstrip('\n')
        if key in seen:
            duplicated.add(key)
            continue
        seen.add(key)
        unique_lines.append(line)

    # Opened only after reading has finished; see the docstring.
    with open(outFile, 'w') as target:
        target.writelines(unique_lines)

    print(f'{len(duplicated)} duplicate Keywords removed and saved into {outFile}.')