2023-06-23 16:41:20 +02:00

31 lines
1.0 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Created on Wed Jun 21 13:58:42 2023
@author: michael
'''
def deDupe(inFile, outFile, encoding=None):
    """Remove duplicate lines from a text file, keeping first occurrences.

    The input file is read completely before the output file is opened, so
    ``inFile`` and ``outFile`` may refer to the same path. Lines are written
    in their original order; every repeated occurrence after the first one
    is dropped. Lines are compared without their trailing newline, so a
    final line that lacks a newline is still recognised as a duplicate of an
    earlier, newline-terminated line. A one-line summary is printed to
    stdout when done.

    Args:
        inFile (string): Path to file that shall be deduplicated.
        outFile (string): Path to output-file.
        encoding (string, optional): Text encoding used for both files.
            ``None`` (the default) lets ``open()`` pick the platform default.
    """
    with open(inFile, encoding=encoding) as source:
        lines = source.readlines()

    seen = set()    # content of every line already written
    dupes = set()   # content of every line that occurred more than once
    unique_lines = []
    for line in lines:
        # readlines() leaves at most one trailing '\n'; strip it so that an
        # unterminated last line compares equal to its terminated twin.
        key = line.rstrip('\n')
        if key in seen:
            dupes.add(key)
        else:
            seen.add(key)
            unique_lines.append(line)

    with open(outFile, 'w', encoding=encoding) as target:
        target.writelines(unique_lines)

    print(f'{len(dupes)} duplicate Keywords removed and saved into {outFile}.')