31 lines
1.0 KiB
Python
31 lines
1.0 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Created on Wed Jun 21 13:58:42 2023

@author: michael
'''


def deDupe(inFile, outFile):
    """Remove duplicate lines from a text file, keeping each first occurrence.

    Lines are written to ``outFile`` in their original order. Two lines count
    as duplicates when their text is equal, ignoring the line terminator, so
    a final line without a trailing newline is still recognised as a repeat
    of an earlier line. A one-line summary is printed to stdout.

    Args:
        inFile (string): Path to file that shall be deduplicated.
        outFile (string): Path to output-file. May be the same path as
            ``inFile``; the input is fully read before the output is opened.

    Returns:
        None. The result is written to ``outFile``.

    Raises:
        OSError: If ``inFile`` cannot be read or ``outFile`` cannot be written.
    """
    # Read everything up front: opening outFile with 'w' truncates it, which
    # would destroy the input when both paths point at the same file.
    with open(inFile) as source:
        lines = source.readlines()

    seen = set()      # line texts already written
    repeated = set()  # distinct line texts that occurred more than once
    kept = []
    for line in lines:
        # Compare without the terminator so that "word" (last line, no
        # newline) matches an earlier "word\n".
        key = line.rstrip('\n')
        if key in seen:
            repeated.add(key)
        else:
            seen.add(key)
            kept.append(line)

    with open(outFile, 'w') as target:
        target.writelines(kept)

    # The reported number is the count of distinct lines that had duplicates,
    # not the total number of lines dropped.
    print(f'{len(repeated)} duplicate Keywords removed and saved into {outFile}.')