Verzeichnisse vergleichen mit Python

von sparrow242

Vielleicht kann es ja jemand gebrauchen. Ich musste zwei Verzeichnisse miteinander vergleichen und mit dem Ergebnis weiterarbeiten.
Hier ist die Grundlage zum Vergleichen von zwei Verzeichnissen; die Dateien werden anhand ihrer MD5-Prüfsumme miteinander verglichen.

import hashlib
import os
import sys

def md5checksum(filePath, blocksize=65536):
    """Return the hexadecimal MD5 digest of the file at *filePath*.

    The file is opened in binary mode and read chunk by chunk, so large
    files are hashed without being loaded into memory as a whole. MD5 is
    used here only to detect differing content, not for security.

    Args:
        filePath: Path of the file to hash.
        blocksize: Number of bytes to read per chunk. Must be positive;
            it affects only I/O granularity, never the resulting digest.

    Returns:
        The digest as a string of 32 hexadecimal digits.

    Raises:
        ValueError: If *blocksize* is not a positive number.
        IOError / OSError: If the file cannot be opened or read.
    """
    if blocksize <= 0:
        # read(0) would look like end-of-file and yield the digest of an
        # empty file; read(-1) would slurp the entire file at once.
        raise ValueError("blocksize must be positive")
    h = hashlib.md5()
    with open(filePath, 'rb') as fi:
        # iter() with a sentinel calls read() repeatedly until it returns
        # the empty byte string that marks end-of-file.
        for data in iter(lambda: fi.read(blocksize), b''):
            h.update(data)
    return h.hexdigest()
        
def scandir(dir):
    """Walk *dir* recursively and map every file to its MD5 checksum.

    Keys are file paths relative to *dir*, without a leading separator,
    so the results for two different roots can be compared key by key no
    matter whether a root was given with or without a trailing separator.
    Progress messages are printed to standard output.

    Args:
        dir: Root directory to scan.

    Returns:
        A dict mapping each relative file path to the value returned by
        md5checksum() for that file.
    """
    # A single pre-formatted argument in parentheses prints identically
    # under the Python 2 print statement and the Python 3 print function.
    print("Scanning %s" % (dir,))
    fdata = {}
    for root, _dirs, files in os.walk(dir):
        for name in files:
            fullpath = os.path.join(root, name)
            # Slicing off len(dir) characters would leave a leading
            # separator unless dir already ends with one, making the keys
            # of "a" and "b/" incomparable; relpath is consistent.
            path = os.path.relpath(fullpath, dir)
            fdata[path] = md5checksum(fullpath)
    print("Found: %i Files" % (len(fdata),))
    return fdata


def main(firstpath, secondpath):
    """Compare two directory trees and print their differences.

    Both trees are scanned with scandir(). One line is then printed for
    every file that exists in only one of the trees or whose checksum
    differs between them. Files that are identical in both trees produce
    no output. Lines are emitted in sorted path order so that repeated
    runs give the same report.

    Args:
        firstpath: Root of the first directory tree.
        secondpath: Root of the second directory tree.
    """
    first_data = scandir(firstpath)
    second_data = scandir(secondpath)

    # Files of the first tree: either missing from the second tree or
    # present there with different content.
    for path in sorted(first_data):
        if path not in second_data:
            print("%s only in %s" % (path, firstpath))
        elif second_data[path] != first_data[path]:
            print("%s not the same" % (path,))

    # Files that exist only in the second tree. Shared files were already
    # handled above, so no other case can occur here.
    for path in sorted(second_data):
        if path not in first_data:
            print("%s only in %s" % (path, secondpath))

if __name__ == "__main__":
    # Exactly two directory arguments are required; exit with a usage
    # message on stderr instead of failing with a bare IndexError.
    if len(sys.argv) != 3:
        sys.exit("Usage: %s FIRST_DIR SECOND_DIR" % (sys.argv[0],))
    main(sys.argv[1], sys.argv[2])


Advertisements