/ Published in: Python
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
#-------------------------------------------------------------------------------
# Name:        findduplicates
# Purpose:     Report groups of files whose contents are identical once
#              comment lines and blank lines have been ignored.
#
# Author:      ysharma
#
# Created:     09-09-2011
# Copyright:   (c) ysharma 2011
# Licence:     <your licence>
#-------------------------------------------------------------------------------
#!/usr/bin/env python

import glob
import hashlib
import re

# Glob pattern scanned by main() when no other pattern is supplied.
DEFAULT_PATTERN = "omx_proj/impl_1/jobs/job_*/*arun.tcl"

# A line is left out of the fingerprint when its first non-whitespace
# character is '#' (comment) or when it holds nothing but whitespace.
# The pattern is a bytes pattern because files are read in binary mode, which
# keeps the digest independent of any text encoding and works unchanged on
# both Python 2 and Python 3.
_IGNORED_LINE = re.compile(br'^\s*(?:#|$)')


def get_duplicates(filelist):
    """Group files that have the same significant content.

    Each file is fingerprinted with an MD5 digest of its lines, skipping
    lines that are blank or that start (after optional whitespace) with
    ``#``.  MD5 is used purely as a content fingerprint here, not for
    security.

    Args:
        filelist: Iterable of paths of the files to compare.

    Returns:
        A list of lists.  Every inner list holds two or more paths from
        ``filelist`` whose fingerprints are equal, in the order they were
        supplied.  Files without a duplicate are not reported; the result is
        empty when there are no duplicates.

    Raises:
        IOError / OSError: if one of the files cannot be opened or read.
    """
    paths_by_digest = {}
    for path in filelist:
        digest = hashlib.md5()
        # The context manager guarantees the handle is closed even if reading
        # fails; iterating the handle streams the file line by line instead
        # of loading it whole.
        with open(path, 'rb') as handle:
            for line in handle:
                if _IGNORED_LINE.match(line):
                    continue
                digest.update(line)
        paths_by_digest.setdefault(digest.hexdigest(), []).append(path)
    return [paths for paths in paths_by_digest.values() if len(paths) > 1]


def main(pattern=DEFAULT_PATTERN):
    """Print every group of duplicate files matched by ``pattern``.

    Args:
        pattern: Glob pattern selecting the files to compare.  Defaults to
            ``DEFAULT_PATTERN``.

    Nothing is printed when no duplicates are found.
    """
    duplicate_files = get_duplicates(glob.glob(pattern))
    if duplicate_files:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print("Following files are duplicate:")
        for files in duplicate_files:
            print(files)


if __name__ == '__main__':
    main()