Revision: 50947
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at September 9, 2011 19:52 by mail2yogs
Initial Code
#!/usr/bin/env python
#-------------------------------------------------------------------------------
# Name:        findduplicates
# Purpose:     Report files whose content is identical once comment lines
#              (first non-blank character is '#') and blank lines are ignored.
#
# Author:      ysharma
#
# Created:     09-09-2011
# Copyright:   (c) ysharma 2011
# Licence:     <your licence>
#-------------------------------------------------------------------------------
"""Find files that are duplicates of each other, ignoring comments and blanks."""

import glob
import hashlib
import re

# Lines matching any of these patterns do not contribute to a file's
# fingerprint.  They are bytes patterns because files are read in binary mode.
_EXCLUDE_PATTERNS = [
    re.compile(br'^\s*#'),   # comment line: optional whitespace, then '#'
    re.compile(br'^\s*$'),   # empty or whitespace-only line
]


def get_duplicates(filelist):
    """Group the given files by content, ignoring comment and blank lines.

    Each file is read line by line; every line that matches none of the
    exclude patterns is fed into an MD5 digest.  Files ending up with the
    same digest are considered duplicates of each other.

    Args:
        filelist: iterable of file paths to compare.

    Returns:
        A list of lists.  Each inner list holds two or more paths whose
        filtered content produced the same digest.  Files without a
        duplicate are not reported.  Returns an empty list when there are
        no duplicates.

    Raises:
        IOError/OSError: if one of the files cannot be opened or read.
    """
    by_digest = {}
    for path in filelist:
        # MD5 serves only as a content fingerprint here, not for security.
        digest = hashlib.md5()
        # Binary mode hashes the exact bytes on disk and avoids any decoding
        # errors; the context manager guarantees the handle is closed.
        with open(path, 'rb') as handle:
            for line in handle:
                if any(pattern.search(line) for pattern in _EXCLUDE_PATTERNS):
                    continue
                digest.update(line)
        by_digest.setdefault(digest.hexdigest(), []).append(path)
    return [paths for paths in by_digest.values() if len(paths) > 1]


def main(pattern="omx_proj/impl_1/jobs/job_*/*arun.tcl"):
    """Print every group of duplicate files matched by a glob pattern.

    Args:
        pattern: glob expression selecting the files to compare.  Defaults
            to the path this script was originally written for.
    """
    duplicate_files = get_duplicates(glob.glob(pattern))
    if duplicate_files:
        print("Following files are duplicate:")
        for files in duplicate_files:
            print(files)


if __name__ == '__main__':
    main()
Initial URL
Initial Description
Initial Title
Find duplicate files with regex exclude
Initial Tags
python
Initial Language
Python