#!/usr/bin/env python
"""Delete duplicate files from the current directory.

Files fetched by a recursive download are not always unique.  Every regular
file directly inside the directory is hashed with SHA-1; the first file seen
with a given digest is kept and any later file with the same content is
removed.  Subdirectories are not descended into.
"""

import hashlib
import os
import sys

# Read files in fixed-size pieces so large downloads are never held in
# memory all at once.
_CHUNK_SIZE = 1024 * 1024


def _sha1_of_file(path, chunk_size=_CHUNK_SIZE):
    """Return the hexadecimal SHA-1 digest of the file at *path*."""
    digest = hashlib.sha1()
    # Binary mode: the content must be hashed byte-for-byte, with no newline
    # translation or text decoding.
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def remove_duplicates(directory="."):
    """Remove files in *directory* whose content duplicates an earlier file.

    Entries are visited in sorted name order, so for each group of identical
    files the one whose name sorts first is kept.  Anything that is not a
    regular file (for example a subdirectory) is skipped.  A file that cannot
    be read or deleted is reported on stderr and left in place; processing
    then continues with the next entry.

    Returns the list of paths that were removed.
    """
    seen_digests = set()
    removed = []
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue
        try:
            digest = _sha1_of_file(path)
            if digest in seen_digests:
                os.remove(path)
                removed.append(path)
            else:
                seen_digests.add(digest)
        except (IOError, OSError) as error:
            # Best effort: one unreadable or undeletable file must not stop
            # the rest of the directory from being processed.
            sys.stderr.write("skipping %s: %s\n" % (path, error))
    return removed


def main():
    """Deduplicate the current working directory."""
    remove_duplicates(".")


if __name__ == "__main__":
    main()