############################### ### Example for a hash function ############################### def hash4strings(s): """ ord(c) is the ascii value of character c 2**120+451 is a prime number """ sum = 0 for i in range(len(s)): sum = (128*sum + ord(s[i])) % (2**120+451) return sum**2 % (2**120+451) ############################# ### Common Substring Problem ############################# #naive solution def common_substring_naive(s1,s2,l): """ find common substring of s1 and s2 of length l """ for i in range(len(s1)-l+1): for j in range(len(s2)-l+1): if s1[i:i+l]==s2[j:j+l]: return s1[i:i+l] return None #hash stuff def create_hash_table(m): """ initial hash table, m empty entries """ return [[] for i in range(m)] def find(htable, key): """ return True if key is in htable, else False """ ht_size = len(htable) index = hash(key) % ht_size if key in htable[index]: return index return None def insert(htable, key): """ insert key into htable """ if find(htable, key) == None: ht_size = len(htable) index = hash(key) % ht_size htable[index].append(key) #solution using hash def common_substring_hash(s1,s2,l): """ find common substring of s1 and s2 of length l uses hash tables """ if len(s2) < len(s1): return common_substring_hash(s2,s1,l) size = len(s1) - l + 1 #s1 cannot be longer here htable = create_hash_table(size) for i in range(len(s1)-l+1): insert(htable, s1[i:i+l]) for i in range(len(s2)-l+1): if find(htable, s2[i:i+l]) != None: return s2[i:i+l] return None #solution using python sets def common_substring_hash2(s1,s2,l): """ find common substring of s1 and s2 of length l uses python built-in sets """ if len(s2) < len(s1): return common_substring_hash(s2,s1,l) htable = set() for i in range(len(s1)-l+1): htable.add(s1[i:i+l]) for i in range(len(s2)-l+1): if s2[i:i+l] in htable: return s2[i:i+l] return None # "competition" between the 3 solutions for common substring #function for generating random sequence import random def gen_str(size, alphabet = "ATCG"): ''' Generate a random string of length size ''' s="" for i in range(size): s+=random.choice(alphabet) return s import time str_len = int(input("Enter length of strings: ")) l = int(input("Enter l (length of substring searched: ")) s1 = gen_str(str_len) #random string s2 = gen_str(str_len) #another one for f in [common_substring_naive,common_substring_hash,common_substring_hash2]: t0=time.clock() res=f(s1, s2, l) t1=time.clock() print(t1-t0, f.__name__,"found?",res)