|
MIT OpenCourseWare 6.00 Introduction to Computer Science and Programming | Class length: 13 weeks. Start anytime. | Creator: duallain | Status: Under Construction |
|
Assignment 1 | Assignment 3: Matching strings: a biological perspective. Homework Submissions (12 total): I found this assignment really hard at first. I found it became easier the more closely I read the instructions on the handout (I think I psyched myself out about what the problem required). I did not like my solution to countSubStringMatchRecursive() (problem 1). I'm looking forward to seeing other people's solutions. Also, the professor gave us a program whose parameters were (key, target), but then advised that we make all the other programs (target, key). Did anyone else find this strange? It almost tripped me up at the end. Did anyone just tweak the program we were given to make it follow the convention in the module?
#! /usr/bin/env python
# -*- coding: utf8 -*-
# these are some example strings for use in testing your code
# target strings
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
targets = (target1, target2)
# key strings
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
keys = (key10, key11, key12, key13)
### the following procedure you will use in Problem 3
def subStringMatchOneSub(key,target):
    """Collect start positions where key fits target with one character ignored.

    Every index of key is skipped in turn; the text before and after the
    skipped character is located exactly in target, and the pairs of hits
    that sit one character apart are kept.  The per-index results are
    concatenated without de-duplication.
    """
    found = ()
    for gap in range(len(key)):
        head, tail = key[:gap], key[gap + 1:]
        headStarts = subStringMatchExact(target, head)
        tailStarts = subStringMatchExact(target, tail)
        # keep head starts whose tail begins right after the skipped character
        found += constrainedMatchPair(headStarts, tailStarts, len(head))
    return found
#######################################################################
## PROBLEM 1
from string import *
def countSubStringMatch(target,key):
    """Return the number of (possibly overlapping) occurrences of key in target.

    Each search resumes one character past the previous hit, so the cost is
    one find per occurrence instead of one find per index of target.
    """
    count = 0
    location = target.find(key)
    while location != -1:
        count += 1
        location = target.find(key, location + 1)
    return count
def helpcountSubStringMatchRecursive(target, key, incr, counter):
    """Record in counter every start index >= incr at which key occurs in target.

    counter is a list that is extended in place (indices already present
    are not added again) and is also returned.  One recursive call is made
    per occurrence, so the depth no longer grows with len(target).
    """
    position = target.find(key, incr)
    if position == -1:
        return counter
    if position not in counter:
        counter.append(position)
    # resume just past this hit so overlapping occurrences are still seen
    return helpcountSubStringMatchRecursive(target, key, position + 1, counter)


def countSubStringMatchRecursive(target, key):
    """Return the number of (possibly overlapping) occurrences of key in target."""
    return len(helpcountSubStringMatchRecursive(target, key, 0, []))
## PROBLEM 2
def subStringMatchExact(target,key):
    """Return a tuple of every start index at which key occurs in target.

    Overlapping occurrences are included and the indices are ascending.
    An empty tuple is returned when key does not occur; an empty key
    matches at every index from 0 through len(target).
    """
    starts = []
    location = target.find(key)
    while location != -1:
        starts.append(location)
        location = target.find(key, location + 1)
    return tuple(starts)
## PROBLEM 3
def constrainedMatchPair(firstMatch, secondMatch, length):
    """Return the entries n of firstMatch for which n + length + 1 is in secondMatch.

    One copy of n is emitted for every matching entry of secondMatch; the
    result is a tuple in firstMatch order.
    """
    offset = length + 1
    return tuple(start
                 for start in firstMatch
                 for follow in secondMatch
                 if start + offset == follow)
## PROBLEM 4
def subStringMatchExactlyOneSub(target,key):
    """Return the one-substitution match positions that are not exact matches.

    The exact positions of key are computed first, then every position
    reported by subStringMatchOneSub that is absent from them is kept,
    in the order reported.
    """
    exactStarts = subStringMatchExact(target, key)
    return tuple(start
                 for start in subStringMatchOneSub(key, target)
                 if start not in exactStarts)
##TEST FUNCTION
for target in targets:
for key in keys:
print "\nTarget: ", target
print "Key: ", key
print subStringMatchExactlyOneSub(target, key)
Permalink
copied from lecture 4's page ps3a.py from string import *
def countSubStringMatch(target, key):
    """counts the number of times 'key' appears in 'target'

    Overlapping occurrences are counted.  Each occurrence is located with a
    single find call, resuming one character past the previous hit.
    """
    counter = 0
    position = target.find(key)
    while position != -1:
        counter += 1
        position = target.find(key, position + 1)
    return counter
def countSubStringMatchRecursive(target, key):
    """counts the number of times 'key' appears in 'target'

    Overlapping occurrences are counted.  An empty key is answered directly
    (it matches at every index 0..len(target)) because recursing on it
    would never reach a base case.
    """
    if not key:
        return len(target) + 1
    position = target.find(key)
    if position == -1:
        return 0
    # count this hit, then look only at the text after its first character
    return 1 + countSubStringMatchRecursive(target[position + 1:], key)
Permalink
Comments:ps3b-d.py from string import *
# this is a code file that you can use as a template for submitting your
# solutions
# these are some example strings for use in testing your code
# target strings
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
# key strings
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
### the following procedure you will use in Problem 3
def subStringMatchExact(target,key):
    """Return a tuple of the start indices of every occurrence of key in target.

    Overlapping occurrences are included.  Each occurrence is located with
    one find call.
    """
    positions = []
    start = 0
    while True:
        hit = target.find(key, start)
        if hit == -1:
            break
        positions.append(hit)
        start = hit + 1
    return tuple(positions)
def constrainedMatchPair(firstMatch, secondMatch, length):
    """Keep each x from firstMatch once per y in secondMatch with x + length + 1 == y."""
    kept = [x for x in firstMatch for y in secondMatch if x + length + 1 == y]
    return tuple(kept)
def subStringMatchExactlyOneSub(target,key):
    """Return the positions matched with one substitution that are not exact matches.

    The exact-match positions are put in a set so every candidate is tested
    in constant time, rather than being removed from a list one at a time.
    """
    exactMatch = set(subStringMatchExact(target, key))
    subMatch = subStringMatchOneSub(key, target)
    return tuple(x for x in subMatch if x not in exactMatch)
def subStringMatchOneSub(key,target):
"""search for all locations of key in target, with one substitution"""
allAnswers = ()
for miss in range(0,len(key)):
# miss picks location for missing element
# key1 and key2 are substrings to match
key1 = key[:miss]
key2 = key[miss+1:]
print 'breaking key',key,'into',key1,key2
# match1 and match2 are tuples of locations of start of matches
# for each substring in target
match1 = subStringMatchExact(target,key1)
match2 = subStringMatchExact(target,key2)
# when we get here, we have two tuples of start points
# need to filter pairs to decide which are correct
filtered = constrainedMatchPair(match1,match2,len(key1))
allAnswers = allAnswers + filtered
print 'match1',match1
print 'match2',match2
print 'possible matches for',key1,key2,'start at',filtered
return allAnswers
I tried out your constrainedMatchPair, it's a little different from mine, but seems to work better. Maybe you can help me figure out where mine is wrong. The problem with yours is if I try subStringMatchOneSub("atgc","atgc") I'm still not getting all the answers I want. I've come to the conclusion the problem is we need the size of key2 which we're not given so there's a bug in the given code. I definitely was overthinking this. def constrainedMatchPair(firstMatch,secondMatch,length):
pairList = []
for n in range(0,len(firstMatch)):
for k in range(0,len(secondMatch)):
if n + length + 1 == k:
pairList.append(n)
else:
pass
return tuple(pairList)
I found the problem in mine. I was using range and in the for statements. The use of the variable name "counter" in constrainedMatchPair() and other functions when it is storing a list of matches is a bit confusing for readers. def countSubStringMatch(target,key):
count = 0
pos = -1
while True:
pos = target.find(key,pos+1)
if pos >= 0:
count += 1
if pos == -1:
break
return count
def countSubStringMatchRecursive(target,key):
    """Count non-overlapping occurrences of key in target by recursive partitioning.

    str.partition splits target around the first occurrence of key; an
    empty separator part means key was not found.
    """
    before, found, rest = target.partition(key)
    if not found:
        return 0
    return 1 + countSubStringMatchRecursive(rest, key)
def subStringMatchExact(target,key):
    """Return a tuple of the start indices of every occurrence of key in target.

    Overlapping occurrences are included.  When key does not occur the
    result is the empty tuple (the failed-search value -1 is never stored).
    """
    allpos = []
    pos = target.find(key)
    while pos >= 0:
        allpos.append(pos)
        pos = target.find(key, pos + 1)
    return tuple(allpos)
def constrainedMatchPair(firstMatch, secondMatch, length):
    """Return, as a tuple, each i of firstMatch paired with a j of secondMatch at i + length + 1."""
    members = ()
    for i in firstMatch:
        wanted = i + length + 1
        for j in secondMatch:
            if j == wanted:
                members += (i,)
    return members
Permalink
Comments:I like your use of the partition attribute the recursive function; very clean. def countSubStringMatchRecursive(target,key):
if target.find(key) >=0:
bef,sep,target = target.partition(key)
return 1 + countSubStringMatchRecursive(target,key)
return 0
# File ps3a.py
#===============================================================
def countSubStringMatch(target,key):
    """ Count the non-overlapping occurrences of key in target; returns int

    After each hit the search resumes past the end of the matched text.
    An empty key is answered directly with len(target) + 1 (the value
    str.count gives) because the search would otherwise never advance.
    """
    if not key:
        return len(target) + 1
    count = 0
    index = target.find(key)
    while index != -1:
        count += 1
        index = target.find(key, index + len(key))
    return count
def countSubStringMatchRecursive(target,key):
    """ Count the non-overlapping occurrences of key in target; returns int

    An empty key is answered directly with len(target) + 1 (the value
    str.count gives); recursing on it would never shorten target.
    """
    if not key:
        return len(target) + 1
    index = target.find(key)
    if index == -1:
        return 0
    # continue in the text that follows the matched occurrence
    return 1 + countSubStringMatchRecursive(target[index+len(key):],key)
#================================================================
# File ps3b.py
#================================================================
def subStringMatchExact(target,key):
    """ return the tuple of indices of the exact matches of key in the target

    The search resumes one character after each hit, so overlapping
    occurrences are reported.  An empty key matches at every index from 0
    through len(target) inclusive; the final index is the one the
    one-substitution search needs when the substituted character is the
    last character of target.
    """
    indexes = ()
    index = target.find(key)
    while index != -1:
        indexes = indexes + (index,)
        index = target.find(key, index + 1)
    return indexes
#================================================================
# File ps3c.py
#================================================================
def findSubStringsKey(key):
    """ Return, for each index of key, the (prefix, suffix) pair around that index """
    return [(key[:cut], key[cut + 1:]) for cut in range(len(key))]
def constrainedMatchPair(firstMatch,secondMatch,length):
    """ Return a tuple of the firstMatch entries that have a secondMatch entry length + 1 later """
    pairs = []
    for first in firstMatch:
        for second in secondMatch:
            if second == first + length + 1:
                pairs.append(first)
    return tuple(pairs)
### the following procedure you will use in Problem 3
def subStringMatchOneSub(key,target):
    """Collect positions where key fits target once one key character is ignored.

    Each index of key is ignored in turn; the two remaining pieces are
    matched exactly and combined with constrainedMatchPair.  Non-empty
    results are concatenated in index order.
    """
    answers = ()
    miss = 0
    while miss < len(key):
        left = key[:miss]
        right = key[miss+1:]
        leftHits = subStringMatchExact(target,left)
        rightHits = subStringMatchExact(target,right)
        filtered = constrainedMatchPair(leftHits,rightHits,len(left))
        if filtered:
            answers = answers + filtered
        miss += 1
    return answers
#================================================================
#File ps3d.py
#================================================================
def subStringMatchExactlyOneSub(target,key):
    """ Return the set of one-substitution match positions that are not exact match positions """
    candidates = set(subStringMatchOneSub(key,target))
    return candidates.difference(subStringMatchExact(target,key))
#================================================================
Permalink
No comments. Sign up or log in to comment #Problem Set 3a
#jayd
from string import *
def countSubStringMatch(target,key):
    """Count occurrences of key in target, overlaps included, while text remains to search."""
    count = 0
    start = 0
    # start marks the beginning of the still-unsearched remainder of target
    while start < len(target):
        location = target.find(key, start)
        if location == -1:
            break
        count += 1
        start = location + 1
    return count
print countSubStringMatch("ABXXABXXAB","AB")
#Result = 3
def countSubStringMatchRecursive(target,key):
    """Recursively count occurrences of key in target, overlaps included.

    The recursion continues one character after each hit and stops once
    target is empty, mirroring the 'while target' loop of the iterative
    countSubStringMatch, so both versions return the same counts.
    """
    if not target:
        return 0
    location = target.find(key)
    if location == -1:
        return 0
    return 1 + countSubStringMatchRecursive(target[location + 1:],key)
print countSubStringMatchRecursive("ABXXABXXAB","AB")
#Result = 3
--------------------------------------------------------------
Problem set 3b
#jayd
from string import *
def subStringMatchExact(target,key):
    """Return a tuple of every index at which key starts in target (overlaps included)."""
    hits = []
    location = target.find(key)
    # keep searching from just past the latest hit until find reports failure
    while location != -1:
        hits.append(location)
        location = target.find(key, location + 1)
    return tuple(hits)
#TESTS
targets = ('atgacatgcacaagtatgcat','atgaatgcatggatgtaaatgcag')
keys = ('a','atg','atgc','atgca')
for target in targets:
for key in keys:
print "Target: " + target
print "Key: " + key
print "Solution: " + str(subStringMatchExact(target,key))
#Output:
# Target: atgacatgcacaagtatgcat
# Key: a
# Solution: (0, 3, 5, 9, 11, 12, 15, 19)
# Target: atgacatgcacaagtatgcat
# Key: atg
# Solution: (0, 5, 15)
# Target: atgacatgcacaagtatgcat
# Key: atgc
# Solution: (5, 15)
# Target: atgacatgcacaagtatgcat
# Key: atgca
# Solution: (5, 15)
# Target: atgaatgcatggatgtaaatgcag
# Key: a
# Solution: (0, 3, 4, 8, 12, 16, 17, 18, 22)
# Target: atgaatgcatggatgtaaatgcag
# Key: atg
# Solution: (0, 4, 8, 12, 18)
# Target: atgaatgcatggatgtaaatgcag
# Key: atgc
# Solution: (4, 18)
# Target: atgaatgcatggatgtaaatgcag
# Key: atgca
# Solution: (4, 18)
------------------------------------------------------------
#Problem set 3d
#jayd
from string import *
# target strings
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
# key strings
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
def subStringMatchExact(target,key):
    """Returns a tuple of the exact matches"""
    found = ()
    cursor = 0
    while True:
        cursor = target.find(key, cursor)
        if cursor < 0:
            # no further occurrence: hand back what was gathered
            return found
        found += (cursor,)
        cursor += 1
def constrainedMatchPair(firstMatch,secondMatch,length):
    """Return a tuple of the firstMatch values followed, length + 1 later, by a secondMatch value."""
    return tuple(head
                 for head in firstMatch
                 for tail in secondMatch
                 if tail - head == length + 1)
def subStringMatchOneSub(target,key):
"""search for all locations of key in target, with one substitution"""
allAnswers = ()
for miss in range(0,len(key)):
key1 = key[:miss]
key2 = key[miss+1:]
print 'breaking key',key,'into',key1,',',key2
match1 = subStringMatchExact(target,key1)
match2 = subStringMatchExact(target,key2)
filtered = constrainedMatchPair(match1,match2,len(key1))
allAnswers = allAnswers + filtered
print 'match1',match1
print 'match2',match2
print 'possible matches for',key1,key2,'start at',filtered
return allAnswers
def subStringMatchExactlyOneSub(target,key):
    """Returns tuple of ONLY partial matches"""
    exactMatch = subStringMatchExact(target,key)
    subMatch = subStringMatchOneSub(target,key)
    partialMatch = []
    for match in subMatch:
        if match in exactMatch:
            continue  # an exact match is not a partial one
        if match in partialMatch:
            continue  # already reported
        partialMatch.append(match)
    return tuple(partialMatch)
matches = subStringMatchExactlyOneSub(target2,key13)
print "The partial only matches are:",matches
Permalink
No comments. Sign up or log in to comment from string import *
def countSubStringMatch(target, key):
    """counts the instances of key in target iteratively (exact matches only)

    Overlapping instances are counted.  An empty key is answered directly
    with len(target) + 1 (one match per index 0..len(target)); searching
    for it would never terminate.
    """
    if not key:
        return len(target) + 1
    count = 0
    idx = target.find(key)
    while idx != -1:
        count += 1
        idx = target.find(key, idx + 1)
    return count
def countSubStringMatchRecursive(target,key):
    """counts the instances of key in target recursively (exact matches only)

    Overlapping instances are counted.  An empty key is answered directly
    with len(target) + 1; recursing on it would never reach a base case.
    """
    if not key:
        return len(target) + 1
    idx = target.find(key)
    if idx == -1:
        return 0
    return 1 + countSubStringMatchRecursive(target[idx+1:],key)
def subStringMatchExact(target, key):
    """searches for instances of key in target (exact matches only)

    Returns a tuple of the start indices, overlaps included.  The target
    is searched from one past the previously found index, with a single
    find call per instance.
    """
    out = ()
    idx = target.find(key)
    while idx != -1:
        out += (idx,)
        idx = target.find(key, idx + 1)
    return out
def constrainedMatchPair(firstMatch, secondMatch, length):
    """for each element in firstMatch, looks for elements in secondMatch that
    lie exactly length + 1 further on, and returns the firstMatch elements
    for which one was found (once per such element)"""
    out = []
    for i in firstMatch:
        out.extend(i for j in secondMatch if j == i + length + 1)
    return tuple(out)
#####Provided with assignment##############################################
def subStringMatchOneSub(key,target):
    """Collect positions where key fits target with one key character ignored.

    For every index of key, the text before and after that index is
    matched exactly in target and the compatible pairs are kept; all the
    per-index tuples are joined in index order.
    """
    perIndex = []
    for miss in range(len(key)):
        before = key[:miss]
        after = key[miss+1:]
        beforeHits = subStringMatchExact(target,before)
        afterHits = subStringMatchExact(target,after)
        perIndex.append(constrainedMatchPair(beforeHits,afterHits,len(before)))
    # join the per-index tuples into one flat tuple
    return sum(perIndex, ())
###########################################################################
def subStringMatchExactlyOneSub(target,key):
    """searches for instances of key in target with exactly one substitution

    Returns a tuple of the positions reported by subStringMatchOneSub that
    are not exact matches, each listed once.  Membership is tested on the
    tuples themselves: testing it by searching the printed form of a tuple
    treats an index such as 1 or 8 as present whenever 18 is.
    """
    exact = subStringMatchExact(target,key)
    both = subStringMatchOneSub(key, target) # has exact matches & single subs
    out = ()
    for i in both:
        # skip exact matches and positions that were already added
        if i not in exact and i not in out:
            out += (i,)
    return out
Permalink
No comments. Sign up or log in to comment from string import *
# this is a code file that you can use as a template for submitting your
# solutions
# these are some example strings for use in testing your code
# target strings
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
# key strings
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
#problem 1
def countSubStringMatch(target,key):
    """Given a target string and a key substring,
    returns the number of occurrences of the key (overlaps included)"""
    matches = 0
    start = 0
    while start <= len(target):
        hit = target.find(key, start)
        if hit == -1:
            break
        matches += 1
        # resume one character after the occurrence just counted
        start = hit + 1
    return matches
def countSubStringMatchRecursive(target, key):
    """Given a target string and a key substring,
    returns the number of occurrences of the key (recursive, overlaps included).

    An empty key is answered directly with len(target) + 1; recursing on
    it would never reach a base case.
    """
    if not key:
        return len(target) + 1
    currentPosition = target.find(key)
    if currentPosition == -1:
        return 0
    return 1 + countSubStringMatchRecursive(target[currentPosition+1:], key)
#problem 2
def subStringMatchExact(target,key):
    """Given a target string and a key substring, returns a tuple of the
    starting positions of every occurrence of the key (overlaps included).

    The tuple is empty when the key does not occur.
    """
    locationOfMatches = []
    currentPosition = target.find(key)
    while currentPosition != -1:
        locationOfMatches.append(currentPosition)
        currentPosition = target.find(key, currentPosition + 1)
    return tuple(locationOfMatches)
#problem 3
def subStringMatchOneSub(key,target):
    """Collect positions where key fits target with one key character ignored.

    The key is split around each of its indices in turn; both parts are
    matched exactly in target and constrainedMatchPair keeps the starts
    whose two parts are separated by exactly one character.
    """
    results = ()
    for skipped, _ in enumerate(key):
        prefix, suffix = key[:skipped], key[skipped+1:]
        prefixStarts = subStringMatchExact(target,prefix)
        suffixStarts = subStringMatchExact(target,suffix)
        results = results + constrainedMatchPair(prefixStarts,suffixStarts,len(prefix))
    return results
def constrainedMatchPair(first, second, length):
    """Return a tuple of the positions in first that have a location in second
    exactly length + 1 further on (one entry per such location)."""
    kept = [position
            for position in first
            for location in second
            if location == position + length + 1]
    return tuple(kept)
#problem 4
def subStringMatchExactlyOneSub(key,target):
    """Return a tuple of the one-substitution match positions that are not exact matches.

    subStringMatchOneSub is run once and its result is filtered against
    the exact matches, keeping the original order.
    """
    matchesWithOneSubOrLess = subStringMatchOneSub(key, target)
    matchesExact = subStringMatchExact(target, key)
    return tuple(match for match in matchesWithOneSubOrLess
                 if match not in matchesExact)
Permalink
No comments. Sign up or log in to comment
# Set 3
# jyen
from string import *
def countSubStringMatch (target, key):
    """ Iteratively counts the number of times a term appears in a string

    Overlapping appearances are counted; each one is located with a single
    find call that starts one character after the previous appearance.
    """
    count = 0
    place = target.find(key)
    while place != -1:
        count += 1
        place = target.find(key, place + 1)
    return count
def countSubStringMatchRecursive (target, key):
    """ Recursively counts the number of times a term appears in a string

    Overlapping appearances are counted.  An empty key is answered directly
    with len(target) + 1; recursing on it would never reach a base case.
    """
    if not key:
        return len(target) + 1
    place = target.find(key)
    if place == -1:
        return 0
    return 1 + countSubStringMatchRecursive(target[place+1:], key)
def subStringMatchExact (target, key):
    """Iteratively finds the locations where a term appears in a string

    Returns a tuple of start indices, overlaps included, using one find
    call per appearance.
    """
    places = []
    index = target.find(key)
    while index != -1:
        places.append(index)
        index = target.find(key, index + 1)
    return tuple(places)
def constrainedMatchPair (firstMatch, secondMatch, length):
    """Return a tuple of the x in firstMatch for which some y in secondMatch equals x + length + 1."""
    step = length + 1
    places = []
    for x in firstMatch:
        for y in secondMatch:
            if y != x + step:
                continue
            places.append(x)
    return tuple(places)
def subStringMatchOneSub(target,key):
    """Collect positions where key fits target with one key character ignored.

    Each index of key is ignored in turn; the parts on either side are
    matched exactly and paired through constrainedMatchPair.  Results for
    all indices are concatenated in order.
    """
    gathered = ()
    for miss in range(len(key)):
        front, back = key[:miss], key[miss+1:]
        frontHits = subStringMatchExact(target,front)
        backHits = subStringMatchExact(target,back)
        gathered += constrainedMatchPair(frontHits,backHits,len(front))
    return gathered
def subStringMatchExactlyOneSub (target, key):
    """Return a tuple of the one-substitution match positions that are not exact matches."""
    exactmatches = subStringMatchExact(target, key)
    return tuple(x for x in subStringMatchOneSub(target, key)
                 if x not in exactmatches)
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
print countSubStringMatch(target1,key12)
print countSubStringMatchRecursive(target1,key12)
print subStringMatchExact(target2,key12)
print subStringMatchOneSub(target2, key12)
print subStringMatchExactlyOneSub(target2, key12)
Permalink
No comments. Sign up or log in to comment Did they do this to make sure we were paying attention? close = subStringMatchOneSub(key, target) # Problem Set 3
from string import *
# target strings
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
target3 = 'accaccaccaccaccaccaccacca'
# key strings
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
key14 = 'acca'
###################
# Problem 3a #
###################
def countSubStringMatch(target, key):
    """ Iteratively counts the number of times a term appears in a string

    Overlapping appearances are counted; each one costs a single find call.
    """
    count = 0
    start = 0
    while True:
        index = target.find(key, start)
        if index == -1:
            return count
        count += 1
        start = index + 1
def countSubStringMatchRecursive (target, key):
    """ Recursively counts the number of times a term appears in a string

    Overlapping appearances are counted.  An empty key is answered directly
    with len(target) + 1; recursing on it would never reach a base case.
    """
    if not key:
        return len(target) + 1
    index = target.find(key)
    if index == -1:
        return 0
    # Start the next recursion one place after the match that was just found.
    return 1 + countSubStringMatchRecursive(target[index+1:], key)
###################
# Problem 3b #
###################
def subStringMatchExact(target,key):
    """Return a tuple of every index at which key starts in target (overlaps included)."""
    matchList = []
    startFrom = 0
    while True:
        index = target.find(key, startFrom)
        if index == -1:
            break
        matchList.append(index)
        # the next search begins one place after this match
        startFrom = index + 1
    return tuple(matchList)
###################
# Problem 3c #
###################
def constrainedMatchPair(firstMatch,secondMatch,length):
    """Return a tuple of the starts n in firstMatch such that n + length + 1
    equals some start k in secondMatch (one entry per such k)."""
    # n: start of a first-substring match; k: start of a second-substring match
    return tuple(n
                 for n in firstMatch
                 for k in secondMatch
                 if k == n + length + 1)
def subStringMatchOneSub(key, target):
    """Collect positions where key fits target with one key character ignored.

    The key is cut around each index in turn; the two pieces are matched
    exactly in target and the starts whose pieces are one character apart
    are appended to the result.
    """
    answers = ()
    miss = 0
    while miss < len(key):
        piece1 = key[:miss]
        piece2 = key[miss+1:]
        starts1 = subStringMatchExact(target,piece1)
        starts2 = subStringMatchExact(target,piece2)
        answers = answers + constrainedMatchPair(starts1,starts2,len(piece1))
        miss += 1
    return answers
###################
# Problem 3d #
###################
def subStringMatchExactlyOneSub(key, target):
    """Returns tuple of ONLY partial matches"""
    exact = subStringMatchExact(target, key)
    close = subStringMatchOneSub(key, target)
    # drop every position that is also an exact match
    onesub = [i for i in close if i not in exact]
    return tuple(onesub)
###################
# Code test output #
###################
print "Testing code..."
print "The target strand of DNA is",(target2),"and the search key is",(key13)
print "Exact matches for",(key13),"were found at these positions:",subStringMatchExact(target2, key13)
print "Possible one-substitution matches were found at:",subStringMatchOneSub(key13, target2)
print "DNA sequences with only one different base pair found at:",subStringMatchExactlyOneSub(key13, target2)
Permalink
No comments. Sign up or log in to comment As I remember, I took a really straightforward way of solving these problems =/ Test functions do all the calls. ###############
## Problem 1 ##
###############
from string import *
def count_sub_string_match(target, key):
    """Count the occurrences (overlaps included) of key in target.

    The scan stops at the first failed search: once find reports no match
    from some index, no later index can match either.
    """
    counter = 0
    pointer = 0
    while pointer < len(target):
        position = target.find(key, pointer)
        if position == -1:
            break
        counter += 1
        pointer = position + 1
    return counter
def count_sub_string_match_recursive(target, key):
    """Count the occurrences (overlaps included) of key in target with recursion.

    The recursion stops as soon as target is empty, which mirrors the
    'pointer < len(target)' bound of the iterative version and keeps an
    empty key from recursing forever.
    """
    if not target:
        return 0
    position = target.find(key)
    if position == -1:
        return 0
    return 1 + count_sub_string_match_recursive(target[position + 1:], key)
def test_count_sub_string():
    """Print the counts from both counting functions for every target/key pair."""
    # Sample target strings and search keys used by the calls below
    target1 = 'atgacatgcacaagtatgcat'
    target2 = 'atgaatgcatggatgtaaatgcag'
    key10 = 'a'
    key11 = 'atg'
    key12 = 'atgc'
    key13 = 'atgca'
    # Testing iterative version
    print "Iterative function"
    print count_sub_string_match(target1, key10)
    print count_sub_string_match(target1, key11)
    print count_sub_string_match(target1, key12)
    print count_sub_string_match(target1, key13)
    print count_sub_string_match(target2, key10)
    print count_sub_string_match(target2, key11)
    print count_sub_string_match(target2, key12)
    print count_sub_string_match(target2, key13)
    # Testing recursive version
    print "Recursive function"
    print count_sub_string_match_recursive(target1, key10)
    print count_sub_string_match_recursive(target1, key11)
    print count_sub_string_match_recursive(target1, key12)
    print count_sub_string_match_recursive(target1, key13)
    print count_sub_string_match_recursive(target2, key10)
    print count_sub_string_match_recursive(target2, key11)
    print count_sub_string_match_recursive(target2, key12)
    print count_sub_string_match_recursive(target2, key13)
test_count_sub_string()
###############
## Problem 2 ##
###############
from string import *
def count_sub_string_match_exact(target, key):
    """Return a tuple of the start indices (overlaps included) of key in target.

    The scan stops at the first failed search: once find reports no match
    from some index, no later index can match either.
    """
    position_list = []
    pointer = 0
    while pointer < len(target):
        position = target.find(key, pointer)
        if position == -1:
            break
        position_list.append(position)
        pointer = position + 1
    return tuple(position_list)
def test_count_sub_string():
    """Print the match positions from count_sub_string_match_exact for every target/key pair."""
    # Sample target strings and search keys used by the calls below
    target1 = 'atgacatgcacaagtatgcat'
    target2 = 'atgaatgcatggatgtaaatgcag'
    key10 = 'a'
    key11 = 'atg'
    key12 = 'atgc'
    key13 = 'atgca'
    # Testing iterative version
    print "Iterative function"
    print count_sub_string_match_exact(target1, key10)
    print count_sub_string_match_exact(target1, key11)
    print count_sub_string_match_exact(target1, key12)
    print count_sub_string_match_exact(target1, key13)
    print count_sub_string_match_exact(target2, key10)
    print count_sub_string_match_exact(target2, key11)
    print count_sub_string_match_exact(target2, key12)
    print count_sub_string_match_exact(target2, key13)
test_count_sub_string()
###############
## Problem 3 ##
###############
from string import *
def sub_string_match_exact(target, key):
    """Return a tuple of the start indices (overlaps included) of key in target.

    The scan runs through index len(target) inclusive, so an empty key
    also matches at the very end of target.  The one-substitution search
    needs that index when the substituted character is the last character
    of target.
    """
    position_list = []
    pointer = 0
    while pointer <= len(target):
        position = target.find(key, pointer)
        if position == -1:
            break
        position_list.append(position)
        pointer = position + 1
    return tuple(position_list)
def constrained_match_pair(first_match, second_match, length):
    """Return a tuple of the first_match entries that have a second_match entry length + 1 later."""
    return tuple(n
                 for n in first_match
                 for m in second_match
                 if m == n + length + 1)
def sub_string_match_one_sub(target, key):
"""search for all locations of key in target, with one substitution"""
allAnswers = ()
for miss in range(0, len(key)):
# miss picks location for missing element
# key1 and key2 are substrings to match
key1 = key[:miss]
key2 = key[miss+1:]
print 'breaking key', key, 'into', key1, key2
# match1 and match2 are tuples of locations of start of matches
# for each substring in target
match1 = sub_string_match_exact(target, key1)
match2 = sub_string_match_exact(target, key2)
# when we get here, we have two tuples of start points
# need to filter pairs to decide which are correct
filtered = constrained_match_pair(match1, match2, len(key1))
allAnswers = allAnswers + filtered
print 'match1', match1
print 'match2', match2
print 'possible matches for', key1, key2, 'start at', filtered
print ''
return allAnswers
def test_count_sub_string():
    """Print the result of sub_string_match_one_sub for the multi-character keys."""
    # Sample target strings and search keys; the single-character key is disabled
    target1 = 'atgacatgcacaagtatgcat'
    target2 = 'atgaatgcatggatgtaaatgcag'
    #key10 = 'a'
    key11 = 'atg'
    key12 = 'atgc'
    key13 = 'atgca'
    # Testing problem 3
    # print sub_string_match_one_sub(target1, key10)
    print sub_string_match_one_sub(target1, key11)
    print sub_string_match_one_sub(target1, key12)
    print sub_string_match_one_sub(target1, key13)
    # print sub_string_match_one_sub(target2, key10)
    print sub_string_match_one_sub(target2, key11)
    print sub_string_match_one_sub(target2, key12)
    print sub_string_match_one_sub(target2, key13)
test_count_sub_string()
###############
## Problem 4 ##
###############
from string import *
def sub_string_match_exact(target, key):
    """Return a tuple of the start indices (overlaps included) of key in target.

    The scan runs through index len(target) inclusive, so an empty key
    also matches at the very end of target.  The one-substitution search
    needs that index when the substituted character is the last character
    of target.
    """
    position_list = ()
    pointer = 0
    while pointer <= len(target):
        position = target.find(key, pointer)
        if position == -1:
            break
        position_list += (position,)
        pointer = position + 1
    return position_list
def constrained_match_pair(first_match, second_match, length):
    """Return a tuple of the first_match entries that have a second_match entry length + 1 later."""
    kept = []
    for n in first_match:
        target_start = n + length + 1
        kept.extend(n for m in second_match if m == target_start)
    return tuple(kept)
def sub_string_match_one_exactly_one_sub(target, key):
    """search for all locations of key in target, with exactly one substitution

    Each character of ``key`` is dropped in turn; the pieces on either
    side are matched exactly and paired up so that they sit one character
    apart.  Starts where the whole ``key`` matches exactly are then
    discarded, because they need no substitution.

    target -- the string to search in
    key    -- the string to search for
    Returns a tuple of start indices, each listed once, in the order they
    were first found.  Progress is traced on standard output.
    """
    # Starts of exact matches of the whole key; these must be excluded.
    exact_starts = set(sub_string_match_exact(target, key))
    allAnswers = ()
    for miss in range(0, len(key)):
        print(allAnswers)
        # miss picks location for missing element
        # key1 and key2 are substrings to match
        key1 = key[:miss]
        key2 = key[miss+1:]
        print('breaking key %s into %s %s' % (key, key1, key2))
        # match1 and match2 are tuples of locations of start of matches
        # for each substring in target
        match1 = sub_string_match_exact(target, key1)
        match2 = sub_string_match_exact(target, key2)
        # when we get here, we have two tuples of start points
        # need to filter pairs to decide which are correct
        filtered = constrained_match_pair(match1, match2, len(key1))
        for one_sub in filtered:
            # Keep a start only if it is not an exact match and has not
            # already been recorded for an earlier value of miss.
            if one_sub not in exact_starts and one_sub not in allAnswers:
                allAnswers = allAnswers + (one_sub,)
        print('match1 %s' % (match1,))
        print('match2 %s' % (match2,))
        print('possible matches for %s %s start at %s' % (key1, key2, filtered))
        print('')
    return allAnswers
def test_count_sub_string():
    """Print the exactly-one-substitution matches for one sample pair."""
    sample_targets = ('atgacatgcacaagtatgcat', 'atgaatgcatggatgtaaatgcag')
    sample_keys = ('a', 'atg', 'atgc', 'atgca')
    # Only the last target with the last key is currently exercised; the
    # other target/key combinations are switched off.
    print(sub_string_match_one_exactly_one_sub(sample_targets[-1],
                                               sample_keys[-1]))


test_count_sub_string()
Permalink
No comments. Sign up or log in to comment Matching strings: a biological perspective # Problem Set 3 (Part I)
# Name: Joe Li
# Time: 2:00
#
from string import *
def countSubStringMatch(target,key):
    """Count the occurrences of ``key`` in ``target`` iteratively.

    Overlapping occurrences are counted separately.

    target -- the string to search in
    key    -- the string to search for
    Returns the number of occurrences as an int.
    """
    count = 0
    index = target.find(key)
    while index != -1:
        count += 1
        # Search again from the character after this match's start.
        index = target.find(key, index + 1)
    return count
def countSubStringMatchRecursive (target, key):
    """Count the occurrences of ``key`` in ``target`` recursively.

    Overlapping occurrences are counted separately.

    target -- the string to search in
    key    -- the string to search for
    Returns the number of occurrences as an int; 0 when ``key`` does not
    occur at all.
    """
    position = target.find(key)
    if position == -1:
        # Base case: no occurrence in what is left of the target.
        return 0
    if position >= len(target):
        # Only an empty key can match at the very end of the target;
        # count it and stop, since slicing further would never shrink it.
        return 1
    # Count this match, then look at the text after its first character.
    return 1 + countSubStringMatchRecursive(target[position + 1:], key)
# Problem Set 3 (Part II)
# Name: Joe Li
# Time 1:00
#
from string import *
def subStringMatchExact(target,key):
    """Return a tuple of every index at which ``key`` starts in ``target``.

    Overlapping occurrences are all reported, in ascending order.

    target -- the string to search in
    key    -- the string to search for
    """
    match = []
    index = target.find(key)
    while index != -1:
        match.append(index)
        # Search again from the character after this match's start.
        index = target.find(key, index + 1)
    return tuple(match)
# Sample target strings to search in.
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
# Sample key strings to search for.
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
# Show the exact-match positions of every key in every target.
for _target in (target1, target2):
    for _key in (key10, key11, key12, key13):
        print(subStringMatchExact(_target, _key))
# Problem Set 3 (Part III)
# Name: Joe Li
# Time 0:30
#
from string import *
def subStringMatchExact(target,key):
    """Return a tuple of every index at which ``key`` starts in ``target``.

    Overlapping occurrences are all reported, in ascending order.

    target -- the string to search in
    key    -- the string to search for
    """
    match = []
    index = target.find(key)
    while index != -1:
        match.append(index)
        # Search again from the character after this match's start.
        index = target.find(key, index + 1)
    return tuple(match)
def constrainedMatchPair(firstMatch,secondMatch,length):
    """Return the starts in ``firstMatch`` that line up with ``secondMatch``.

    A start ``n`` is kept when ``n + length + 1`` is one of the values in
    ``secondMatch``.

    firstMatch  -- iterable of start indices of the first piece
    secondMatch -- iterable of start indices of the second piece
    length      -- length of the first piece
    Returns a tuple of the surviving starts, in ``firstMatch`` order.
    """
    # A set makes each membership test constant-time instead of a rescan.
    secondStarts = set(secondMatch)
    return tuple(n for n in firstMatch if n + length + 1 in secondStarts)
def subStringMatchOneSub(key,target):
    """search for all locations of key in target, with one substitution

    Each character of ``key`` is dropped in turn; the pieces on either
    side are matched exactly and paired up so that they sit one character
    apart.  The same start may appear more than once in the result, once
    for every dropped position that yields it.  Progress is traced on
    standard output.

    key    -- the string to search for
    target -- the string to search in
    Returns a tuple of start indices.
    """
    allAnswers = ()
    for miss in range(0,len(key)):
        # miss picks location for missing element
        # key1 and key2 are substrings to match
        key1 = key[:miss]
        key2 = key[miss+1:]
        # Single pre-formatted strings print identically whether print is
        # a statement or a function.
        print('breaking key %s into %s %s' % (key, key1, key2))
        # match1 and match2 are tuples of locations of start of matches
        # for each substring in target
        match1 = subStringMatchExact(target,key1)
        match2 = subStringMatchExact(target,key2)
        # when we get here, we have two tuples of start points
        # need to filter pairs to decide which are correct
        filtered = constrainedMatchPair(match1,match2,len(key1))
        allAnswers = allAnswers + filtered
        print('match1 %s' % (match1,))
        print('match2 %s' % (match2,))
        print('possible matches for %s %s start at %s' % (key1, key2, filtered))
    return allAnswers
# Problem Set 3 (Part IV)
# Name: Joe Li
# Time 0:30
#
from string import *
def subStringMatchExact(target,key):
    """Return a tuple of every index at which ``key`` starts in ``target``.

    Overlapping occurrences are all reported, in ascending order.

    target -- the string to search in
    key    -- the string to search for
    """
    match = []
    index = target.find(key)
    while index != -1:
        match.append(index)
        # Search again from the character after this match's start.
        index = target.find(key, index + 1)
    return tuple(match)
def constrainedMatchPair(firstMatch,secondMatch,length):
    """Return the starts in ``firstMatch`` that line up with ``secondMatch``.

    A start ``n`` is kept when ``n + length + 1`` is one of the values in
    ``secondMatch``.

    firstMatch  -- iterable of start indices of the first piece
    secondMatch -- iterable of start indices of the second piece
    length      -- length of the first piece
    Returns a tuple of the surviving starts, in ``firstMatch`` order.
    """
    # A set makes each membership test constant-time instead of a rescan.
    secondStarts = set(secondMatch)
    return tuple(n for n in firstMatch if n + length + 1 in secondStarts)
def subStringMatchOneSub(key,target):
    """search for all locations of key in target, with one substitution

    Each character of ``key`` is dropped in turn; the pieces on either
    side are matched exactly and paired up so that they sit one character
    apart.  Duplicate starts are collapsed before returning, so the order
    of the result is whatever the intermediate set yields.  Progress is
    traced on standard output.

    key    -- the string to search for
    target -- the string to search in
    Returns a tuple of distinct start indices.
    """
    allAnswers = ()
    for miss in range(0,len(key)):
        # miss picks location for missing element
        # key1 and key2 are substrings to match
        key1 = key[:miss]
        key2 = key[miss+1:]
        # Single pre-formatted strings print identically whether print is
        # a statement or a function.
        print('breaking key %s into %s %s' % (key, key1, key2))
        # match1 and match2 are tuples of locations of start of matches
        # for each substring in target
        match1 = subStringMatchExact(target,key1)
        match2 = subStringMatchExact(target,key2)
        # when we get here, we have two tuples of start points
        # need to filter pairs to decide which are correct
        filtered = constrainedMatchPair(match1,match2,len(key1))
        allAnswers = allAnswers + filtered
        print('match1 %s' % (match1,))
        print('match2 %s' % (match2,))
        print('possible matches for %s %s start at %s' % (key1, key2, filtered))
    # The same start can be found for several values of miss; keep one copy.
    allAnswers=tuple(set(allAnswers))
    return allAnswers
def subStringMatchExactlyOneSub(target,key):
    """Return the starts where ``key`` matches ``target`` with exactly one substitution.

    Takes every start reported by subStringMatchOneSub and discards those
    that subStringMatchExact also reports, since an exact match needs no
    substitution.

    target -- the string to search in
    key    -- the string to search for
    Returns a sorted tuple of distinct start indices.
    """
    exact = set(subStringMatchExact(target,key))
    one = set(subStringMatchOneSub(key,target))
    # Set difference tolerates exact starts that are absent from the
    # one-substitution results and removes every duplicate at once.
    return tuple(sorted(one - exact))
Permalink
No comments. Sign up or log in to comment #Problem Set 3
#Name: chip
#Time: 0:40
# Sample target strings to search in.
target1 = 'atgacatgcacaagtatgcat'
target2 = 'atgaatgcatggatgtaaatgcag'
# Sample key strings to search for.
key10 = 'a'
key11 = 'atg'
key12 = 'atgc'
key13 = 'atgca'
#Problem 1
def countSubStringMatch(target, key):
    """Count the occurrences of key in target, overlaps included (iterative)."""
    total = 0
    cursor = 0
    while True:
        hit = target.find(key, cursor)
        if hit == -1:
            return total
        total += 1
        # Continue from the character after the start of this match.
        cursor = hit + 1
def countSubStringMatchRecursive(target, key):
    """Count the occurrences of key in target recursively.

    Overlapping occurrences are counted separately, so the result agrees
    with the iterative countSubStringMatch.

    target -- the string to search in
    key    -- the string to search for
    Returns the number of occurrences as an int.
    """
    result = target.find(key)
    if result == -1:
        return 0
    if result >= len(target):
        # Only an empty key can match at the very end of the target;
        # count it and stop, since slicing further would never shrink it.
        return 1
    # Step past just the first character of the match so that an
    # overlapping occurrence starting inside it is still found.
    return 1 + countSubStringMatchRecursive(target[result + 1:], key)
#Problem 2
def subStringMatchExact(target,key):
    """Return a tuple of every index where key starts in target, overlaps included."""
    starts = []
    cursor = 0
    while True:
        hit = target.find(key, cursor)
        if hit == -1:
            return tuple(starts)
        starts.append(hit)
        # Continue from the character after the start of this match.
        cursor = hit + 1
#Problem 3
def constrainedMatchPair(firstMatch, secondMatch, length):
    """Keep each start n of firstMatch for which n + length + 1 is in secondMatch."""
    gap = length + 1
    return tuple(start for start in firstMatch if start + gap in secondMatch)
def subStringMatchOneSub(key,target):
    """Find every start in target where key matches with one character skipped.

    For each position in key, the text before and after that position is
    matched exactly and the two sets of starts are paired up; the starts
    that survive are appended to the result.  A trace is printed along
    the way.
    """
    collected = ()
    for gap in range(len(key)):
        # Split the key around the character being skipped.
        head = key[:gap]
        tail = key[gap+1:]
        print('breaking key',key,'into',head,tail)
        # Exact-match starts of each piece in the target.
        head_starts = subStringMatchExact(target,head)
        tail_starts = subStringMatchExact(target,tail)
        # Keep only the head starts whose tail sits one character later.
        survivors = constrainedMatchPair(head_starts,tail_starts,len(head))
        collected += survivors
        print('match1',head_starts)
        print('match2',tail_starts)
        print('possible matches for',head,tail,'start at',survivors)
    return collected
#Problem 4
def subStringMatchExactlyOneSub(target, key):
    """Return the one-substitution starts of key in target that are not exact matches."""
    with_one_sub = set(subStringMatchOneSub(key, target))
    exact = set(subStringMatchExact(target, key))
    return tuple(with_one_sub.difference(exact))
Permalink
No comments. Sign up or log in to comment |
Comments:
5 months ago
I've been working on constrainedMatchPair and had the exact same code as yours. When I run subStringMatchOneSub("atgc","atgc") in IDLE, I feel like I should get back 4 possible matches at 0, but I don't get any.
5 months ago
I don't think you need that check for the counter being in the list; you are incrementing the index anyway. The recursive function can be done better :). Have a look at list comprehensions, generators and sets. Just my two cents.
5 months ago
Yeah, I have trouble writing good recursive functions. At least I know my weakness!
Sign up or log in to comment