Hardware | AshesiGhana

Recognition Site Modifier

Select an enzyme

Select sequence direction

Insert DNA Sequence

OUTPUT:

- pandas - numpy

import pandas as pd import numpy as np from pyodide.http import open_url #Read restriction enzyme data and corresponding recognition site EnzymeFile = pd.read_csv(open_url('https://raw.githubusercontent.com/NanaAdwoaNewman/RecogSiteMod/main/RestrictionEnzymes.csv')) res = EnzymeFile['Enzyme'].values recog = EnzymeFile['Sequence'].values resEnzyme = [i for i in res if i is not np.nan] recogSite = [i for i in recog if i is not np.nan] #AMINO ACID SEQUENCES Ala = ["GCT", "GCC", "GCA", "GCG"] #Ala Arg = ["CGT", "CGC", "CGA", "CGG", "AGA", "AGG"] #Arg Asn = ["AAT", "AAC"] #Asn Asp = ["GAT", "GAC"] #Asp Cys = ["TGT", "TGC"] #Cys Glu = ["GAA", "GAG"] #Glu Gln = ["CAA", "CAG"] #Gln Gly = ["GGT", "GGC", "GGA", "GGG"] #Gly His = ["CAT", "CAC"] #His Ile = ["ATT", "ATC", "ATA"] #Ile Leu = ["TTA", "TTG", "CTT", "CTC", "CTA", "CTG"] #Leu Lys = ["AAA","AAG"] #Lys #Met = ["ATG"] #Met Phe = ["TTT", "TTC"] #Phe Pro = ["CCT", "CCC", "CCA", "CCG"] #Pro Ser = ["TCT", "TCC", "TCA", "TCG", "AGT", "AGC"] #Ser Thr = ["ACT", "ACC", "ACA", "ACG"] #Thr #Trp = ["TGG"] #Trp Tyr = ["TAT", "TAC"] #Tyr Val = ["GTT", "GTC", "GTA", "GTG"] #Val #aminoAcids = np.array(['A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V']) aminoAcidsArray = [Ala,Arg,Asn,Asp,Cys,Glu,Gln,Gly,His,Ile,Leu,Lys,Phe,Pro,Ser,Thr,Tyr,Val] #BASES R = ['A', 'G'] #Purine Y = ['T', 'C'] #Pyrimidine W = ['A', 'T'] S = ['G', 'C'] M = ['A', 'C'] K = ['G', 'T'] H = ['A', 'T', 'C'] B = ['G', 'C', 'T'] V = ['G', 'A', 'C'] D = ['G', 'A', 'T'] N = ['G', 'A', 'T', 'C'] bases = np.array(['R','Y','W','S','M','K','H','B','V','D']) basesArray = [R, Y, W, S, M, K, H, B, V, D] #Check length of DNA Sequence def CheckSequenceLen(sequence): lenSeq = len(sequence) msg = "DNA sequence is mutated or length of DNA sequence is invalid" if lenSeq%3 == 0: return "done" else: return msg #Check start and end codons def StartEndCodonCheck(sequence, direction): if direction == "F": startCodon = sequence[0:3].lower() stopCodon = sequence[-3:].lower() elif direction == "R": startCodon = sequence[-3:][::-1].lower() stopCodon = sequence[0:3][::-1].lower() if stopCodon == "tag" or stopCodon == "tga" or stopCodon == "taa": if startCodon == "atg": return "1" else: return "Invalid Start Codon" else: return "Invalid Stop Codon" #Checking the compatibilty of seq2modify and recogSiteCode when 'N' is present #in recogSiteCode def NInRecogSiteCompare(seq2modify, recogSiteCode): x = 0 for i in recogSiteCode: if seq2modify[x] == i or i == 'N' or i in bases: x = x + 1 else: return 0 break if x == len(recogSiteCode): return 1 def reverseStr(sequence, direction): if direction == 'F': sequence = sequence elif direction == 'R': if sequence != 'Could not modify sequence': sequence = sequence[::-1] return sequence def recogSiteModification(sequence, resEnzIndex, direction): sequence = reverseStr(sequence, direction) resEnz = resEnzyme[resEnzIndex] recogSiteCode = recogSite[resEnzIndex] #print(recogSiteCode) lenSequence = len(sequence) lenRecogSite = len(recogSiteCode) origSequence = sequence Xs = [] #Loop through the gene sequence for x in range(lenSequence - lenRecogSite + 1): recogSiteCode = recogSite[resEnzIndex] seq2modify = sequence[x:x+lenRecogSite].upper() for i in range(lenRecogSite): if recogSiteCode[i] == seq2modify[i]: continue else: if recogSiteCode[i] in bases: baseIndex = np.where(bases==recogSiteCode[i])[0][0] for j in basesArray[baseIndex]: if j == seq2modify[i]: recogSiteCode = recogSiteCode[:i]+ seq2modify[i] + recogSiteCode[i+1:] break else: break #Compare the modified recognition code with the section of the sequence if seq2modify.lower() == recogSiteCode.lower(): if lenRecogSite == 3: if x%3 == 1: for c in aminoAcidsArray: if seq2modify in c: for d in c: if d == seq2modify[:-1]: continue else: sequence = sequence[:x] + d + sequence[x+3:] Xs.append(x) break else: continue elif lenRecogSite == 4: #NB: * shows the start and end of the restriction enzyme recognition site #ATG|C*AG|CT*C|TAG if x%3 == 1: continue #ATG|CA*A|GCT*|TAG elif x%3 == 2: for a in aminoAcidsArray: if seq2modify[1:] in a: for b in a: if b == seq2modify[1:]: continue else: sequence = sequence[:x+1] + b + sequence[x+4:] Xs.append(x+1) break else: continue #ATG|*AGC|T*CG|TAG else: for c in aminoAcidsArray: if seq2modify[:-1] in c: for d in c: if d == seq2modify[:-1]: continue else: sequence = sequence[:x] + d + sequence[x+3:] Xs.append(x) break else: continue else: #ATG|C*GG|ATC*|CGA|TAG if x%3 == 1: for a in aminoAcidsArray: if seq2modify[2:5] in a: for b in a: if b == seq2modify[2:5]: continue else: sequence = sequence[:x+2] + b + sequence[x+5:] Xs.append(x+2) break else: continue #ATG|CA*G|GAT|C*TG|TAG elif x%3 == 2: for c in aminoAcidsArray: if seq2modify[1:4] in c: for d in c: if d == seq2modify[1:4]: continue else: sequence = sequence[:x+1] + d + sequence[x+4:] Xs.append(x+1) break else: continue #ATG|*GGA|TC*G|CGA|TAG elif x%3 == 0: for e in aminoAcidsArray: if seq2modify[:3] in e: for f in e: if f == seq2modify[:3]: continue else: sequence = sequence[:x] + f + sequence[x+3:] Xs.append(x) break else: continue #Special Case when there are Ns in the recognition site elif NInRecogSiteCompare(seq2modify, recogSiteCode) == 1: subOrigSeq = sequence countOfNs = recogSiteCode.count('N') firstNPos = recogSiteCode.find('N') lastNPos = recogSiteCode.rfind('N') for i in range(lenRecogSite): if recogSiteCode[i] == seq2modify[i]: continue elif recogSiteCode[i] == 'N': recogSiteCode = recogSiteCode[:i]+ seq2modify[i] + recogSiteCode[i+1:] elif recogSiteCode[i] in bases: baseIndex = np.where(bases==recogSiteCode[i])[0][0] for j in basesArray[baseIndex]: if j == seq2modify[i]: recogSiteCode = recogSiteCode[:i]+ seq2modify[i] + recogSiteCode[i+1:] break else: break #print(recogSiteCode) if seq2modify == recogSiteCode: check = 0 if x%3 == 0: for a in aminoAcidsArray: if seq2modify[:3] in a: for b in a: if b == seq2modify[:3]: continue else: sequence = sequence[:x] + b + sequence[x+3:] check = check + 1 if firstNPos == 1: if sequence[x] == subOrigSeq[x]: sequence = subOrigSeq check = 0 else: Xs.append(x) elif firstNPos == 2: if sequence[x:x+2] == subOrigSeq[x:x+2]: sequence = subOrigSeq check = 0 else: Xs.append(x) else: Xs.append(x) break else: continue elif x%3 == 1: if firstNPos > 2: for a in aminoAcidsArray: if seq2modify[2:5] in a: for b in a: if b == seq2modify[2:5]: continue else: sequence = sequence[:x+2] + b + sequence[x+5:] check = check + 1 if firstNPos == 3: if sequence[x+2] == subOrigSeq[x+2]: sequence = subOrigSeq check = 0 else: Xs.append(x+2) elif firstNPos == 4: if sequence[x+2:x+4] == subOrigSeq[x+2:x+4]: sequence = subOrigSeq check = 0 else: Xs.append(x+2) else: Xs.append(x+2) break else: continue elif x%3 == 2: if firstNPos >1: for a in aminoAcidsArray: if seq2modify[1:4] in a: for b in a: if b == seq2modify[1:4]: continue else: sequence = sequence[:x+1] + b + sequence[x+4:] check = check + 1 if firstNPos == 2: if sequence[x+1] == subOrigSeq[x+1]: sequence = subOrigSeq check = 0 else: Xs.append(x+1) elif firstNPos == 3: if sequence[x+1:x+3] == subOrigSeq[x+1:x+3]: sequence = subOrigSeq check = 0 else: Xs.append(x+1) else: Xs.append(x+1) break else: continue #looking at the end of the seq2modify in case looking at start is not fruitful if check == 0: #print(check) LastBaseIdx = x+lenRecogSite-1 if (LastBaseIdx)%3 == 2: for a in aminoAcidsArray: if seq2modify[-3:] in a: for b in a: if b == seq2modify[-3:]: continue else: sequence = sequence[:LastBaseIdx-2] + b + sequence[LastBaseIdx+1:] if lenRecogSite -1 - lastNPos == 1: if sequence[x+lastNPos+1] == subOrigSeq[x+lastNPos+1]: sequence = subOrigSeq else: Xs.append(LastBaseIdx-2) elif lenRecogSite - 1 - lastNPos == 2: if sequence[x+lastNPos+1:x+lastNPos+3] == subOrigSeq[x+lastNPos+1:x+lastNPos+3]: sequence = subOrigSeq else: Xs.append(LastBaseIdx-2) else: Xs.append(LastBaseIdx-2) break else: continue elif (LastBaseIdx)%3 == 1: if lenRecogSite < 5: continue else: for a in aminoAcidsArray: if seq2modify[-5:-2] in a: for b in a: if b == seq2modify[-5:-2]: continue else: sequence = sequence[:LastBaseIdx-4] + b + sequence[LastBaseIdx-1:] if lenRecogSite - 1 - lastNPos < 3: sequence = subOrigSeq elif lenRecogSite - 1 - lastNPos == 3: if sequence[x+lastNPos+1] == subOrigSeq[x+lastNPos+1]: sequence = subOrigSeq else: Xs.append(LastBaseIdx-4) elif lenRecogSite - 1 - lastNPos == 4: if sequence[x+lastNPos+1:x+lastNPos+3] == subOrigSeq[x+lastNPos+1:x+lastNPos+3]: sequence = subOrigSeq else: Xs.append(LastBaseIdx-4) else: Xs.append(LastBaseIdx-4) break else: continue elif (LastBaseIdx)%3 == 0: if lenRecogSite < 4: continue else: for a in aminoAcidsArray: if seq2modify[-4:-1] in a: for b in a: if b == seq2modify[-4:-1]: continue else: sequence = sequence[:LastBaseIdx-3] + b + sequence[LastBaseIdx:] if lenRecogSite - 1 - lastNPos < 2: sequence = subOrigSeq elif lenRecogSite - 1 - lastNPos == 2: if sequence[x+lastNPos+1] == subOrigSeq[x+lastNPos+1]: sequence = subOrigSeq else: Xs.append(LastBaseIdx-3) elif lenRecogSite - 1 - lastNPos == 3: if sequence[x+lastNPos+1:x+lastNPos+3] == subOrigSeq[x+lastNPos+1:x+lastNPos+3]: sequence = subOrigSeq else: Xs.append(LastBaseIdx-3) else: Xs.append(LastBaseIdx-3) break else: continue if sequence == origSequence: sequence = "Could not modify sequence" else: for i in Xs: sequence = sequence[:i] + ' *' + sequence[i:i+3] + '* ' + sequence[i+3:] sequence = reverseStr(sequence, direction) #print(sequence) return sequence #Final def Final(*args,**kwargs): sequence = Element('seq').value direction = Element('dire').value resEnzIndex = int(Element('enzyme').value) msg = Element('output') #msg.write(CheckSequenceLen(sequence)) if CheckSequenceLen(sequence) == "done": if StartEndCodonCheck(sequence, direction) == "1": msg.write(recogSiteModification(sequence, resEnzIndex, direction)) else: msg.write(StartEndCodonCheck(sequence, direction)) else: msg.write(CheckSequenceLen(sequence))