Available on Github
01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | import numpy as np import matplotlib.pyplot as plt import random def random_AA_seq(length): result = 'M' for i in range (length - 1 ): result = result + str (random.choice( 'ACDEFGHIKLMNPQRSTVWY' )) return result def compare_AA_seq(s1,s2): l = len (s1) c = 0 for i in range ( 0 ,l): if (s1[i:i + 1 ] = = s2[i:i + 1 ]): c + = 1 result = c * 100 / l # print (c,l,result) return result maxpam = 10 length_sequence = 1000 aa = [ 'A' , 'R' , 'N' , 'D' , 'C' , 'Q' , 'E' , 'G' , 'H' , 'I' , 'L' , 'K' , 'M' , 'F' , 'P' , 'S' , 'T' , 'W' , 'Y' , 'V' ] #print (aa) PAM = { "AA" : 98.67 , "AR" : 0.01 , "AN" : 0.04 , "AD" : 0.06 , "AC" : 0.01 , "AQ" : 0.03 , "AE" : 0.1 , "AG" : 0.21 , "AH" : 0.01 , "AI" : 0.02 , "AL" : 0.03 , "AK" : 0.02 , "AM" : 0.01 , "AF" : 0.01 , "AP" : 0.13 , "AS" : 0.28 , "AT" : 0.22 , "AW" : 0 , "AY" : 0.01 , "AV" : 0.13 , "RA" : 0.02000200020002 , "RR" : 99.1399139913991 , "RN" : 0.01000100010001 , "RD" : 0 , "RC" : 0.01000100010001 , "RQ" : 0.09000900090009 , "RE" : 0 , "RG" : 0.01000100010001 , "RH" : 0.08000800080008 , "RI" : 0.02000200020002 , "RL" : 0.01000100010001 , "RK" : 0.37003700370037 , "RM" : 0.01000100010001 , "RF" : 0.01000100010001 , "RP" : 0.05000500050005 , "RS" : 0.11001100110011 , "RT" : 0.02000200020002 , "RW" : 0.02000200020002 , "RY" : 0 , "RV" : 0.02000200020002 , "NA" : 0.09 , "NR" : 0.01 , "NN" : 98.22 , "ND" : 0.42 , "NC" : 0 , "NQ" : 0.04 , "NE" : 0.07 , "NG" : 0.12 , "NH" : 0.18 , "NI" : 0.03 , "NL" : 0.03 , "NK" : 0.25 , "NM" : 0 , "NF" : 0.01 , "NP" : 0.02 , "NS" : 0.34 , "NT" : 0.13 , "NW" : 0 , "NY" : 0.03 , "NV" : 0.01 , "DA" : 0.1 , "DR" : 0 , "DN" : 0.36 , "DD" : 98.59 , "DC" : 0 , "DQ" : 0.05 , "DE" : 0.56 , "DG" : 0.11 , "DH" : 0.03 , "DI" : 0.01 , "DL" : 0 , "DK" : 0.06 , "DM" : 0 , "DF" : 0 , "DP" : 0.01 , "DS" : 0.07 , "DT" : 0.04 , "DW" : 0 , "DY" : 0 , "DV" : 0.01 , "CA" : 0.03 , "CR" : 0.01 , "CN" : 0 , "CD" : 0 , "CC" : 99.73 , "CQ" : 0 , "CE" : 0 , "CG" : 0.01 , "CH" : 0.01 , "CI" : 0.02 , "CL" : 0 , "CK" : 0 , "CM" : 0 , "CF" : 0 , "CP" : 0.01 , "CS" : 0.11 , "CT" : 0.01 , "CW" : 0 , "CY" : 0.03 , "CV" : 0.03 , "QA" : 0.08 , "QR" : 0.1 , "QN" : 0.04 , "QD" : 0.06 , "QC" : 0 , "QQ" : 98.76 , "QE" : 0.35 , "QG" : 0.03 , "QH" : 0.2 , "QI" : 0.01 , "QL" : 0.06 , "QK" : 0.12 , "QM" : 0.02 , "QF" : 0 , "QP" : 0.08 , "QS" : 0.04 , "QT" : 0.03 , "QW" : 0 , "QY" : 0 , "QV" : 0.02 , "EA" : 0.17 , "ER" : 0 , "EN" : 0.06 , "ED" : 0.53 , "EC" : 0 , "EQ" : 0.27 , "EE" : 98.65 , "EG" : 0.07 , "EH" : 0.01 , "EI" : 0.02 , "EL" : 0.01 , "EK" : 0.07 , "EM" : 0 , "EF" : 0 , "EP" : 0.03 , "ES" : 0.06 , "ET" : 0.02 , "EW" : 0 , "EY" : 0.01 , "EV" : 0.02 , "GA" : 0.21 , "GR" : 0 , "GN" : 0.06 , "GD" : 0.06 , "GC" : 0 , "GQ" : 0.01 , "GE" : 0.04 , "GG" : 99.35 , "GH" : 0 , "GI" : 0 , "GL" : 0.01 , "GK" : 0.02 , "GM" : 0 , "GF" : 0.01 , "GP" : 0.02 , "GS" : 0.16 , "GT" : 0.02 , "GW" : 0 , "GY" : 0 , "GV" : 0.03 , "HA" : 0.02000200020002 , "HR" : 0.1000100010001 , "HN" : 0.21002100210021 , "HD" : 0.04000400040004 , "HC" : 0.01000100010001 , "HQ" : 0.23002300230023 , "HE" : 0.02000200020002 , "HG" : 0.01000100010001 , "HH" : 99.1299129912991 , "HI" : 0 , "HL" : 0.04000400040004 , "HK" : 0.02000200020002 , "HM" : 0 , "HF" : 0.02000200020002 , "HP" : 0.05000500050005 , "HS" : 0.02000200020002 , "HT" : 0.01000100010001 , "HW" : 0 , "HY" : 0.04000400040004 , "HV" : 0.03000300030003 , "IA" : 0.05999400059994 , "IR" : 0.02999700029997 , "IN" : 0.02999700029997 , "ID" : 0.00999900009999 , "IC" : 0.00999900009999 , "IQ" : 0.00999900009999 , "IE" : 0.02999700029997 , "IG" : 0 , "IH" : 0 , "II" : 98.7101289871013 , "IL" : 0.21997800219978 , "IK" : 0.03999600039996 , "IM" : 0.04999500049995 , "IF" : 0.07999200079992 , "IP" : 0.00999900009999 , "IS" : 0.01999800019998 , "IT" : 0.10998900109989 , "IW" : 0 , "IY" : 0.00999900009999 , "IV" : 0.56994300569943 , "LA" : 0.04 , "LR" : 0.01 , "LN" : 0.01 , "LD" : 0 , "LC" : 0 , "LQ" : 0.03 , "LE" : 0.01 , "LG" : 0.01 , "LH" : 0.01 , "LI" : 0.09 , "LL" : 99.47 , "LK" : 0.01 , "LM" : 0.08 , "LF" : 0.06 , "LP" : 0.02 , "LS" : 0.01 , "LT" : 0.02 , "LW" : 0 , "LY" : 0.01 , "LV" : 0.11 , "KA" : 0.01999600079984 , "KR" : 0.18996200759848 , "KN" : 0.12997400519896 , "KD" : 0.02999400119976 , "KC" : 0 , "KQ" : 0.0599880023995201 , "KE" : 0.0399920015996801 , "KG" : 0.01999600079984 , "KH" : 0.00999800039992002 , "KI" : 0.01999600079984 , "KL" : 0.01999600079984 , "KK" : 99.2401519696061 , "KM" : 0.0399920015996801 , "KF" : 0 , "KP" : 0.01999600079984 , "KS" : 0.0699860027994401 , "KT" : 0.0799840031993601 , "KW" : 0 , "KY" : 0 , "KV" : 0.00999800039992002 , "MA" : 0.06000600060006 , "MR" : 0.04000400040004 , "MN" : 0 , "MD" : 0 , "MC" : 0 , "MQ" : 0.04000400040004 , "ME" : 0.01000100010001 , "MG" : 0.01000100010001 , "MH" : 0 , "MI" : 0.12001200120012 , "ML" : 0.45004500450045 , "MK" : 0.2000200020002 , "MM" : 98.7498749874988 , "MF" : 0.04000400040004 , "MP" : 0.01000100010001 , "MS" : 0.04000400040004 , "MT" : 0.06000600060006 , "MW" : 0 , "MY" : 0 , "MV" : 0.17001700170017 , "FA" : 0.01999600079984 , "FR" : 0.00999800039992002 , "FN" : 0.00999800039992002 , "FD" : 0 , "FC" : 0 , "FQ" : 0 , "FE" : 0 , "FG" : 0.00999800039992002 , "FH" : 0.01999600079984 , "FI" : 0.0699860027994401 , "FL" : 0.12997400519896 , "FK" : 0 , "FM" : 0.00999800039992002 , "FF" : 99.4401119776045 , "FP" : 0.00999800039992002 , "FS" : 0.02999400119976 , "FT" : 0.00999800039992002 , "FW" : 0.00999800039992002 , "FY" : 0.20995800839832 , "FV" : 0.00999800039992002 , "PA" : 0.21995600879824 , "PR" : 0.0399920015996801 , "PN" : 0.01999600079984 , "PD" : 0.00999800039992002 , "PC" : 0.00999800039992002 , "PQ" : 0.0599880023995201 , "PE" : 0.02999400119976 , "PG" : 0.02999400119976 , "PH" : 0.02999400119976 , "PI" : 0 , "PL" : 0.02999400119976 , "PK" : 0.02999400119976 , "PM" : 0 , "PF" : 0 , "PP" : 99.2401519696061 , "PS" : 0.16996600679864 , "PT" : 0.0499900019996001 , "PW" : 0 , "PY" : 0 , "PV" : 0.02999400119976 , "SA" : 0.35 , "SR" : 0.06 , "SN" : 0.2 , "SD" : 0.05 , "SC" : 0.05 , "SQ" : 0.02 , "SE" : 0.04 , "SG" : 0.21 , "SH" : 0.01 , "SI" : 0.01 , "SL" : 0.01 , "SK" : 0.08 , "SM" : 0.01 , "SF" : 0.02 , "SP" : 0.12 , "SS" : 98.4 , "ST" : 0.32 , "SW" : 0.01 , "SY" : 0.01 , "SV" : 0.02 , "TA" : 0.31993601279744 , "TR" : 0.00999800039992002 , "TN" : 0.0899820035992801 , "TD" : 0.02999400119976 , "TC" : 0.00999800039992002 , "TQ" : 0.01999600079984 , "TE" : 0.01999600079984 , "TG" : 0.02999400119976 , "TH" : 0.00999800039992002 , "TI" : 0.0699860027994401 , "TL" : 0.02999400119976 , "TK" : 0.10997800439912 , "TM" : 0.01999600079984 , "TF" : 0.00999800039992002 , "TP" : 0.0399920015996801 , "TS" : 0.379924015196961 , "TT" : 98.6902619476105 , "TW" : 0 , "TY" : 0.00999800039992002 , "TV" : 0.0999800039992002 , "WA" : 0 , "WR" : 0.08 , "WN" : 0.01 , "WD" : 0 , "WC" : 0 , "WQ" : 0 , "WE" : 0 , "WG" : 0 , "WH" : 0.01 , "WI" : 0 , "WL" : 0.04 , "WK" : 0 , "WM" : 0 , "WF" : 0.03 , "WP" : 0 , "WS" : 0.05 , "WT" : 0 , "WW" : 99.76 , "WY" : 0.02 , "WV" : 0 , "YA" : 0.02000400080016 , "YR" : 0 , "YN" : 0.0400080016003201 , "YD" : 0 , "YC" : 0.03000600120024 , "YQ" : 0 , "YE" : 0.01000200040008 , "YG" : 0 , "YH" : 0.0400080016003201 , "YI" : 0.01000200040008 , "YL" : 0.02000400080016 , "YK" : 0.01000200040008 , "YM" : 0 , "YF" : 0.28005601120224 , "YP" : 0 , "YS" : 0.02000400080016 , "YT" : 0.02000400080016 , "YW" : 0.01000200040008 , "YY" : 99.4698939787958 , "YV" : 0.02000400080016 , "VA" : 0.18 , "VR" : 0.01 , "VN" : 0.01 , "VD" : 0.01 , "VC" : 0.02 , "VQ" : 0.01 , "VE" : 0.02 , "VG" : 0.05 , "VH" : 0.01 , "VI" : 0.33 , "VL" : 0.15 , "VK" : 0.01 , "VM" : 0.04 , "VF" : 0 , "VP" : 0.02 , "VS" : 0.02 , "VT" : 0.09 , "VW" : 0 , "VY" : 0.01 , "VV" : 99.01 } #print(PAM,sep="\n") x_ax = [] y_ax = [] yy_ax = [] sequence = random_AA_seq(length_sequence) print ( "Evolution\n" ) print (sequence) #print ("PAM\tidentity\t\t%4d\n" % (len(sequence))) sequence_start = sequence for pg in range ( 1 ,maxpam + 1 ): evolution = "" for aaa in range ( 0 ,length_sequence): found = 0 sum = 0 r = random.random() * 100 #print (sequence[aaa:aaa+1]) random.shuffle(aa) evolved = "" #print(aa) for aminoacid in aa: #print (sequence[aaa],aminoacid,PAM[sequence[aaa]+aminoacid]) sum + = PAM[sequence[aaa] + aminoacid] # maximum is 100, generate random number if ( sum >r and found = = 0 ): found = 1 evolved = aminoacid #print (r,sum,evolved) evolution = evolution + evolved print (evolution) difference = compare_AA_seq(sequence_start,evolution) print (pg,difference) sequence = evolution if (pg % 50 = = 0 ): print (r '.' ,end = "",flush = True ) x_ax.append(pg) y_ax.append(difference) yy_ax.append( 100 - difference) #similarity plot plt.figure( 'Evolution' ) plt.title(r '% Identity as a function of PAM' ) plt.xlabel(r 'PAM' ) plt.ylabel(r '% identity' ) plt.xlim( 1 ,maxpam) plt.ylim( 0 , 100 ) plt.scatter(x_ax, y_ax) plt.plot(x_ax, np.full(maxpam, 5 ), 'r--' ) plt.annotate( 'random protein' , xy = ( 25 , 7 ), xytext = ( 25 , 7 ),color = 'red' ) plt.show() ''' #distance plot plt.figure('Evolution') plt.title(r'% Distance as a function of PAM') plt.xlabel(r'PAM') plt.ylabel(r'% distance') plt.xlim(1,350) plt.ylim(0,100) plt.scatter(x_ax, yy_ax) plt.show() ''' |
You must be logged in to post a comment.