Evolution.Py available in Github

Available on Github

import numpy as np
import matplotlib.pyplot as plt
import random

def random_AA_seq(length):
    result='M'
    for i in range(length-1):
        result = result+str(random.choice('ACDEFGHIKLMNPQRSTVWY'))
    return result

def compare_AA_seq(s1,s2):
    l = len(s1)
    c = 0
    for i in range(0,l):
        if (s1[i:i+1] == s2[i:i+1]):
            c+=1
    result = c*100/l
#   print (c,l,result)
    return result

maxpam = 10
length_sequence = 1000

aa=['A','R','N','D','C','Q','E','G','H','I','L','K','M','F','P','S','T','W','Y','V']
#print (aa)
PAM={"AA":98.67,"AR":0.01,"AN":0.04,"AD":0.06,"AC":0.01,"AQ":0.03,"AE":0.1,"AG":0.21,"AH":0.01,"AI":0.02,"AL":0.03,"AK":0.02,"AM":0.01,"AF":0.01,"AP":0.13,"AS":0.28,"AT":0.22,"AW":0,"AY":0.01,"AV":0.13,"RA":0.02000200020002,"RR":99.1399139913991,"RN":0.01000100010001,"RD":0,"RC":0.01000100010001,"RQ":0.09000900090009,"RE":0,"RG":0.01000100010001,"RH":0.08000800080008,"RI":0.02000200020002,"RL":0.01000100010001,"RK":0.37003700370037,"RM":0.01000100010001,"RF":0.01000100010001,"RP":0.05000500050005,"RS":0.11001100110011,"RT":0.02000200020002,"RW":0.02000200020002,"RY":0,"RV":0.02000200020002,"NA":0.09,"NR":0.01,"NN":98.22,"ND":0.42,"NC":0,"NQ":0.04,"NE":0.07,"NG":0.12,"NH":0.18,"NI":0.03,"NL":0.03,"NK":0.25,"NM":0,"NF":0.01,"NP":0.02,"NS":0.34,"NT":0.13,"NW":0,"NY":0.03,"NV":0.01,"DA":0.1,"DR":0,"DN":0.36,"DD":98.59,"DC":0,"DQ":0.05,"DE":0.56,"DG":0.11,"DH":0.03,"DI":0.01,"DL":0,"DK":0.06,"DM":0,"DF":0,"DP":0.01,"DS":0.07,"DT":0.04,"DW":0,"DY":0,"DV":0.01,"CA":0.03,"CR":0.01,"CN":0,"CD":0,"CC":99.73,"CQ":0,"CE":0,"CG":0.01,"CH":0.01,"CI":0.02,"CL":0,"CK":0,"CM":0,"CF":0,"CP":0.01,"CS":0.11,"CT":0.01,"CW":0,"CY":0.03,"CV":0.03,"QA":0.08,"QR":0.1,"QN":0.04,"QD":0.06,"QC":0,"QQ":98.76,"QE":0.35,"QG":0.03,"QH":0.2,"QI":0.01,"QL":0.06,"QK":0.12,"QM":0.02,"QF":0,"QP":0.08,"QS":0.04,"QT":0.03,"QW":0,"QY":0,"QV":0.02,"EA":0.17,"ER":0,"EN":0.06,"ED":0.53,"EC":0,"EQ":0.27,"EE":98.65,"EG":0.07,"EH":0.01,"EI":0.02,"EL":0.01,"EK":0.07,"EM":0,"EF":0,"EP":0.03,"ES":0.06,"ET":0.02,"EW":0,"EY":0.01,"EV":0.02,"GA":0.21,"GR":0,"GN":0.06,"GD":0.06,"GC":0,"GQ":0.01,"GE":0.04,"GG":99.35,"GH":0,"GI":0,"GL":0.01,"GK":0.02,"GM":0,"GF":0.01,"GP":0.02,"GS":0.16,"GT":0.02,"GW":0,"GY":0,"GV":0.03,"HA":0.02000200020002,"HR":0.1000100010001,"HN":0.21002100210021,"HD":0.04000400040004,"HC":0.01000100010001,"HQ":0.23002300230023,"HE":0.02000200020002,"HG":0.01000100010001,"HH":99.1299129912991,"HI":0,"HL":0.04000400040004,"HK":0.02000200020002,"HM":0,"HF":0.02000200020002,"HP":0.05000500050005,"HS":0.02000200020002,"HT":0.01000100010001,"HW":0,"HY":0.04000400040004,"HV":0.03000300030003,"IA":0.05999400059994,"IR":0.02999700029997,"IN":0.02999700029997,"ID":0.00999900009999,"IC":0.00999900009999,"IQ":0.00999900009999,"IE":0.02999700029997,"IG":0,"IH":0,"II":98.7101289871013,"IL":0.21997800219978,"IK":0.03999600039996,"IM":0.04999500049995,"IF":0.07999200079992,"IP":0.00999900009999,"IS":0.01999800019998,"IT":0.10998900109989,"IW":0,"IY":0.00999900009999,"IV":0.56994300569943,"LA":0.04,"LR":0.01,"LN":0.01,"LD":0,"LC":0,"LQ":0.03,"LE":0.01,"LG":0.01,"LH":0.01,"LI":0.09,"LL":99.47,"LK":0.01,"LM":0.08,"LF":0.06,"LP":0.02,"LS":0.01,"LT":0.02,"LW":0,"LY":0.01,"LV":0.11,"KA":0.01999600079984,"KR":0.18996200759848,"KN":0.12997400519896,"KD":0.02999400119976,"KC":0,"KQ":0.0599880023995201,"KE":0.0399920015996801,"KG":0.01999600079984,"KH":0.00999800039992002,"KI":0.01999600079984,"KL":0.01999600079984,"KK":99.2401519696061,"KM":0.0399920015996801,"KF":0,"KP":0.01999600079984,"KS":0.0699860027994401,"KT":0.0799840031993601,"KW":0,"KY":0,"KV":0.00999800039992002,"MA":0.06000600060006,"MR":0.04000400040004,"MN":0,"MD":0,"MC":0,"MQ":0.04000400040004,"ME":0.01000100010001,"MG":0.01000100010001,"MH":0,"MI":0.12001200120012,"ML":0.45004500450045,"MK":0.2000200020002,"MM":98.7498749874988,"MF":0.04000400040004,"MP":0.01000100010001,"MS":0.04000400040004,"MT":0.06000600060006,"MW":0,"MY":0,"MV":0.17001700170017,"FA":0.01999600079984,"FR":0.00999800039992002,"FN":0.00999800039992002,"FD":0,"FC":0,"FQ":0,"FE":0,"FG":0.00999800039992002,"FH":0.01999600079984,"FI":0.0699860027994401,"FL":0.12997400519896,"FK":0,"FM":0.00999800039992002,"FF":99.4401119776045,"FP":0.00999800039992002,"FS":0.02999400119976,"FT":0.00999800039992002,"FW":0.00999800039992002,"FY":0.20995800839832,"FV":0.00999800039992002,"PA":0.21995600879824,"PR":0.0399920015996801,"PN":0.01999600079984,"PD":0.00999800039992002,"PC":0.00999800039992002,"PQ":0.0599880023995201,"PE":0.02999400119976,"PG":0.02999400119976,"PH":0.02999400119976,"PI":0,"PL":0.02999400119976,"PK":0.02999400119976,"PM":0,"PF":0,"PP":99.2401519696061,"PS":0.16996600679864,"PT":0.0499900019996001,"PW":0,"PY":0,"PV":0.02999400119976,"SA":0.35,"SR":0.06,"SN":0.2,"SD":0.05,"SC":0.05,"SQ":0.02,"SE":0.04,"SG":0.21,"SH":0.01,"SI":0.01,"SL":0.01,"SK":0.08,"SM":0.01,"SF":0.02,"SP":0.12,"SS":98.4,"ST":0.32,"SW":0.01,"SY":0.01,"SV":0.02,"TA":0.31993601279744,"TR":0.00999800039992002,"TN":0.0899820035992801,"TD":0.02999400119976,"TC":0.00999800039992002,"TQ":0.01999600079984,"TE":0.01999600079984,"TG":0.02999400119976,"TH":0.00999800039992002,"TI":0.0699860027994401,"TL":0.02999400119976,"TK":0.10997800439912,"TM":0.01999600079984,"TF":0.00999800039992002,"TP":0.0399920015996801,"TS":0.379924015196961,"TT":98.6902619476105,"TW":0,"TY":0.00999800039992002,"TV":0.0999800039992002,"WA":0,"WR":0.08,"WN":0.01,"WD":0,"WC":0,"WQ":0,"WE":0,"WG":0,"WH":0.01,"WI":0,"WL":0.04,"WK":0,"WM":0,"WF":0.03,"WP":0,"WS":0.05,"WT":0,"WW":99.76,"WY":0.02,"WV":0,"YA":0.02000400080016,"YR":0,"YN":0.0400080016003201,"YD":0,"YC":0.03000600120024,"YQ":0,"YE":0.01000200040008,"YG":0,"YH":0.0400080016003201,"YI":0.01000200040008,"YL":0.02000400080016,"YK":0.01000200040008,"YM":0,"YF":0.28005601120224,"YP":0,"YS":0.02000400080016,"YT":0.02000400080016,"YW":0.01000200040008,"YY":99.4698939787958,"YV":0.02000400080016,"VA":0.18,"VR":0.01,"VN":0.01,"VD":0.01,"VC":0.02,"VQ":0.01,"VE":0.02,"VG":0.05,"VH":0.01,"VI":0.33,"VL":0.15,"VK":0.01,"VM":0.04,"VF":0,"VP":0.02,"VS":0.02,"VT":0.09,"VW":0,"VY":0.01,"VV":99.01}
#print(PAM,sep="\n")    
x_ax=[]
y_ax=[]
yy_ax=[]

sequence = random_AA_seq(length_sequence)
print ("Evolution\n")
print (sequence)
#print ("PAM\tidentity\t\t%4d\n" % (len(sequence)))
sequence_start = sequence

for pg in range(1,maxpam+1):
    evolution=""
    for aaa in range(0,length_sequence):
        found=0
        sum=0
        r = random.random()*100
        #print (sequence[aaa:aaa+1])
        random.shuffle(aa)
        evolved=""
        #print(aa)
        for aminoacid in aa:
            #print (sequence[aaa],aminoacid,PAM[sequence[aaa]+aminoacid])
            sum += PAM[sequence[aaa]+aminoacid]
            # maximum is 100, generate random number
            if(sum>r and found==0):
                found=1
                evolved=aminoacid
            #print (r,sum,evolved)
        evolution = evolution + evolved 
    print (evolution)
    difference=compare_AA_seq(sequence_start,evolution)    
    print (pg,difference)
    sequence = evolution
    if (pg%50 == 0):
        print (r'.',end="",flush=True)
    x_ax.append(pg)
    y_ax.append(difference)
    yy_ax.append(100-difference)

    
#similarity plot
plt.figure('Evolution')
plt.title(r'% Identity as a function of PAM')
plt.xlabel(r'PAM')
plt.ylabel(r'% identity')
plt.xlim(1,maxpam)
plt.ylim(0,100)
plt.scatter(x_ax, y_ax)
plt.plot(x_ax, np.full(maxpam,5),'r--')
plt.annotate('random protein', xy=(25,7), xytext=(25,7),color='red')
plt.show()
'''
#distance plot    
plt.figure('Evolution')
plt.title(r'% Distance as a function of PAM')
plt.xlabel(r'PAM')
plt.ylabel(r'% distance')
plt.xlim(1,350)
plt.ylim(0,100)
plt.scatter(x_ax, yy_ax)
plt.show()
'''