Python code that takes in two files: a Pfam lookup table and a domain-domain interaction file.
#!/usr/bin/python
"""Group domain-domain interactions by protein pair.

Two whitespace-delimited text files are read:

* a lookup table whose first column is mapped to its second column
  (presumably Pfam identifier -> domain name; verify against the data);
* an interaction file with at least five columns per row::

      domain1  protein1  <unused>  domain2  protein2

  where ``domain1`` and ``domain2`` are translated through the lookup table.

For every unordered protein pair one line is written::

    protein_a protein_b <number of domain pairs> d1-d2;d3-d4;

The code avoids Python-3-only syntax so it keeps running under the
``/usr/bin/python`` interpreter named in the shebang.
"""
import sys

PFAM_TABLE_PATH = "pfam-table.txt"
INTERACTIONS_PATH = "tmp2-clean.txt"


def load_pfam_table(path):
    """Return a dict mapping column 1 to column 2 of the lookup table.

    Reading stops at the first line with fewer than two columns (including
    a blank line); anything after it is ignored.
    NOTE(review): a stray blank line therefore truncates the table --
    confirm that stop-at-first-short-line is the intended contract.
    """
    table = {}
    with open(path, "r") as handle:
        for line in handle:
            cols = line.split()
            if len(cols) < 2:
                break
            table[cols[0]] = cols[1]
    return table


def collect_domain_pairs(path, pfam):
    """Return ``{(protein_a, protein_b): ["d1-d2", ...]}`` for the file.

    A row whose proteins were already seen in the opposite order is filed
    under the existing key, so each unordered protein pair has one entry.
    The "d1-d2" text always follows the column order of the row it came
    from, even when the row is filed under the reversed key.

    Reading stops at the first line with fewer than five columns.
    Raises ``KeyError`` if a domain column is missing from ``pfam``.
    """
    pairs = {}
    with open(path, "r") as handle:
        for line in handle:
            cols = line.split()
            if len(cols) < 5:
                break
            domain1 = pfam[cols[0]]
            protein1 = cols[1]
            # cols[2] is present in the file but not used by this script.
            domain2 = pfam[cols[3]]
            protein2 = cols[4]

            # Tuple keys (rather than "p1-p2" strings) keep protein names
            # that themselves contain '-' intact and free of collisions.
            key = (protein1, protein2)
            if key not in pairs and (protein2, protein1) in pairs:
                key = (protein2, protein1)
            pairs.setdefault(key, []).append(domain1 + "-" + domain2)
    return pairs


def format_row(proteins, domain_pairs):
    """Return one output line: both proteins, the pair count, the pairs.

    Every domain pair is followed by ';' (so the list ends with ';'),
    matching the script's established output format.
    """
    joined = "".join(pair + ";" for pair in domain_pairs)
    return "%s %s %d %s" % (proteins[0], proteins[1], len(domain_pairs), joined)


def main(table_path=PFAM_TABLE_PATH, data_path=INTERACTIONS_PATH, out=None):
    """Load both files and write one summary line per protein pair.

    ``out`` is any object with a ``write`` method; it defaults to
    ``sys.stdout``. Line order follows the iteration order of the dict.
    """
    if out is None:
        out = sys.stdout
    pfam = load_pfam_table(table_path)
    pairs = collect_domain_pairs(data_path, pfam)
    for proteins, domain_pairs in pairs.items():
        out.write(format_row(proteins, domain_pairs) + "\n")


if __name__ == "__main__":
    main()
No comments:
Post a Comment