Tuesday, November 18, 2008

Bioperl in action

Reads a FASTA file with multiple sequences (delimited by >) then print seq of length greater than 6000.

1 #!/usr/bin/perl
2 #
3 # print seq of length greater than 6000
4 #
5 #
6
7 use Bio::SeqIO;
8
9 unless ($#ARGV+1 == 1) { die "Need one argument\n"; }
10
11 my $infile = shift;
12
13 # !!!! use => not ->
14 my $file_obj = Bio::SeqIO->new(-file=>$infile,-formatr=>"Fasta");
15 my $cutoff = 6000;
16
17 while (my $seq_obj = $file_obj->next_seq() ) {
18 my $id = $seq_obj->id();
19 my $desc = $seq_obj->desc();
20 my $len = $seq_obj->length();
21
22 if ($len > $cutoff) {
23 # !!!!!!!!!!!! note: this prints reference
24 #print "$seq_obj->id()\t$seq_obj->desc()\n";
25
26 print "ID=$id\tDESC=$desc\tLEN=$len\n";
27 }
28 }
29
30

No comments: