#!/usr/bin/perl # Identify all possible open reading frames # Used Only for .fna files # Used Only for genome dna # Initialize variables my $dna = ''; my $revcom = ''; my $protein = ''; my $numorf=0; my $okorf=0; my $tv1=0; my $tv2=0; my $fname=''; # parameter input print "STOP code (presented by _ ) will not be included in the LENGTH of a protein.\n\n"; print "Please Enter the Threshold Value for Storing the Protein,\nThe Max (included) : "; $tv2=<>; chop($tv2); print "The Min (included) : "; $tv1=<>; chop($tv1); if ($tv1>=$tv2) {print"Wrong Threshold Value.\n";exit;} print "Please Enter the File Name for Analysis : "; $dna = sequence_from_data(); $revcom = revcom($dna); my $resultfile="ORF_".$fname.".txt"; open(RESULT,">$resultfile"); # core code for ORF finding with anti-sense strand for (my $frame=0;$frame<3;$frame++){ for (my $i=$frame;$i\n"; my $a=length($protein)-1; my $b=$i+3*$a; print_sequence($protein,10,6); print "\n"; if( $a <= $tv2 && $a >= $tv1 ){ $okorf++; print RESULT "\n+-----ORF of Sense Strand $okorf#----->\n"; print RESULT "> Length(not STOP codes) = $a.\n"; print RESULT "> Sense Strand Position frome $i to $b (not include STOP code).\n"; print RESULT "$protein"; print RESULT "\n"; } $i=$b; } } } } for (my $frame=0;$frame<3;$frame++){ for (my $i=$frame;$i= $tv1 ){ $okorf++; print RESULT "\n<--ORF of Anti-Sense Strand $okorf#--+\n"; print RESULT "> Length(include STOP codes) = $a.\n"; print RESULT "> Anti-Sense Strand Position frome $i to $b (not include STOP code).\n"; print RESULT "$protein"; print RESULT "\n"; } $i=$b; } } } } print RESULT "\n\nEnd!"; close (RESULT); exit; sub sequence_from_data { $fname=<>; chop($fname); open(FH, $fname); local $/; my $data = ; $data =~ s/\>.+?\n//; $data =~ s/\n//g; close(FH); return $data; } sub revcom { my($dna) = @_; my $revcom = reverse $dna; $revcom =~ tr/ACGTacgt/TGCAtgca/; return $revcom; } sub dna2peptide { my($dna) = @_; my $protein = ''; my $codon2aa = ''; for(my $i=0; $i < (length($dna) - 2) ; $i += 3) { $codon2aa=codon2aa( substr($dna,$i,3) ); $protein .= $codon2aa; if($codon2aa eq '_'){ return $protein; exit;} } return '0'; } sub codon2aa { my($codon) = @_; $codon = uc $codon; my(%genetic_code) = ( 'TCA' => 'S', # Serine 'TCC' => 'S', # Serine 'TCG' => 'S', # Serine 'TCT' => 'S', # Serine 'TTC' => 'F', # Phenylalanine 'TTT' => 'F', # Phenylalanine 'TTA' => 'L', # Leucine 'TTG' => 'L', # Leucine 'TAC' => 'Y', # Tyrosine 'TAT' => 'Y', # Tyrosine 'TAA' => '_', # Stop 'TAG' => '_', # Stop 'TGC' => 'C', # Cysteine 'TGT' => 'C', # Cysteine 'TGA' => '_', # Stop 'TGG' => 'W', # Tryptophan 'CTA' => 'L', # Leucine 'CTC' => 'L', # Leucine 'CTG' => 'L', # Leucine 'CTT' => 'L', # Leucine 'CCA' => 'P', # Proline 'CCC' => 'P', # Proline 'CCG' => 'P', # Proline 'CCT' => 'P', # Proline 'CAC' => 'H', # Histidine 'CAT' => 'H', # Histidine 'CAA' => 'Q', # Glutamine 'CAG' => 'Q', # Glutamine 'CGA' => 'R', # Arginine 'CGC' => 'R', # Arginine 'CGG' => 'R', # Arginine 'CGT' => 'R', # Arginine 'ATA' => 'I', # Isoleucine 'ATC' => 'I', # Isoleucine 'ATT' => 'I', # Isoleucine 'ATG' => 'M', # Methionine 'ACA' => 'T', # Threonine 'ACC' => 'T', # Threonine 'ACG' => 'T', # Threonine 'ACT' => 'T', # Threonine 'AAC' => 'N', # Asparagine 'AAT' => 'N', # Asparagine 'AAA' => 'K', # Lysine 'AAG' => 'K', # Lysine 'AGC' => 'S', # Serine 'AGT' => 'S', # Serine 'AGA' => 'R', # Arginine 'AGG' => 'R', # Arginine 'GTA' => 'V', # Valine 'GTC' => 'V', # Valine 'GTG' => 'V', # Valine 'GTT' => 'V', # Valine 'GCA' => 'A', # Alanine 'GCC' => 'A', # Alanine 'GCG' => 'A', # Alanine 'GCT' => 'A', # Alanine 'GAC' => 'D', # Aspartic Acid 'GAT' => 'D', # Aspartic Acid 'GAA' => 'E', # Glutamic Acid 'GAG' => 'E', # Glutamic Acid 'GGA' => 'G', # Glycine 'GGC' => 'G', # Glycine 'GGG' => 'G', # Glycine 'GGT' => 'G', # Glycine ); if(exists $genetic_code{$codon}) { return $genetic_code{$codon}; }else{ print "Bad codon \"$codon\"!\n"; exit; } } sub print_sequence { my($sequence, $string, $group) = @_; for ( my $pos = 0 ; $pos < length($sequence) ; ) { for (my $b=0;$b<$group;$pos+=$string){ print substr($sequence, $pos, $string), " "; $b++;} print "\n"; } } sub checkORF { return 1; }