#!/usr/bin/Rscript

# instead of readFASTA, full Biostrings library is not neccessary here
read_fasta_sequence_from_file <- function(file) paste(grep("^[^>]", readLines(file), value=TRUE), collapse="")

# split the sequence into 3-nucleotide-triples, optionally shifted (for different reading frames)
read_codons <- function(sequence, shift=0) sapply(seq(1+shift,nchar(sequence), 3), function(x) substr(sequence, x, x+2))

DNA_stop_codons <- c("TAA", "TAG", "TGA")  # RNA's UAA, UAG and UGA stop codons encoded in DNA
rDNA_stop_codons <- c("TTA", "CTA", "TCA") # reverse complement of each DNA encoded stop codons

DNA <- read_fasta_sequence_from_file("04_orf.fasta")
# DNA <- readFASTA("04_orf.fasta", strip.descs=TRUE)[[1]]$seq # if you want to use Biostrings instead
for(shift in 0:2)
{
	frame <- read_codons(DNA, shift)
	if(!sum(DNA_stop_codons %in% frame))
		cat("Open Reading Frame ( found in DNA, shift:", shift, ")\n", paste(frame), "\n")
	if(!sum(rDNA_stop_codons %in% frame))
		cat("Open Reading Frame ( found in reverse compliment of DNA, shift:", shift, ")\n", paste(frame), "\n")
}