#!/usr/bin/Rscript # instead of readFASTA, full Biostrings library is not neccessary here read_fasta_sequence_from_file <- function(file) paste(grep("^[^>]", readLines(file), value=TRUE), collapse="") # split the sequence into 3-nucleotide-triples, optionally shifted (for different reading frames) read_codons <- function(sequence, shift=0) sapply(seq(1+shift,nchar(sequence), 3), function(x) substr(sequence, x, x+2)) DNA_stop_codons <- c("TAA", "TAG", "TGA") # RNA's UAA, UAG and UGA stop codons encoded in DNA rDNA_stop_codons <- c("TTA", "CTA", "TCA") # reverse complement of each DNA encoded stop codons DNA <- read_fasta_sequence_from_file("04_orf.fasta") # DNA <- readFASTA("04_orf.fasta", strip.descs=TRUE)[[1]]$seq # if you want to use Biostrings instead for(shift in 0:2) { frame <- read_codons(DNA, shift) if(!sum(DNA_stop_codons %in% frame)) cat("Open Reading Frame ( found in DNA, shift:", shift, ")\n", paste(frame), "\n") if(!sum(rDNA_stop_codons %in% frame)) cat("Open Reading Frame ( found in reverse compliment of DNA, shift:", shift, ")\n", paste(frame), "\n") }