#!/usr/bin/env Rscript


#./bin/fa_reheader --prefix xxx tests/r19b16.hdr.fasta

#-#-#-#-#-#-#-#-#-#-#-#-#
# Argument parsing
#-#-#-#-#-#-#-#-#-#-#-#-#
library(optparse)
option_list <- list(
    make_option("--out", help="Name of the output FASTA file. If missing the script display the metadata table."),
		make_option("--glue", help="Glue expression to use for the new header",default = "{simplify_seq_id(seq_id)} {fa_tags_to_str(fa_tags_standardize(tags))} {title}"),
		make_option("--flyeinfo", help="Path to assembly_info.txt generated by FLYE"),
		make_option("--tsv", help="Path to a tab-separated table with additional sequence metadata. The file must contain a header line with `seq_id` column."),
		make_option("--prefix", help="A prefix to add to every seq_id",default = "")
)
opt <- parse_args(
	OptionParser(
		description = "Change headers lines in a FASTA file",
		usage = "%prog [options] input.fasta",
		epilogue = "Example usage:
      %prog --prefix r34b14. --flyeinfo out/r34b14.fasta.flyeinfo out/r34b14.fasta
      %prog --prefix r10b14. out/r10b14.fasta
		",
		option_list = option_list
	),
	positional_arguments = 1
)


#-#-#-#-#-#-#-#-#-#-#-#-#
# Script
#-#-#-#-#-#-#-#-#-#-#-#-#
suppressPackageStartupMessages({
  source(fs::path(Sys.getenv("FATOOLS_DIR","."),"bin/lib_fa.R"))
})


simplify_seq_id <- function(seq_id) {
	str_replace(seq_id,"^contig_","")
}




# Read FASTA
dna <- readDNAStringSet(opt$args)

# Parse header line
headers <- fa_parse_header_line(names(dna))


# Add FLYE info tags if available
if (!is.null(opt$options$flyeinfo)) {
	flye <- read_flye_info(opt$options$flyeinfo) |>
		rename_with(~str_glue("flye.{.}"),!seq_id)
	headers$tags <- left_join(headers$tags,flye,by = "seq_id", relationship = "one-to-one")
}

# Add TSV info tags if available
if (!is.null(opt$options$tsv)) {
	tsv <- read_tsv(opt$options$tsv,comment="",quote="")
	stopifnot("no `seq_id` column found in given TSV" = "seq_id" %in% names(tsv))
	headers$tags <- left_join(headers$tags,tsv,by = "seq_id", relationship = "one-to-one", suffix = c("",".tsv"))
}


# Apply glue rule
headers <- headers |>
	mutate(new_header_line = str_glue(opt$options$glue)) |>
	mutate(new_header_line = str_glue("{opt$options$prefix}{new_header_line}"))


# Show metadata table on stdout()
print(headers$tags)
print(headers$new_header_line)

# Output FASTA with new header line if requested
if (!is.null(opt$options$out)) {
	names(dna) <- pull(headers,new_header_line)
	writeXStringSet(dna,opt$options$out)	
} 






