Skip to content

Commit 75b9a47

Browse files
committed
Merge branch 'main' of github.com:patrick-barth/PARANOID into main
2 parents 82f1efa + 5c1a94c commit 75b9a47

2 files changed

Lines changed: 113 additions & 141 deletions

File tree

bin/generate-peak-height-histogram.R

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@ library(ggplot2)
55
library(wig)
66

77
option_list <- list(
8-
make_option(c("-i", "--input_path"), type = "character", default = ".",
8+
make_option(c("-i", "--input_path"), type = "character", default = ".",
99
metavar = "character", help = "Tsv file containing strand distribution information"),
10-
make_option(c("-o", "--output"), type = "character", default = "output",
10+
make_option(c("-o", "--output"), type = "character", default = "output",
1111
metavar = "character", help = "Output file"),
12-
make_option(c("-t", "--type"), type = "character", default = "png",
12+
make_option(c("-t", "--type"), type = "character", default = "png",
1313
metavar = "character", help = "File type of output plot"),
14-
make_option(c("-c", "--color"), type = "character", default = "#69b3a2",
14+
make_option(c("-c", "--color"), type = "character", default = "#69b3a2",
1515
metavar = "character", help = "File type of output plot"),
16-
make_option(c("-p", "--percentile"),type = "double", default = 0.9,
16+
make_option(c("-p", "--percentile"), type = "double", default = 0.9,
1717
metavar = "double", help = "Percentile being used for other analyses")
1818
)
1919
opt_parser <- OptionParser(option_list=option_list)
@@ -29,7 +29,7 @@ percentile <- opt$percentile / 100
2929
files <- list.files(path = input_path, pattern = "\\.wig$")
3030

3131
# currently only 2 wig files are supposed to be the input: Error if more or less
32-
if(length(files) > 2){
32+
if (length(files) > 2) {
3333
stop("Error: Too many files provided")
3434
}
3535

@@ -47,26 +47,28 @@ max_freq <- max(value_counts_total$Freq)
4747
quant <- quantile(all_peak_values, probs = percentile)
4848

4949
# generate plot: Freq*10 & y-axis/10 make values of 1 or less visible
50-
plot <- ggplot(value_counts_total, aes(x=value,y=Freq*10))+
51-
geom_col(width = 1,fill = color) +
52-
scale_x_continuous(breaks = seq(0,max_peak,20)) +
53-
scale_y_log10(labels = function(x) x/10) +
54-
geom_vline(xintercept = quant,
55-
color = "red",
56-
linetype = "dashed") +
57-
geom_text(aes(x=quant,y=max_freq),
58-
label=paste("Chosen percentile cutoff (",quant,")", sep = ""),
59-
color = "red",
60-
hjust = 0, size = 3,
61-
position = position_dodge(width = 1)) +
62-
labs(title = paste("Peak height distribution of sample ", gsub("_forward.wig","",files[1]), sep = ""),
63-
x = "Peak height", y = "# of occurences") +
50+
plot <- ggplot(value_counts_total, aes(x = value, y = Freq * 10)) +
51+
geom_col(width = 1, fill = color) +
52+
scale_x_continuous(breaks = seq(0, max_peak, 20)) +
53+
scale_y_log10(labels = function(x) x / 10) +
54+
geom_vline(xintercept = quant,
55+
color = "red",
56+
linetype = "dashed") +
57+
geom_text(aes(x = quant,
58+
y = max_freq),
59+
label = paste("Chosen percentile cutoff (", quant, ")", sep = ""),
60+
color = "red",
61+
hjust = 0,
62+
size = 3,
63+
position = position_dodge(width = 1)) +
64+
labs(title = paste("Peak height distribution of sample ", gsub("_forward.wig","",files[1]), sep = ""),
65+
x = "Peak height", y = "# of occurences") +
6466
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5),
6567
axis.text.y = element_text(),
6668
plot.title = element_text(hjust = 0.5))
67-
69+
6870
ggsave(
6971
paste(output, ".", type, sep = ""),
7072
plot = plot,
7173
device = type
72-
)
74+
)

bin/wig_file_statistics.R

100644100755
Lines changed: 89 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,119 +1,89 @@
1-
#!/usr/bin/env Rscript
2-
3-
library(optparse)
4-
library(ggplot2)
5-
library(wig)
6-
7-
filelist_fw <- c(
8-
"RVFV_virions/raw-wig-files/A_rep_1_forward.wig",
9-
"RVFV_virions/raw-wig-files/A_rep_2_forward.wig",
10-
"RVFV_virions/raw-wig-files/A_rep_3_forward.wig"
11-
)
12-
13-
filelist_rev <- c(
14-
"RVFV_virions/raw-wig-files/A_rep_1_reverse.wig",
15-
"RVFV_virions/raw-wig-files/A_rep_2_reverse.wig",
16-
"RVFV_virions/raw-wig-files/A_rep_3_reverse.wig"
17-
)
18-
19-
20-
#### Get arguments ##
21-
option_list = list(
22-
make_option(c("-l","--logs"), type="character",default=NULL,
23-
help="Log file containing information about barcode splitting", metavar="character"),
24-
make_option(c("-o","--output"), type="character",default="barcode_distribution",
25-
help="Name of output plot", metavar="character"),
26-
make_option(c("-t","--type"), type="character",default="png",
27-
help="File type of output plot", metavar="character"),
28-
make_option(c("-c", "--color"), type="character", default="#69b3a2",
29-
help="Color", metavar="character")
30-
);
31-
32-
library(wig)
33-
34-
chromosome_lengths <- list(
35-
"",
36-
"",
37-
""
38-
)
39-
40-
import_wig_files<-function(files=vector())
41-
{
42-
chromosomes = vector();
43-
chromosome_length = list();
44-
chromosome_values = list();
45-
chr_dat = list();
46-
filelist = list();
47-
48-
for(i in seq(1,length(files)))
49-
{
50-
filelist[[i]] <- import_wig(files[i])
51-
# generate a list of chromosome names
52-
chromosomes=levels(factor(c(chromosomes, levels(as.factor(filelist[[i]]$chr)))))
53-
}
54-
55-
# estimate the length of the chromosomes
56-
for(chr in chromosomes)
57-
{
58-
chromosome_length[[chr]] = 0
59-
for (i in seq(1,length(files)))
60-
{
61-
chromosome_length[[chr]] = max(
62-
chromosome_length[[chr]],
63-
as.vector(filelist[[i]][filelist[[i]]$chr==chr,]$pos)
64-
);
65-
}
66-
}
67-
68-
# generate a list of chromosome matrices with all values
69-
for(chr in chromosomes)
70-
{
71-
chromosome_values[[chr]] = matrix(nrow=chromosome_length[[chr]], ncol=length(files),data=c(0))
72-
# loop through our input files and import the values into our matrix
73-
for (i in seq(1,length(files)))
74-
{
75-
lines = filelist[[i]]$chr==chr
76-
positions = filelist[[i]][lines,]$pos
77-
values = filelist[[i]][lines,]$val
78-
chromosome_values[[chr]][positions, i] = values
79-
}
80-
}
81-
82-
# build the result list
83-
for(chr in chromosomes)
84-
{
85-
chr_dat[[chr]] = list(name=chr, len=chromosome_length[[chr]], val=chromosome_values[[chr]])
86-
}
87-
88-
return(chr_dat);
89-
}
90-
91-
do_cor_analysis<-function(files=vector(), dat=list(), output_filename_dir=paste(getwd(),"/",sep=""))
92-
{
93-
for(chr in seq(1, length(dat)))
94-
{
95-
chr_name <- dat[[chr]]$name
96-
chr_len <- dat[[chr]]$len
97-
cat("Chromosome: ", chr_name, "; length ", chr_len, " bp\n");
98-
99-
cor_res <- cor(dat[[chr]]$val)
100-
colnames(cor_res)<-basename(files)
101-
rownames(cor_res)<-colnames(cor_res)
102-
103-
csv_output_file<-paste(output_filename_dir, chr_name, "_", chr_len, "bp.csv", sep="")
104-
cat("Writing tabular output to ", csv_output_file, "...\n");
105-
write.csv(cor_res, file=csv_output_file)
106-
107-
print(cor_res)
108-
109-
# print all the stuff
110-
heatmap(cor_res,symm=TRUE)
111-
heatmap_output_file<-paste(output_filename_dir, chr_name, "_", chr_len, "bp.pdf", sep="")
112-
pdf(file=heatmap_output_file, paper="a4")
113-
heatmap(cor_res,symm=TRUE)
114-
dev.off()
115-
}
116-
}
117-
118-
do_cor_analysis(filelist_fw, import_wig_files(filelist_fw), output_filename_dir = paste(getwd(), "/forward_", sep=""))
119-
do_cor_analysis(filelist_rev, import_wig_files(filelist_rev), output_filename_dir = paste(getwd(), "/reverse_", sep=""))
1+
#!/usr/bin/env Rscript
2+
3+
library(optparse)
4+
library(ggplot2)
5+
library(wig)
6+
7+
#### Get arguments ##
8+
option_list <- list(
9+
make_option(c("-i", "--input_path"), type = "character", default = ".",
10+
metavar = "character", help = "Working directory containing wig files to compare"),
11+
make_option(c("-l", "--chrom_length"), type = "character", default = ".",
12+
metavar = "character", help = "File containing lengths of chromosomes"),
13+
make_option(c("-o", "--output"), type = "character", default = "output",
14+
metavar = "character", help = "Output base name"),
15+
make_option(c("-t", "--type"), type = "character", default = "png",
16+
metavar = "character", help = "File type of output plot")
17+
)
18+
opt_parser <- OptionParser(option_list=option_list)
19+
opt <- parse_args(opt_parser)
20+
21+
input_path <- opt$input_path
22+
chrom_length <- opt$chrom_length
23+
output <- opt$output
24+
type <- opt$type
25+
26+
chrom_length <- read.csv(chrom_length, header = FALSE, sep = "\t", row.names = 1)
27+
# This line is only an adaption to the import_wig function which cuts chrom names at the first encountered dot
28+
row.names(chrom_length) <- unlist(lapply(strsplit(row.names(chrom_length),"\\."), `[`, 1))
29+
files <- list.files(path = input_path, pattern = "\\.wig$")
30+
31+
import_wig_files <- function(files = vector())
32+
{
33+
chromosomes <- row.names(chrom_length)
34+
chromosome_values <- list()
35+
chr_dat <- list()
36+
filelist <- list()
37+
38+
for (i in seq(1, length(files))) {
39+
full_path <- paste(input_path, "/", files[i], sep = "")
40+
filelist[[i]] <- import_wig(full_path)
41+
}
42+
43+
# generate a list of chromosome matrices with all values
44+
for (chr in chromosomes) {
45+
chromosome_values[[chr]] <- matrix(nrow = chrom_length[chr, ], ncol = length(files), data = c(0))
46+
# loop through our input files and import the values into our matrix
47+
for (i in seq(1, length(files))) {
48+
lines <- filelist[[i]]$chr == chr
49+
positions <- filelist[[i]][lines, ]$pos
50+
values <- filelist[[i]][lines, ]$val
51+
chromosome_values[[chr]][positions, i] <- values
52+
}
53+
}
54+
55+
# build the result list
56+
for (chr in chromosomes) {
57+
chr_dat[[chr]] <- list(name = chr, len = chrom_length[chr, ], val = chromosome_values[[chr]])
58+
}
59+
60+
return(chr_dat)
61+
}
62+
63+
do_cor_analysis <- function(files = vector(), dat = list()) {
64+
appended_values <- data.frame(dat[[1]]$val)
65+
for (chr in seq(2, length(dat))) {
66+
tmp_df <- data.frame(dat[[chr]]$val)
67+
appended_values <- rbind(appended_values, tmp_df)
68+
}
69+
70+
cor_res <- cor(dat[[chr]]$val)
71+
colnames(cor_res) <- basename(files)
72+
rownames(cor_res) <- colnames(cor_res)
73+
74+
csv_output_file <- paste(output, "_correlation.csv", sep = "")
75+
cat("Writing tabular output to ", csv_output_file, "...\n")
76+
write.csv(cor_res, file = csv_output_file)
77+
78+
print(cor_res)
79+
80+
# print all the stuff
81+
heatmap(cor_res, symm = TRUE)
82+
heatmap_output_file <- paste(output, "_correlation.", type, sep = "")
83+
pdf(file=heatmap_output_file, paper = "a4")
84+
heatmap(cor_res, symm = TRUE)
85+
dev.off()
86+
87+
}
88+
89+
do_cor_analysis(files, import_wig_files(files))

0 commit comments

Comments
 (0)