Skip to content

Commit 786ef08

Browse files
committed
update seqOptimize()
1 parent 2f3d65a commit 786ef08

File tree

3 files changed

+38
-17
lines changed

3 files changed

+38
-17
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ UTILITIES
3838
o `seqGDS2VCF()` generates an indexing file (.tbi) according to the
3939
vcf.gz output file, requiring the Rsamtools package
4040

41+
o the first argument of `seqOptimize()` can be a GDS file name or a SeqVarGDSClass object opened with `readonly=FALSE`
42+
4143
BUG FIXES
4244

4345
o Previous `seqGetAF_AC_Missing(, minor=FALSE)` returns the AF for the

R/Utilities.R

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,8 +1167,12 @@ seqTranspose <- function(gdsfile, var.name, compress=NULL, digest=TRUE, verbose=
11671167
# Optimize data by transposing
11681168
#
11691169

1170+
# Run-length encoding on chromosome codes
11701171
.optim_chrom <- function(gdsfile)
11711172
{
1173+
# check
1174+
stopifnot(inherits(gdsfile, "gds.class"))
1175+
# read
11721176
n <- index.gdsn(gdsfile, "chromosome")
11731177
readmode.gdsn(n)
11741178
chr <- read.gdsn(n)
@@ -1179,22 +1183,36 @@ seqTranspose <- function(gdsfile, var.name, compress=NULL, digest=TRUE, verbose=
11791183
visible=FALSE)
11801184
moveto.gdsn(n2, n)
11811185
moveto.gdsn(n1, n)
1186+
# return
11821187
invisible()
11831188
}
11841189

1185-
seqOptimize <- function(gdsfn, target=c("chromosome", "by.sample"),
1190+
seqOptimize <- function(gdsfile, target=c("chromosome", "by.sample"),
11861191
format.var=TRUE, cleanup=TRUE, verbose=TRUE)
11871192
{
11881193
# check
1189-
stopifnot(is.character(gdsfn), length(gdsfn)==1L)
1194+
stopifnot(inherits(gdsfile, "SeqVarGDSClass") || is.character(gdsfile))
11901195
target <- match.arg(target)
11911196
stopifnot(is.logical(format.var) || is.character(format.var))
11921197
stopifnot(is.logical(cleanup), length(cleanup)==1L)
11931198
stopifnot(is.logical(verbose), length(verbose)==1L)
11941199

1195-
gdsfile <- seqOpen(gdsfn, readonly=FALSE)
1196-
on.exit({ seqClose(gdsfile) })
1200+
# the open file
1201+
if (is.character(gdsfile))
1202+
{
1203+
stopifnot(length(gdsfile) == 1L)
1204+
if (isTRUE(verbose))
1205+
.cat("Open ", sQuote(basename(gdsfile)))
1206+
gdsfn <- gdsfile
1207+
gdsfile <- seqOpen(gdsfn, readonly=FALSE)
1208+
on.exit(seqClose(gdsfile))
1209+
} else {
1210+
cleanup <- FALSE
1211+
}
1212+
if (gdsfile$readonly)
1213+
stop("'gdsfile' should not be read-only.")
11971214

1215+
# process
11981216
if ("by.sample" %in% target)
11991217
{
12001218
# genotype
@@ -1206,14 +1224,13 @@ seqOptimize <- function(gdsfn, target=c("chromosome", "by.sample"),
12061224
.Transpose(gdsfile, "phase/data", "~")
12071225

12081226
# annotation - format
1209-
if (identical(format.var, TRUE) || is.character(format.var))
1227+
if (isTRUE(format.var) || is.character(format.var))
12101228
{
12111229
n <- index.gdsn(gdsfile, "annotation/format", silent=TRUE)
12121230
if (!is.null(n))
12131231
{
12141232
nm <- ls.gdsn(n)
1215-
if (identical(format.var, TRUE))
1216-
format.var <- nm
1233+
if (isTRUE(format.var)) format.var <- nm
12171234
for (i in nm)
12181235
{
12191236
if (i %in% format.var)
@@ -1229,21 +1246,21 @@ seqOptimize <- function(gdsfn, target=c("chromosome", "by.sample"),
12291246
}
12301247
}
12311248
}
1249+
12321250
} else if ("chromosome" %in% target)
12331251
{
1234-
if (verbose)
1235-
cat("Adding run-length encoding for chromosome coding ...")
1252+
if (isTRUE(verbose))
1253+
cat("Adding run-length encoding for chromosomes ...")
1254+
# run-length encoding on chromosome codes
12361255
.optim_chrom(gdsfile)
1237-
if (verbose)
1238-
cat(" [Done]\n")
1256+
if (isTRUE(verbose)) cat(" [Done]\n")
12391257
}
12401258

1241-
if (cleanup)
1259+
if (isTRUE(cleanup))
12421260
{
12431261
on.exit()
12441262
seqClose(gdsfile)
12451263
cleanup.gds(gdsfn, verbose=verbose)
12461264
}
1247-
12481265
invisible()
12491266
}

man/seqOptimize.Rd

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
Transpose data array or matrix for possibly higher-speed access.
66
}
77
\usage{
8-
seqOptimize(gdsfn, target=c("chromosome", "by.sample"), format.var=TRUE,
8+
seqOptimize(gdsfile, target=c("chromosome", "by.sample"), format.var=TRUE,
99
cleanup=TRUE, verbose=TRUE)
1010
}
1111
\arguments{
12-
\item{gdsfn}{the file name of GDS}
12+
\item{gdsfile}{a \code{\link{SeqVarGDSClass}} object opened with \code{readonly=FALSE}, or a GDS file name; \code{cleanup} is ignored when an object is given}
1313
\item{target}{"chromosome", "by.sample"; see details}
1414
\item{format.var}{a character vector for selected variable names,
1515
or \code{TRUE} for all variables, according to "annotation/format"}
@@ -43,12 +43,14 @@ seqOptimize(gdsfn, target=c("chromosome", "by.sample"), format.var=TRUE,
4343
# convert
4444
seqVCF2GDS(vcf.fn, "tmp.gds", storage.option="ZIP_RA")
4545

46-
# prepare data for the SeqVarTools package
46+
# prepare data
4747
seqOptimize("tmp.gds", target="by.sample")
4848

4949

5050
# open the file for writing, list the structure of GDS variables and optimize by chromosome
51-
(f <- seqOpen("tmp.gds"))
51+
(f <- seqOpen("tmp.gds", readonly=FALSE))
52+
seqOptimize(f, "chromosome")
53+
5254
# close
5355
seqClose(f)
5456

0 commit comments

Comments
 (0)