Skip to content

Commit a64cb89

Browse files
committed
fix sex inference, tag new version
1 parent 4af26b8 commit a64cb89

File tree

5 files changed

+33
-20
lines changed

5 files changed

+33
-20
lines changed

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
v0.3.0
2+
======
3+
+ fix compilation with nim > 2 and some sex inference stuff
4+
15
v0.2.19
26
=======
37
+ [relate/infer] fix check that would prevent some inference (#123)

nim.cfg

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,5 @@
22
-d:nimNoGetRandom
33
--passC:"-mpopcnt"
44
--path:"$projectPath/src"
5-
-d:openmp
6-
-d:openblas
75
-d:"blas=openblas"
86
-d:"lapack=openblas"

somalier.nimble

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#import somalierpkg/version as _
22

3-
version = "0.2.19" #somalierVersion
3+
version = "0.3.0" #somalierVersion
44
author = "Brent Pedersen"
55
description = "sample-swap checking directly on BAMs/CRAMs for cancer data"
66
license = "MIT"

src/somalierpkg/relate.nim

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ type Stat4 = object
3232
y_dp: RunningStat
3333

3434

35-
type relation_matrices = object {.shallow.}
35+
type relation_matrices = object
3636
sites_tested: int
3737
ibs: seq[uint16]
3838
n: seq[uint16]
@@ -472,15 +472,17 @@ proc read_extracted*(paths: seq[string], min_ab: float, min_depth: int,
472472

473473
const missing = [".", "0", "-9", ""]
474474

475-
proc high_quality(gt_counts: array[5, seq[uint16]], i: int): bool {.inline.} =
475+
proc high_quality(gt_counts: array[5, seq[uint16]], i: int,
476+
check_hom_alts: bool): bool {.inline.} =
476477
# less than 3% of sites with allele balance outside of 0.1 .. 0.9
477478
result = gt_counts[4][i].float / (gt_counts[0][i] + gt_counts[1][i] +
478479
gt_counts[2][i] + gt_counts[3][i] + gt_counts[4][i]).float < 0.06
479480
if not result:
480481
return false
481482

482-
# should have fewer hom-alts[2] than hets[1]
483-
result = gt_counts[2][i].float / max(1, gt_counts[1][i].float) < 0.7
483+
if check_hom_alts:
484+
# should have fewer hom-alts[2] than hets[1]
485+
result = gt_counts[2][i].float / max(1, gt_counts[1][i].float) < 0.7
484486

485487

486488
proc samples_have_y_depth(stats: seq[Stat4]): bool =
@@ -534,9 +536,10 @@ proc related(final: var relation_matrices, L: SampleLooker,
534536

535537

536538
proc add_parents_and_check_sex(final: var relation_matrices, stats: seq[Stat4],
537-
gt_counts: array[5, seq[uint16]], i: int, L: var SampleLooker) =
539+
gt_counts: array[5, seq[uint16]], i: int, L: var SampleLooker,
540+
check_hom_alts: bool) =
538541
# first pass here updates sample parents as needed.
539-
if not gt_counts.high_quality(i):
542+
if not gt_counts.high_quality(i, check_hom_alts):
540543
return
541544
let sample_name = L.sample_names[i]
542545
var sample = L.sample_table.getOrDefault(sample_name, Sample(id: sample_name,
@@ -548,6 +551,7 @@ proc add_parents_and_check_sex(final: var relation_matrices, stats: seq[Stat4],
548551
if sample.sex == 2:
549552
stderr.write_line &"[somalier] setting sex to male for {sample.id}"
550553
sample.sex = 1
554+
assert sample.sex == 1
551555
elif stats[i].x_het / stats[i].x_hom_alt > 0.4 and stats[i].x_dp.n > 10:
552556
if sample.sex != 2:
553557
if sample.sex == 1:
@@ -574,6 +578,7 @@ proc add_parents_and_check_sex(final: var relation_matrices, stats: seq[Stat4],
574578
if sample.maternal_id != possible_parents[1]:
575579
stderr.write_line &"[somalier] NOTE: updating maternal_id for {sample.id} to {possible_parents[1]}"
576580
sample.maternal_id = possible_parents[1]
581+
echo "sample:", $sample
577582
L.sample_table[sample.id] = sample
578583

579584
proc add_parent_to_sibs(final: var relation_matrices, stats: seq[Stat4],
@@ -631,12 +636,13 @@ proc remove_spurious_parent_ids(final: var relation_matrices, L: SampleLooker,
631636

632637

633638
proc add_siblings(final: var relation_matrices, stats: seq[Stat4],
634-
gt_counts: array[5, seq[uint16]], L: var SampleLooker) =
639+
gt_counts: array[5, seq[uint16]], L: var SampleLooker,
640+
check_hom_alts: bool) =
635641
for sample_name, sib_names in L.sib_pairs:
636642
let isample = L.sample_table[sample_name]
637643
let iset = sib_names.toHashSet
638644
let i = isample.i
639-
if i >= 0 and not gt_counts.high_quality(i): continue
645+
if i >= 0 and not gt_counts.high_quality(i, check_hom_alts): continue
640646
var ipids = [isample.paternal_id, isample.maternal_id]
641647
let parent_order = ["dad", "mom"]
642648
for sn in sib_names:
@@ -653,7 +659,7 @@ proc add_siblings(final: var relation_matrices, stats: seq[Stat4],
653659
j = jsample.i
654660
except KeyError:
655661
continue
656-
if j >= 0 and not gt_counts.high_quality(j): continue
662+
if j >= 0 and not gt_counts.high_quality(j, check_hom_alts): continue
657663
#if isample.paternal_id notin missing and isample.maternal_id notin missing and isample.paternal_id == jsample.paternal_id and isample.maternal_id == jsample.maternal_id: continue
658664
## TODO: some logic problems below. check in CEPH
659665
var jpids = [jsample.paternal_id, jsample.maternal_id]
@@ -740,14 +746,15 @@ proc add_siblings(final: var relation_matrices, stats: seq[Stat4],
740746

741747

742748
proc update_family_ids(final: relation_matrices, stats: seq[Stat4],
743-
gt_counts: array[5, seq[uint16]], i: int, L: var SampleLooker) =
749+
gt_counts: array[5, seq[uint16]], i: int, L: var SampleLooker,
750+
check_hom_alts: bool) =
744751
# 2nd pass here updates family ids
745752
let sample_name = L.sample_names[i]
746753
if sample_name notin L.sample_table:
747754
let s = Sample(id: sample_name, family_id: sample_name, sex: -9,
748755
phenotype: "-9", maternal_id: "-9", paternal_id: "-9")
749756
var sample = L.sample_table[sample_name]
750-
if not gt_counts.high_quality(i): return
757+
if not gt_counts.high_quality(i, check_hom_alts): return
751758

752759
let pids = [sample.paternal_id, sample.maternal_id]
753760
for pid in pids:
@@ -785,6 +792,7 @@ proc write_sample(fh: File, stats: seq[Stat4], gt_counts: array[5, seq[uint16]],
785792
var sample = L.sample_table.getOrDefault(sample_name, Sample(id: sample_name,
786793
family_id: sample_name, sex: -9, phenotype: "-9", maternal_id: "-9",
787794
paternal_id: "-9"))
795+
echo "sample final:", $sample, " sex:", sample.sex
788796
fh.write(&"{sample.family_id}\t{sample.id}\t{sample.paternal_id}\t{sample.maternal_id}\t{sample.sex}\t{sample.phenotype}\t")
789797
fh.write(&"{L.sample_sex.getOrDefault(sample.id, \"-9\")}\t")
790798
fh.write(&"{stats[i].gtdp.mean:.1f}\t{stats[i].gtdp.standard_deviation():.1f}\t")
@@ -880,15 +888,16 @@ proc write_ped(fh: File, final: var relation_matrices, stats: seq[Stat4],
880888

881889

882890
if infer:
891+
let check_hom_alts = getEnv("SOMALIER_CHECK_HOM_ALTS") != ""
883892
final.remove_spurious_parent_ids(L, stats)
884893
for i, sample_name in L.sample_names:
885-
add_parents_and_check_sex(final, stats, gt_counts, i, L)
886-
add_siblings(final, stats, gt_counts, L)
894+
add_parents_and_check_sex(final, stats, gt_counts, i, L, check_hom_alts)
895+
add_siblings(final, stats, gt_counts, L, check_hom_alts)
887896

888897
add_parent_to_sibs(final, stats, gt_counts, L)
889898

890899
for i, sample_name in L.sample_names:
891-
update_family_ids(final, stats, gt_counts, i, L)
900+
update_family_ids(final, stats, gt_counts, i, L, check_hom_alts)
892901
final.remove_spurious_parent_ids(L, stats)
893902

894903
for i, sample_name in L.sample_names:
@@ -1037,6 +1046,8 @@ proc rel_main*() =
10371046

10381047
t0 = cpuTime()
10391048

1049+
let check_hom_alts = getEnv("SOMALIER_CHECK_HOM_ALTS") != ""
1050+
10401051
var tmpls = tmpl_html.split("<INPUT_JSON>")
10411052
fh_html.write(tmpls[0].replace("<SAMPLE_JSON>", toj(final.samples,
10421053
final.stats, final.gt_counts, samples.to_sex_lookup)))
@@ -1092,8 +1103,8 @@ proc rel_main*() =
10921103
stderr.write_line &"[somalier] apparent identical twins or sample duplicate found with {rel.sample_a} and {rel.sample_b} assuming siblings as these were specified as such in the pedigree file"
10931104
sib_pairs.mgetOrPut(rel.sample_a, @[]).add(rel.sample_b)
10941105
sib_pairs.mgetOrPut(rel.sample_b, @[]).add(rel.sample_a)
1095-
elif rr > 0.2 and final.gt_counts.high_quality(T.i) and
1096-
final.gt_counts.high_quality(T.j):
1106+
elif rr > 0.2 and final.gt_counts.high_quality(T.i, check_hom_alts) and
1107+
final.gt_counts.high_quality(T.j, check_hom_alts):
10971108
relGt0p2.mgetOrPut(rel.sample_a, @[]).add(rel.sample_b)
10981109
relGt0p2.mgetOrPut(rel.sample_b, @[]).add(rel.sample_a)
10991110

src/somalierpkg/version.nim

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
let somalierVersion* = "0.2.20"
1+
let somalierVersion* = "0.3.0"

0 commit comments

Comments
 (0)