Skip to content

Use ASCIISet #72

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions filters/ref-confidence.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,14 @@ func getBasesAndBaseQualitiesAlignedOneToOne(aln *sam.Alignment) (sam.Sequence,
paddedBaseQualities := make([]byte, 0, nofRefBases)
var pos int32
for _, element := range aln.CIGAR {
if operatorConsumesReadBases[element.Operation] {
if operatorConsumesReadBases.Contains(element.Operation) {
end := pos + element.Length
if operatorConsumesReferenceBases[element.Operation] {
if operatorConsumesReferenceBases.Contains(element.Operation) {
paddedBases = paddedBases.AppendSlice(nibbles.Nibbles(bases.Slice(int(pos), int(end))))
paddedBaseQualities = append(paddedBaseQualities, baseQualities[pos:end]...)
}
pos = end
} else if operatorConsumesReferenceBases[element.Operation] {
} else if operatorConsumesReferenceBases.Contains(element.Operation) {
for j := int32(0); j < element.Length; j++ {
paddedBases = paddedBases.Append('-')
paddedBaseQualities = append(paddedBaseQualities, 0)
Expand Down
5 changes: 3 additions & 2 deletions filters/simple-filters.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"math/rand"
"strconv"

"github.com/elliotwutingfeng/asciiset"
"github.com/exascience/elprep/v5/bed"
"github.com/exascience/elprep/v5/intervals"
"github.com/exascience/elprep/v5/sam"
Expand Down Expand Up @@ -82,15 +83,15 @@ func RemoveUnmappedReadsStrict(_ *sam.Header) sam.AlignmentFilter {
}
}

var nonExactMappingOperator = map[byte]bool{'I': true, 'D': true, 'N': true, 'H': true, 'P': true, 'X': true, '=': true}
var nonExactMappingOperator, _ = asciiset.MakeASCIISet("IDNHPX=")

// RemoveNonExactMappingReads is a filter that removes all reads that
// are not exact matches with the reference (soft-clipping ok), based
// on CIGAR string (only M and S allowed).
func RemoveNonExactMappingReads(_ *sam.Header) sam.AlignmentFilter {
return func(aln *sam.Alignment) bool {
for _, op := range aln.CIGAR {
if nonExactMappingOperator[op.Operation] {
if nonExactMappingOperator.Contains(op.Operation) {
return false
}
}
Expand Down
11 changes: 6 additions & 5 deletions filters/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package filters
import (
"log"

"github.com/elliotwutingfeng/asciiset"
"github.com/exascience/elprep/v5/sam"
)

Expand Down Expand Up @@ -74,13 +75,13 @@ func absInt32(x int32) int32 {
}

var (
operatorConsumesReadBases = map[byte]bool{'M': true, 'I': true, 'S': true, '=': true, 'X': true}
operatorConsumesReferenceBases = map[byte]bool{'M': true, 'D': true, 'N': true, '=': true, 'X': true}
operatorConsumesReadBases, _ = asciiset.MakeASCIISet("MIS=X")
operatorConsumesReferenceBases, _ = asciiset.MakeASCIISet("MDN=X")
)

func elementStradlessClippedRead(newCigar []sam.CigarOperation, operator byte, relativeClippingPosition, clippedBases int32) []sam.CigarOperation {
if operatorConsumesReadBases[operator] {
if operatorConsumesReferenceBases[operator] {
if operatorConsumesReadBases.Contains(operator) {
if operatorConsumesReferenceBases.Contains(operator) {
if relativeClippingPosition > 0 {
newCigar = append(newCigar, sam.CigarOperation{
Length: relativeClippingPosition,
Expand Down Expand Up @@ -279,7 +280,7 @@ func computeReadCoordinateForReferenceCoordinate(cigarVec []sam.CigarOperation,
index++
elementLength := int(element.Length)
var shift int
if operatorConsumesReferenceBases[element.Operation] || element.Operation == 'S' {
if operatorConsumesReferenceBases.Contains(element.Operation) || element.Operation == 'S' {
if refBases+elementLength < goal {
shift = elementLength
} else {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.18

require (
github.com/bits-and-blooms/bitset v1.2.2
github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab
github.com/exascience/pargo v1.1.0
github.com/google/uuid v1.3.0
golang.org/x/sys v0.0.0-20220422013727-9388b58f7150
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/bits-and-blooms/bitset v1.2.2 h1:J5gbX05GpMdBjCvQ9MteIg2KKDExr7DrgK+Yc15FvIk=
github.com/bits-and-blooms/bitset v1.2.2/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab h1:h1UgjJdAAhj+uPL68n7XASS6bU+07ZX1WJvVS2eyoeY=
github.com/elliotwutingfeng/asciiset v0.0.0-20230602022725-51bbb787efab/go.mod h1:GLo/8fDswSAniFG+BFIaiSPcK610jyzgEhWYPQwuQdw=
github.com/exascience/pargo v1.1.0 h1:pBKDhJYoH2ekBehnPCErSIDoi9DqiWL1V70s5kGZANI=
github.com/exascience/pargo v1.1.0/go.mod h1:8GeMktPA5KycHMfqXXOfiQzlazfbFSURzGZIJUO0tfk=
github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
Expand Down