Skip to content

Commit a1b6094

Browse files
committed
Add parameter file and FASTA file referenced by unit tests
Update unit test to use Tryp_Pig_Bov.fasta
1 parent 8af811f commit a1b6094

File tree

3 files changed

+221
-2
lines changed

3 files changed

+221
-2
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Precursor mass tolerance
2+
# Examples: 2.5Da or 30ppm
3+
# Use comma to set asymmetric values, for example "0.5Da,2.5Da" will set 0.5Da to the left (expMass<theoMass) and 2.5Da to the right (expMass>theoMass)
4+
PrecursorMassTolerance=20ppm
5+
6+
# Max Number of Dynamic (Variable) Modifications per peptide
7+
# If this value is large, the search will be slow
8+
NumMods=3
9+
10+
# Modifications (see below for examples)
11+
StaticMod=None
12+
13+
DynamicMod=O1, M, opt, any, Oxidation # Oxidized methionine
14+
15+
# Fragmentation Method
16+
# 0 means as written in the spectrum, or CID if no info (Default)
17+
# 1 means CID
18+
# 2 means ETD
19+
# 3 means HCD
20+
# 4 means UVPD
21+
FragmentationMethodID=0
22+
23+
# Instrument ID
24+
# 0 means Low-res LCQ/LTQ (Default for CID and ETD); use InstrumentID=0 if analyzing a dataset with low-res CID and high-res HCD spectra
25+
# 1 means High-res LTQ (Default for HCD; also appropriate for high res CID); use InstrumentID=1 for Orbitrap, Lumos, and QEHFX instruments
26+
# 2 means TOF
27+
# 3 means Q-Exactive
28+
InstrumentID=0
29+
30+
# Enzyme ID
31+
# 0 means unspecific cleavage (cleave after any residue)
32+
# 1 means Trypsin (Default); optionally use this along with NTT=0 for a no-enzyme-specificity search of a tryptically digested sample
33+
# 2: Chymotrypsin, 3: Lys-C, 4: Lys-N, 5: Glu-C, 6: Arg-C, 7: Asp-N, 8: alphaLP, 9: No Cleavage (for peptidomics)
34+
EnzymeID=1
35+
36+
# Isotope error range
37+
# Takes into account the error introduced by not choosing the monoisotopic peak for fragmentation.
38+
# Useful for accurate precursor ion masses.
39+
# Ignored if the parent mass tolerance is > 0.5Da or 500ppm.
40+
# The combination of -t (PrecursorMassTolerance) and -ti (IsotopeErrorRange) determines the precursor mass tolerance.
41+
# e.g. "-t 20ppm -ti -1,2" tests abs(observed - theoretical - n * 1.00335Da) < 20ppm for n=-1, 0, 1, 2
42+
IsotopeErrorRange=-1,2
43+
44+
# Number of tolerable termini
45+
# The number of peptide termini that must have been cleaved by the enzyme (default 2)
46+
# For trypsin, 2 means fully tryptic only, 1 means partially tryptic, and 0 means no-enzyme search
47+
NTT=2
48+
49+
# Control N-terminal methionine cleavage
50+
# 0 means to consider protein N-term Met cleavage (Default)
51+
# 1 means to ignore protein N-term Met cleavage
52+
IgnoreMetCleavage=0
53+
54+
# Target/Decoy search mode
55+
# 0 means don't search decoy database (default)
56+
# 1 means search decoy database to compute FDR (source FASTA file must be forward-only proteins)
57+
TDA=1
58+
59+
# Number of Threads (by default, uses all available cores)
60+
NumThreads=All
61+
62+
# Minimum peptide length to consider
63+
MinPepLength=6
64+
65+
# Maximum peptide length to consider
66+
MaxPepLength=50
67+
68+
# Minimum precursor charge to consider (if not specified in the spectrum)
69+
MinCharge=2
70+
71+
# Maximum precursor charge to consider (if not specified in the spectrum)
72+
MaxCharge=5
73+
74+
# Number of matches per spectrum to be reported
75+
# If this value is greater than 1, the FDR values computed by MS-GF+ will be skewed by high-scoring 2nd and 3rd hits
76+
NumMatchesPerSpec=1
77+
78+
# Mass of charge carrier
79+
# Default: mass of proton
80+
#ChargeCarrierMass=1.00727649
81+
82+
# Maximum missed cleavages
83+
# Exclude peptides with more than this number of missed cleavages from the search, Default: -1 (no limit)
84+
#MaxMissedCleavages=-1
85+
86+
# Minimum number of ions a spectrum must have to be examined
87+
#MinNumPeaksPerSpectrum=10
88+
89+
# Number of isoforms to consider per peptide
90+
# Default: 128
91+
#NumIsoforms=128
92+
93+
# Include additional features in the output (enable this to post-process results with Percolator)
94+
#AddFeatures=1
95+
96+
# Amino Acid Modification Examples
97+
# Specify static modifications using one or more StaticMod= entries
98+
# Specify dynamic modifications using one or more DynamicMod= entries
99+
# Modification format is:
100+
# Mass or CompositionString, Residues, ModType, Position, Name (all five fields are required).
101+
# CompositionString can only contain a limited set of elements, primarily C H N O S or P
102+
#
103+
# Examples:
104+
# C2H3N1O1, C, fix, any, Carbamidomethyl # Fixed Carbamidomethyl C (alkylation)
105+
# O1, M, opt, any, Oxidation # Oxidation M
106+
# 15.994915, M, opt, any, Oxidation # Oxidation M (mass is used instead of CompositionString)
107+
# H-1N-1O1, NQ, opt, any, Deamidated # Negative numbers are allowed.
108+
# CH2, K, opt, any, Methyl # Methylation K
109+
# C2H2O1, K, opt, any, Acetyl # Acetylation K
110+
# HO3P, STY,opt, any, Phospho # Phosphorylation STY
111+
# C2H3NO, *, opt, N-term, Carbamidomethyl # Variable Carbamidomethyl N-term
112+
# H-2O-1, E, opt, N-term, Glu->pyro-Glu # Pyro-glu from E
113+
# H-3N-1, Q, opt, N-term, Gln->pyro-Glu # Pyro-glu from Q
114+
# C2H2O, *, opt, Prot-N-term, Acetyl # Acetylation Protein N-term

src/test/java/msgfplus/TestMSGFPlus.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ public void TestMGF()
7070
File outputDir = new File("C:\\DMS_WorkDir1");
7171

7272
File specFile = Paths.get(workDir.getPath(), "20121223_ICD_individual_33_TP_A177_Elite_30k_run2_01_excerpt.mgf").toFile();
73-
File dbFile = Paths.get(orgDbDir.getPath(),"CPTAC_264contams.fasta").toFile();
73+
File dbFile = Paths.get(orgDbDir.getPath(),"Tryp_Pig_Bov.fasta").toFile();
7474
File confFile = Paths.get(workDir.getPath(), "MSGFPlus_Tryp_MetOx_20ppmParTol.txt").toFile();
7575

7676
String versionString = getNextVersion();
@@ -108,7 +108,7 @@ public void TestMzML()
108108
File orgDbDir = new File("C:\\DMS_Temp_Org");
109109

110110
File specFile = Paths.get(workDir.getPath(), "QC_Mam_19_01_PNNL_10_06Jan21_Arwen_WBEH-20-12-01.mzML").toFile();
111-
File dbFile = Paths.get(orgDbDir.getPath(),"CPTAC_264contams.fasta").toFile();
111+
File dbFile = Paths.get(orgDbDir.getPath(),"Tryp_Pig_Bov.fasta").toFile();
112112
File confFile = Paths.get(workDir.getPath(), "MSGFPlus_Tryp_MetOx_StatCysAlk_20ppmParTol.txt").toFile();
113113

114114
String versionString = getNextVersion();
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
>Contaminant_TRYP_PIG sp|P00761|TRYP_PIG Trypsin precursor (EC 3.4.21.4) - Sus scrofa (Pig).
2+
FPTDDDDKIVGGYTCAANSIPYQVSLNSGSHFCGGSLINSQWVVSAAHCYKSRIQVRLGE
3+
HNIDVLEGNEQFINAAKIITHPNFNGNTLDNDIMLIKLSSPATLNSRVATVSLPRSCAAA
4+
GTECLISGWGNTKSSGSSYPSLLQCLKAPVLSDSSCKSSYPGQITGNMICVGFLEGGKDS
5+
CQGDSGGPVVCNGQLQGIVSWGYGCAQKNKPGVYTKVCNYVNWIQQTIAAN
6+
>Contaminant_Trypa1 VATVSLPR-like Promega trypsin artifact 1 (871.1) xATVSLPR (PromTArt1)
7+
QATVSLPR
8+
>Contaminant_Trypa2 VATVSLPR-like Promega trypsin artifact 2 (899.5) xxTVSLPR (PromTArt2)
9+
VQTVSLPR
10+
>Contaminant_Trypa3 VATVSLPR-like Promega trypsin artifact 3 (824.5) xxxVSLPR (PromTArt3)
11+
PGVVSLPR
12+
>Contaminant_Trypa4 LSSPATLNSR-like Promega trypsin artifact 4 (1071.5) xxxPATLNSR (PromTArt4)
13+
MNTLPLLAAK
14+
>Contaminant_Trypa5 Promega trypsin artifact 5 K to R mods (2239.1, 2914)(1987, 2003) (PromTArt5)
15+
LGEHNIDVLEGNEQFINAARIITHPNFNGNTLDNDIMLIRLSSPATLNSR
16+
>Contaminant_Trypa6 VATVSLPR 422 ion wrongly assigned z=3 (1262.8) (llhg are dummy aa's) (TrypArt6)
17+
LLHGVATVSLPR
18+
>Contaminant_TRYP_BOVIN TRYPSINOGEN. (sp|P00760|TRYP_BOVIN, gi|136425)
19+
VDDDDKIVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAHCYKSGIQVRLGEDN
20+
INVVEGNEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAASLNSRVASISLPTSCASAGT
21+
QCLISGWGNTKSSGTSYPDVLKCLKAPILSDSSCKSAYPGQITSNMFCAGYLEGGKDSCQ
22+
GDSGGPVVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTIASN
23+
>Contaminant_CTRA_BOVIN CHYMOTRYPSINOGEN A (EC 3.4.21.1). - BOS TAURUS (BOVINE).
24+
CGVPAIQPVLSGLSRIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINENWVVTAAHCGV
25+
TTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTINNDITLLKLSTAASFSQTVSA
26+
VCLPSASDDFAAGTTCVTTGWGLTRYTNANTPDRLQQASLPLLSNTNCKKYWGTKIKDAM
27+
ICAGASGVSSCMGDSGGPLVCKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQ
28+
TLAAN
29+
>Contaminant_CTRB_BOVIN CHYMOTRYPSINOGEN B (EC 3.4.21.1). - BOS TAURUS (BOVINE).
30+
CGVPAIQPVLSGLARIVNGEDAVPGSWPWQVSLQDSTGFHFCGGSLISEDWVVTAAHCGV
31+
TTSDVVVAGEFDQGLETEDTQVLKIGKVFKNPKFSILTVRNDITLLKLATPAQFSETVSA
32+
VCLPSADEDFPAGMLCATTGWGKTKYNALKTPDKLQQATLPIVSNTDCRKYWGSRVTDVM
33+
ICAGASGVSSCMGDSGGPLVCQKNGAWTLAGIVSWGSSTCSTSTPAVYARVTALMPWVQE
34+
TLAAN
35+
>Contaminant_ALBU_HUMAN SERUM ALBUMIN PRECURSOR. (sp|P02768|ALBU_HUMAN, gi|113576)
36+
MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIAFAQYLQQCPF
37+
EDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCTVATLRETYGEMADCCAKQEP
38+
ERNECFLQHKDDNPNLPRLVRPEVDVMCTAFHDNEETFLKKYLYEIARRHPYFYAPELLF
39+
FAKRYKAAFTECCQAADKAACLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAV
40+
ARLSQRFPKAEFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK
41+
ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVFLGMFLYEYAR
42+
RHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDEFKPLVEEPQNLIKQNCELFE
43+
QLGEYKFQNALLVRYTKKVPQVSTPTLVEVSRNLGKVGSKCCKHPEAKRMPCAEDYLSVV
44+
LNQLCVLHEKTPVSDRVTKCCTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTL
45+
SEKERQIKKQTALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV
46+
AASQAALGL
47+
>Contaminant_ALBU_BOVIN SERUM ALBUMIN PRECURSOR. (sp|P02769|ALBU_BOVIN, gi|113574)
48+
MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEQFKGLVLIAFSQYLQQCPF
49+
DEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCKVASLRETYGDMADCCEKQEP
50+
ERNECFLSHKDDSPDLPKLKPDPNTLCDEFKADEKKFWGKYLYEIARRHPYFYAPELLYY
51+
ANKYNGVFQDCCQAEDKGACLLPKIETMREKVLASSARQRLRCASIQKFGERALKAWSVA
52+
RLSQKFPKAEFVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKE
53+
CCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFLGSFLYEYSRR
54+
HPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKLKHLVDEPQNLIKQNCDQFEK
55+
LGEYGFQNALIVRYTRKVPQVSTPTLVEVSRSLGKVGTRCCTKPESERMPCTEDYLSLIL
56+
NRLCVLHEKTPVSEKVTKCCTESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLP
57+
DTEKQIKKQTALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVV
58+
STQTALA
59+
>Contaminant_K2C1_HUMAN sp|P04264 Keratin, type II cytoskeletal 1 (Cytokeratin-1) (CK-1) (Keratin-1) (K1) (67 kDa cytokeratin) (Hair alpha protein) - Homo sapiens (Human).
60+
SRQFSSRSGYRSGGGFSSGSAGIINYQRRTTSSSTRRSGGGGGRFSSCGGGGGSFGAGGG
61+
FGSRSLVNLGGSKSISISVARGGGRGSGFGGGYGGGGFGGGGFGGGGFGGGGIGGGGFGG
62+
FGSGGGGFGGGGFGGGGYGGGYGPVCPPGGIQEVTINQSLLQPLNVEIDPEIQKVKSRER
63+
EQIKSLNNQFASFIDKVRFLEQQNQVLQTKWELLQQVDTSTRTHNLEPYFESFINNLRRR
64+
VDQLKSDQSRLDSELKNMQDMVEDYRNKYEDEINKRTNAENEFVTIKKDVDGAYMTKVDL
65+
QAKLDNLQQEIDFLTALYQAELSQMQTQISETNVILSMDNNRSLDLDSIIAEVKAQNEDI
66+
AQKSKAEAESLYQSKYEELQITAGRHGDSVRNSKIEISELNRVIQRLRSEIDNVKKQISN
67+
LQQSISDAEQRGENALKDAKNKLNDLEDALQQAKEDLARLLRDYQELMNTKLALDLEIAT
68+
YRTLLEGEESRMSGECAPNVSVSVSTSHTTISGGGSRGGGGGGYGSGGSSYGSGGGSYGS
69+
GGGGGGGRGSYGSGGSSYGSGGGSYGSGGGGGGHGSYGSGSSSGGYRGGSGGGGGGSSGG
70+
RGSGGGSSGGSIGGRGSSSGGVKSSGGSSSVRFVSTTYSGVTR
71+
>Contaminant_K22E_HUMAN sp|P35908 Keratin, type II cytoskeletal 2 epidermal (Cytokeratin-2e) (K2e) (CK 2e) - Homo sapiens (Human).
72+
MSCQISCKSRGRGGGGGGFRGFSSGSAVVSGGSRRSTSSFSCLSRHGGGGGGFGGGGFGS
73+
RSLVGLGGTKSISISVAGGGGGFGAAGGFGGRGGGFGGGSGFGGGSGFGGGSGFSGGGFG
74+
GGGFGGGRFGGFGGPGGVGGLGGPGGFGPGGYPGGIHEVSVNQSLLQPLNVKVDPEIQNV
75+
KAQEREQIKTLNNKFASFIDKVRFLEQQNQVLQTKWELLQQMNVGTRPINLEPIFQGYID
76+
SLKRYLDGLTAERTSQNSELNNMQDLVEDYKKKYEDEINKRTAAENDFVTLKKDVDNAYM
77+
IKVELQSKVDLLNQEIEFLKVLYDAEISQIHQSVTDTNVILSMDNSRNLDLDSIIAEVKA
78+
QYEEIAQRSKEEAEALYHSKYEELQVTVGRHGDSLKEIKIEISELNRVIQRLQGEIAHVK
79+
KQCKNVQDAIADAEQRGEHALKDARNKLNDLEEALQQAKEDLARLLRDYQELMNVKLALD
80+
VEIATYRKLLEGEECRMSGDLSSNVTVSVTSSTISSNVASKAAFGGSGGRGSSSGGGYSS
81+
GSSSYGSGGRQSGSRGGSGGGGSISGGGYGSGGGSGGRYGSGGGSKGGSISGGGYGSGGG
82+
KHSSGGGSRGGSSSGGGYGSGGGGSSSVKGSSGEAFGSSVTFSFR
83+
>Contaminant_K1C9_HUMAN sp|P35527 Keratin, type I cytoskeletal 9 (Cytokeratin-9) (CK-9) (Keratin-9) (K9) - Homo sapiens (Human).
84+
MSCRQFSSSYLSRSGGGGGGGLGSGGSIRSSYSRFSSSGGRGGGGRFSSSSGYGGGSSRV
85+
CGRGGGGSFGYSYGGGSGGGFSASSLGGGFGGGSRGFGGASGGGYSSSGGFGGGFGGGSG
86+
GGFGGGYGSGFGGLGGFGGGAGGGDGGILTANEKSTMQELNSRLASYLDKVQALEEANND
87+
LENKIQDWYDKKGPAAIQKNYSPYYNTIDDLKDQIVDLTVGNNKTLLDIDNTRMTLDDFR
88+
IKFEMEQNLRQGVDADINGLRQVLDNLTMEKSDLEMQYETLQEELMALKKNHKEEMSQLT
89+
GQNSGDVNVEINVAPGKDLTKTLNDMRQEYEQLIAKNRKDIENQYETQITQIEHEVSSSG
90+
QEVQSSAKEVTQLRHGVQELEIELQSQLSKKAALEKSLEDTKNRYCGQLQMIQEQISNLE
91+
AQITDVRQEIECQNQEYSLLLSIKMRLEKEIETYHNLLEGGQEDFESSGAGKIGLGGRGG
92+
SGGSYGRGSRGGSGGSYGGGGSGGGYGGGSGSRGGSGGSYGGGSGSGGGSGGGYGGGSGG
93+
GHSGGSGGGHSGGSGGNYGGGSGSGGGSGGGYGGGSGSRGGSGGSHGGGSGFGGESGGSY
94+
GGGEEASGSGGGYGGGSGKSSHS
95+
>Contaminant_K1C10_HUMAN sp|P13645 Keratin, type I cytoskeletal 10 (Cytokeratin-10) (CK-10) (Keratin-10) (K10) - Homo sapiens (Human).
96+
MSVRYSSSKHYSSSRSGGGGGGGGCGGGGGVSSLRISSSKGSLGGGFSSGGFSGGSFSRG
97+
SSGGGCFGGSSGGYGGLGGFGGGSFHGSYGSSSFGGSYGGSFGGGNFGGGSFGGGSFGGG
98+
GFGGGGFGGGFGGGFGGDGGLLSGNEKVTMQNLNDRLASYLDKVRALEESNYELEGKIKE
99+
WYEKHGNSHQGEPRDYSKYYKTIDDLKNQILNLTTDNANILLQIDNARLAADDFRLKYEN
100+
EVALRQSVEADINGLRRVLDELTLTKADLEMQIESLTEELAYLKKNHEEEMKDLRNVSTG
101+
DVNVEMNAAPGVDLTQLLNNMRSQYEQLAEQNRKDAEAWFNEKSKELTTEIDNNIEQISS
102+
YKSEITELRRNVQALEIELQSQLALKQSLEASLAETEGRYCVQLSQIQAQISALEEQLQQ
103+
IRAETECQNTEYQQLLDIKIRLENEIQTYRSLLEGEGSSGGGGRGGGSFGGGYGGGSSGG
104+
GSSGGGYGGGHGGSSGGGYGGGSSGGGSSGGGYGGGSSSGGHGGGSSSGGHGGSSSGGYG
105+
GGSSGGGGGGYGGGSSGGGSSSGGGYGGGSSSGGHKSSSSGSVGESSSKGPRY

0 commit comments

Comments
 (0)