1
- import os
2
- import psycopg
3
1
from matchms import set_matchms_logger_level , Spectrum
4
2
import numpy as np
3
+ from datetime import datetime
4
+ import logging
5
5
6
- DB_PORT = os .environ .get ('DB_PORT' , 5432 )
7
- DB_USER = os .environ .get ('DB_USER' , "massbank3" )
8
- DB_PASSWORD = os .environ .get ('DB_PASSWORD' , "massbank3password" )
9
- DB_HOST = os .environ .get ('DB_HOST' , "localhost" )
10
- DB_NAME = os .environ .get ('DB_NAME' , "massbank3" )
11
- spectra = []
12
6
13
7
class ReferenceSpectra :
14
8
"""This class loads all reference spectra from the database"""
9
+ timestamp = datetime .fromisoformat ('2010-01-01' )
10
+ spectra = []
11
+
15
12
def __init__ (self , connection ):
16
13
"""initialize the class with a database connection"""
17
14
self .connection = connection
18
- self .spectra = []
19
15
20
- def get_spectra (self ):
16
+ @property
17
+ def load_spectra (self ):
18
+ """load all spectra from the database if the metadata indicates newer data and stores them"""
21
19
with self .connection .cursor () as cur :
22
20
cur .execute ("select * from metadata;" )
23
21
timestamp = cur .fetchone ()[2 ]
24
- print ( timestamp )
25
-
26
- if __name__ == '__main__' :
27
- myspec = ReferenceSpectra ( psycopg . connect ( f"postgresql:// { DB_NAME } : { DB_PASSWORD } @ { DB_HOST } : { DB_PORT } / { DB_NAME } " ))
28
- myspec . get_spectra ( )
22
+ logging . info ( "Database timestamp: %s" , timestamp )
23
+ logging . info ( "In-memory timestamp: %s" , ReferenceSpectra . timestamp )
24
+ timestamp_diff = timestamp - ReferenceSpectra . timestamp
25
+ if timestamp_diff . total_seconds () > 0 :
26
+ logging . info ( "Database timestamp is %s newer. Reloading..." , timestamp_diff )
29
27
30
- # Load all (non-deprecated) spectra from the database for faster lookup
31
- def load_spectra ():
32
- global spectra
33
-
34
- spectra = []
28
+ # Prevent matchms from complaining about spectra not having a precursor_mz
29
+ set_matchms_logger_level ("ERROR" )
35
30
36
- # Prevent matchms from complaining about spectra not having a precursor_mz
37
- set_matchms_logger_level ("ERROR" )
31
+ with self .connection .cursor () as cur :
32
+ cur .execute (
33
+ "SELECT massbank.accession as accession, peak.mz as mz, peak.intensity as intensity, "
34
+ "peak.relative_intensity as rel FROM massbank JOIN spectrum ON massbank.id = spectrum.massbank_id "
35
+ "JOIN peak ON spectrum.id = peak.spectrum_id ORDER BY massbank.id, peak.mz;"
36
+ )
38
37
39
- with psycopg .connect (f"postgresql://{ DB_NAME } :{ DB_PASSWORD } @{ DB_HOST } :{ DB_PORT } /{ DB_NAME } " ) as conn :
40
- with conn .cursor () as cur :
41
- cur .execute (
42
- "SELECT massbank.accession as accession, peak.mz as mz, peak.intensity as intensity, peak.relative_intensity as rel FROM massbank JOIN spectrum ON massbank.id = spectrum.massbank_id JOIN peak ON spectrum.id = peak.spectrum_id ORDER BY massbank.id, peak.mz;"
43
- )
38
+ entries = {}
39
+ ReferenceSpectra .spectra = []
44
40
45
- entries = {}
46
- for res in cur :
47
- accession = res [0 ]
48
- if accession in entries :
49
- entries [accession ]["mz" ].append (res [1 ])
50
- entries [accession ]["intensity" ].append (res [2 ])
51
- entries [accession ]["rel" ].append (res [3 ])
52
- else :
53
- entries [accession ] = {"mz" : [res [1 ]], "intensity" : [res [2 ]], "rel" : [res [3 ]]}
41
+ for res in cur :
42
+ accession = res [0 ]
43
+ if accession in entries :
44
+ entries [accession ]["mz" ].append (res [1 ])
45
+ entries [accession ]["intensity" ].append (res [2 ])
46
+ entries [accession ]["rel" ].append (res [3 ])
47
+ else :
48
+ entries [accession ] = {"mz" : [res [1 ]], "intensity" : [res [2 ]], "rel" : [res [3 ]]}
54
49
55
- for accession in entries :
50
+ for accession in entries :
56
51
mz = []
57
52
intensities = []
58
53
for key in entries [accession ]:
@@ -61,10 +56,34 @@ def load_spectra():
61
56
mz = prop
62
57
elif key == "rel" :
63
58
intensities = prop
64
-
65
- if len (mz ) > 0 and len (intensities ) > 0 and len (mz ) == len (intensities ):
66
- #metadata key "accession" gets silently converted to spectrum_id, so we can use spectrum_id right away
67
- spectra .append (Spectrum (mz = np .array (mz ).astype (float ), intensities = np .array (intensities ).astype (float ),
68
- metadata = {'spectrum_id' : accession }))
69
-
70
- print (f"\n Loaded { len (spectra )} spectra from the database" )
59
+
60
+ if 0 < len (mz ) == len (intensities ):
61
+ # metadata key "accession" gets silently converted to spectrum_id, so we can use spectrum_id
62
+ # right away
63
+ ReferenceSpectra .spectra .append (
64
+ Spectrum (mz = np .array (mz ).astype (float ), intensities = np .array (intensities ).astype (float ),
65
+ metadata = {'spectrum_id' : accession }))
66
+ else :
67
+ logging .warning ("Empty spectrum or error in spectrum" )
68
+
69
+ logging .info ("Loaded %s spectra from the database." , len (ReferenceSpectra .spectra ))
70
+ ReferenceSpectra .timestamp = timestamp
71
+
72
+
73
+ # import os
74
+ # import psycopg
75
+ #
76
+ # if __name__ == '__main__':
77
+ # DB_PORT = os.environ.get('DB_PORT', 5432)
78
+ # DB_USER = os.environ.get('DB_USER', "massbank3")
79
+ # DB_PASSWORD = os.environ.get('DB_PASSWORD', "massbank3password")
80
+ # DB_HOST = os.environ.get('DB_HOST', "localhost")
81
+ # DB_NAME = os.environ.get('DB_NAME', "massbank3")
82
+ #
83
+ # logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
84
+ # myspec = ReferenceSpectra(psycopg.connect(f"postgresql://{DB_NAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"))
85
+ # print(len(ReferenceSpectra.spectra))
86
+ # myspec.load_spectra
87
+ # print(len(ReferenceSpectra.spectra))
88
+ # myspec.load_spectra
89
+ # print(len(ReferenceSpectra.spectra))
0 commit comments