Skip to content

Commit cae1b81

Browse files
committed
Add lazy loading for access to database
1 parent 4b666d7 commit cae1b81

File tree

4 files changed

+79
-54
lines changed

4 files changed

+79
-54
lines changed

similarity-service-cosine/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ To run the server, please execute the following from the root directory:
2626

2727
```
2828
pip3 install -r requirements.txt
29-
PYTHONPATH=gen python3 -m similarity-score-impl
29+
PYTHONPATH=gen python3 -m cosine_impl
3030
```
3131

3232
and open your browser to here:

similarity-service-cosine/cosine_impl/__main__.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,8 @@
33
import connexion
44

55
from openapi_server import encoder
6-
from cosine_impl import db
7-
86

97
def main():
10-
print("Loading spectra... ", end='', flush=True)
11-
db.load_spectra()
12-
print("Finished")
13-
148
app = connexion.App(__name__, specification_dir='..')
159
app.app.json_encoder = encoder.JSONEncoder
1610
app.add_api('openapi.yaml',

similarity-service-cosine/cosine_impl/cosine_impl_controller.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,20 @@
33
from matchms import Spectrum, calculate_scores
44
from matchms.similarity import CosineGreedy
55

6-
from cosine_impl.db import spectra
6+
from cosine_impl.db import ReferenceSpectra
77
from openapi_server.models import SimilarityScore
88
from openapi_server.models.similarity_calculation import SimilarityCalculation # noqa: E501
99
from openapi_server.models.similarity_score_list import SimilarityScoreList # noqa: E501
1010

11+
import os
12+
import psycopg
13+
14+
DB_PORT = os.environ.get('DB_PORT', 5432)
15+
DB_USER = os.environ.get('DB_USER', "massbank3")
16+
DB_PASSWORD = os.environ.get('DB_PASSWORD', "massbank3password")
17+
DB_HOST = os.environ.get('DB_HOST', "localhost")
18+
DB_NAME = os.environ.get('DB_NAME', "massbank3")
19+
1120

1221
def similarity_post(similarity_calculation): # noqa: E501
1322
"""Create a new similarity calculation.
@@ -22,6 +31,9 @@ def similarity_post(similarity_calculation): # noqa: E501
2231
if connexion.request.is_json:
2332
request = SimilarityCalculation.from_dict(similarity_calculation)
2433

34+
reference_spectra = ReferenceSpectra(psycopg.connect(f"postgresql://{DB_NAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"))
35+
reference_spectra.load_spectra()
36+
2537
mz = []
2638
intensities = []
2739

@@ -35,11 +47,11 @@ def similarity_post(similarity_calculation): # noqa: E501
3547
def filter_fn(spectrum):
3648
return spectrum.metadata['spectrum_id'] in request.reference_spectra_list
3749

38-
references = list(filter(filter_fn, spectra))
50+
references = list(filter(filter_fn, ReferenceSpectra.spectra))
3951

4052
scores = calculate_scores(references, [query], CosineGreedy())
4153
else:
42-
scores = calculate_scores(spectra, [query], CosineGreedy())
54+
scores = calculate_scores(ReferenceSpectra.spectra, [query], CosineGreedy())
4355
matches = scores.scores_by_query(query, 'CosineGreedy_score', sort=True)
4456
match_list = SimilarityScoreList([])
4557

similarity-service-cosine/cosine_impl/db.py

Lines changed: 63 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,53 @@
1-
import os
2-
import psycopg
31
from matchms import set_matchms_logger_level, Spectrum
42
import numpy as np
3+
from datetime import datetime
4+
import logging
55

6-
DB_PORT = os.environ.get('DB_PORT', 5432)
7-
DB_USER = os.environ.get('DB_USER', "massbank3")
8-
DB_PASSWORD = os.environ.get('DB_PASSWORD', "massbank3password")
9-
DB_HOST = os.environ.get('DB_HOST', "localhost")
10-
DB_NAME = os.environ.get('DB_NAME', "massbank3")
11-
spectra = []
126

137
class ReferenceSpectra:
148
"""This class loads all reference spectra from the database"""
9+
timestamp = datetime.fromisoformat('2010-01-01')
10+
spectra = []
11+
1512
def __init__(self, connection):
1613
"""initialize the class with a database connection"""
1714
self.connection = connection
18-
self.spectra = []
1915

20-
def get_spectra(self):
16+
@property
17+
def load_spectra(self):
18+
"""load all spectra from the database if the metadata indicates newer data and stores them"""
2119
with self.connection.cursor() as cur:
2220
cur.execute("select * from metadata;")
2321
timestamp = cur.fetchone()[2]
24-
print(timestamp)
25-
26-
if __name__ == '__main__':
27-
myspec=ReferenceSpectra(psycopg.connect(f"postgresql://{DB_NAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"))
28-
myspec.get_spectra()
22+
logging.info("Database timestamp: %s", timestamp)
23+
logging.info("In-memory timestamp: %s", ReferenceSpectra.timestamp)
24+
timestamp_diff = timestamp - ReferenceSpectra.timestamp
25+
if timestamp_diff.total_seconds() > 0:
26+
logging.info("Database timestamp is %s newer. Reloading...", timestamp_diff)
2927

30-
# Load all (non-deprecated) spectra from the database for faster lookup
31-
def load_spectra():
32-
global spectra
33-
34-
spectra = []
28+
# Prevent matchms from complaining about spectra not having a precursor_mz
29+
set_matchms_logger_level("ERROR")
3530

36-
# Prevent matchms from complaining about spectra not having a precursor_mz
37-
set_matchms_logger_level("ERROR")
31+
with self.connection.cursor() as cur:
32+
cur.execute(
33+
"SELECT massbank.accession as accession, peak.mz as mz, peak.intensity as intensity, "
34+
"peak.relative_intensity as rel FROM massbank JOIN spectrum ON massbank.id = spectrum.massbank_id "
35+
"JOIN peak ON spectrum.id = peak.spectrum_id ORDER BY massbank.id, peak.mz;"
36+
)
3837

39-
with psycopg.connect(f"postgresql://{DB_NAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}") as conn:
40-
with conn.cursor() as cur:
41-
cur.execute(
42-
"SELECT massbank.accession as accession, peak.mz as mz, peak.intensity as intensity, peak.relative_intensity as rel FROM massbank JOIN spectrum ON massbank.id = spectrum.massbank_id JOIN peak ON spectrum.id = peak.spectrum_id ORDER BY massbank.id, peak.mz;"
43-
)
38+
entries = {}
39+
ReferenceSpectra.spectra = []
4440

45-
entries = {}
46-
for res in cur:
47-
accession = res[0]
48-
if accession in entries:
49-
entries[accession]["mz"].append(res[1])
50-
entries[accession]["intensity"].append(res[2])
51-
entries[accession]["rel"].append(res[3])
52-
else:
53-
entries[accession] = {"mz": [res[1]], "intensity": [res[2]], "rel": [res[3]]}
41+
for res in cur:
42+
accession = res[0]
43+
if accession in entries:
44+
entries[accession]["mz"].append(res[1])
45+
entries[accession]["intensity"].append(res[2])
46+
entries[accession]["rel"].append(res[3])
47+
else:
48+
entries[accession] = {"mz": [res[1]], "intensity": [res[2]], "rel": [res[3]]}
5449

55-
for accession in entries:
50+
for accession in entries:
5651
mz = []
5752
intensities = []
5853
for key in entries[accession]:
@@ -61,10 +56,34 @@ def load_spectra():
6156
mz = prop
6257
elif key == "rel":
6358
intensities = prop
64-
65-
if len(mz) > 0 and len(intensities) > 0 and len(mz) == len(intensities):
66-
#metadata key "accession" gets silently converted to spectrum_id, so we can use spectrum_id right away
67-
spectra.append(Spectrum(mz=np.array(mz).astype(float), intensities=np.array(intensities).astype(float),
68-
metadata={'spectrum_id': accession}))
69-
70-
print(f"\nLoaded {len(spectra)} spectra from the database")
59+
60+
if 0 < len(mz) == len(intensities):
61+
# metadata key "accession" gets silently converted to spectrum_id, so we can use spectrum_id
62+
# right away
63+
ReferenceSpectra.spectra.append(
64+
Spectrum(mz=np.array(mz).astype(float), intensities=np.array(intensities).astype(float),
65+
metadata={'spectrum_id': accession}))
66+
else:
67+
logging.warning("Empty spectrum or error in spectrum")
68+
69+
logging.info("Loaded %s spectra from the database.", len(ReferenceSpectra.spectra))
70+
ReferenceSpectra.timestamp = timestamp
71+
72+
73+
# import os
74+
# import psycopg
75+
#
76+
# if __name__ == '__main__':
77+
# DB_PORT = os.environ.get('DB_PORT', 5432)
78+
# DB_USER = os.environ.get('DB_USER', "massbank3")
79+
# DB_PASSWORD = os.environ.get('DB_PASSWORD', "massbank3password")
80+
# DB_HOST = os.environ.get('DB_HOST', "localhost")
81+
# DB_NAME = os.environ.get('DB_NAME', "massbank3")
82+
#
83+
# logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
84+
# myspec = ReferenceSpectra(psycopg.connect(f"postgresql://{DB_NAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"))
85+
# print(len(ReferenceSpectra.spectra))
86+
# myspec.load_spectra
87+
# print(len(ReferenceSpectra.spectra))
88+
# myspec.load_spectra
89+
# print(len(ReferenceSpectra.spectra))

0 commit comments

Comments
 (0)