Skip to content

Commit 133aeca

Browse files
committed
empiar pulled out and mapped from xml
1 parent c6299b2 commit 133aeca

File tree

2 files changed

+61
-20
lines changed

2 files changed

+61
-20
lines changed

AddedAnnotations.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,6 @@ def run(filename):
8989
wgt = models.Weight(xml.emdb_id)
9090
(wgt.emdb_id, wgt.overall_mw, wgt.units, wgt.provenance) = (xml.emdb_id, xml.overall_mw, "MDa", "EMDB")
9191
packed_models["WEIGHT"] = wgt
92-
if empiar:
93-
empiar_logger = start_logger_if_necessary("empiar_logger", empiar_log_file)
94-
empiar_mapping = EMPIARMapping(xml.emdb_id, empiar_dictionary, empiar_logger)
95-
empiar_map = empiar_mapping.execute()
96-
packed_models["EMPIAR"] = empiar_map
9792
if pmc or orcid:
9893
pubmed_log = start_logger_if_necessary("pubmed_logger", pubmed_log_file) if pmc else None
9994
orcid_log = start_logger_if_necessary("orcid_logger", orcid_log_file) if orcid else None
@@ -123,7 +118,7 @@ def run(filename):
123118

124119
if __name__ == "__main__":
125120
######### Command : python /Users/amudha/project/git_code/added_annotations/AddedAnnotations.py
126-
# -w /Users/amudha/project/ -f /Users/amudha/project/EMD_XML/ --CPX --model --uniprot --weight --empiar --pmc
121+
# -w /Users/amudha/project/ -f /Users/amudha/project/EMD_XML/ --CPX --model --uniprot --weight --pmc
127122

128123
prog = "EMDBAddedAnnotations"
129124
usage = """
@@ -132,7 +127,7 @@ def run(filename):
132127
python AddedAnnotations.py -w '[{"/path/to/working/folder"}]'
133128
-f '[{"/path/to/EMDB/header/files/folder"}]'
134129
-p '[{"/path/to/PDBe/files/folder"}]'
135-
--uniprot --CPX --component --model --weight --empiar --pmc --GO --interpro --pfam --pbdekb
130+
--uniprot --CPX --component --model --weight --pmc --GO --interpro --pfam --pbdekb
136131
--cath --scop --scop2 --scop2B
137132
"""
138133

@@ -150,7 +145,6 @@ def run(filename):
150145
parser.add_argument("--component", type=bool, nargs='?', const=True, default=False, help="Mapping to ChEMBL, ChEBI and DrugBank.")
151146
parser.add_argument("--model", type=bool, nargs='?', const=True, default=False, help="Collect MW from PDBe.")
152147
parser.add_argument("--weight", type=bool, nargs='?', const=True, default=False, help="Collect sample weight from header file.")
153-
parser.add_argument("--empiar", type=bool, nargs='?', const=True, default=False, help="Mapping EMPIAR ID to EMDB entries")
154148
parser.add_argument("--pmc", type=bool, nargs='?', const=True, default=False, help="Mapping publication ID to EMDB entries")
155149
parser.add_argument("--GO", type=bool, nargs='?', const=True, default=False, help="Mapping GO ids to EMDB entries")
156150
parser.add_argument("--interpro", type=bool, nargs='?', const=True, default=False, help="Mapping InterPro ids to EMDB entries")
@@ -170,7 +164,6 @@ def run(filename):
170164
component = args.component
171165
model = args.model
172166
weight = args.weight
173-
empiar = args.empiar
174167
pmc = args.pmc
175168
orcid = pmc
176169
go = args.GO
@@ -186,8 +179,6 @@ def run(filename):
186179

187180
if model:
188181
db_list.append("pdbe")
189-
if empiar:
190-
db_list.append("empiar")
191182
if uniprot:
192183
db_list.append("uniprot")
193184
if component:
@@ -227,7 +218,6 @@ def run(filename):
227218
component = True
228219
model = True
229220
weight = True
230-
empiar = True
231221
pmc = True
232222
orcid = True
233223
go = True
@@ -238,7 +228,7 @@ def run(filename):
238228
scop2 = True
239229
scop2B = True
240230
pdbekb = True
241-
db_list.extend(["pdbe", "empiar", "uniprot", "chembl", "chebi", "drugbank", "pubmed", "pubmedcentral", "issn",
231+
db_list.extend(["pdbe", "uniprot", "chembl", "chebi", "drugbank", "pubmed", "pubmedcentral", "issn",
242232
"orcid", "cpx", "go", "interpro", "pfam", "cath", "scop", "scop2", "scop2B", "pdbekb"])
243233

244234
#Get config variables:
@@ -250,7 +240,7 @@ def run(filename):
250240
CP_ftp = config.get("file_paths", "CP_ftp")
251241
components_cif = config.get("file_paths", "components_cif")
252242
assembly_ftp = config.get("file_paths", "assembly_ftp")
253-
emdb_empiar_list = config.get("file_paths", "emdb_empiar_list")
243+
#emdb_empiar_list = config.get("file_paths", "emdb_empiar_list")
254244
pmc_api = config.get("api", "pmc")
255245
uniprot_tab = config.get("file_paths", "uniprot_tab")
256246
#GO_obo = config.get("file_paths", "GO_obo")
@@ -283,10 +273,6 @@ def run(filename):
283273
weight_log_file = os.path.join(args.workDir, 'overall_mw.log')
284274
weight_log = setup_logger('weight_logger', weight_log_file)
285275
weight_log.info("EMDB_ID\tOVERALL_MW")
286-
if empiar:
287-
empiar_log_file = os.path.join(args.workDir, 'emdb_empiar.log')
288-
empiar_log = setup_logger('empiar_logger', empiar_log_file)
289-
empiar_log.info("EMDB_ID\tEMPIAR_ID\tPROVENANCE")
290276
if pmc:
291277
pubmed_log_file = os.path.join(args.workDir, 'emdb_pubmed.log')
292278
pubmed_log = setup_logger('pubmed_logger', pubmed_log_file)
@@ -333,8 +319,6 @@ def run(filename):
333319

334320
if uniprot:
335321
uniprot_dictionary, uniprot_with_models = generate_unp_dictionary(uniprot_tab)
336-
if empiar:
337-
empiar_dictionary = generate_emp_dictionary(emdb_empiar_list)
338322
if component:
339323
chembl_map, chebi_map, drugbank_map = parseCCD(components_cif)
340324
pubmed_dict = generate_pubmed_dictionary(args.workDir) if pmc else {}

fetch_empiar.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import argparse
2+
import configparser
3+
import os
4+
from pathlib import Path
5+
import xml.etree.ElementTree as ET
6+
import csv
7+
8+
9+
def empiar_mapping(header_dir, empiar_map_file):
    """Write a tab-separated EMDB -> EMPIAR mapping built from header XML files.

    Every ``*.xml`` file directly inside ``header_dir`` is parsed, and each
    ``crossReferences/relatedEMDBEntries/emdbEntry`` element found in it
    produces one output row. The EMPIAR ID of a row is ``"EMPIAR-"`` followed
    by the XML file name without its extension.

    Args:
        header_dir: Directory containing the header XML files.
        empiar_map_file: Path of the output file; it is overwritten.

    Raises:
        xml.etree.ElementTree.ParseError: If a header file is not well-formed.
        OSError: If the output file cannot be opened for writing.
    """
    # Sort so the output row order does not depend on filesystem listing order.
    xml_files = sorted(Path(header_dir).glob('*.xml'))

    with open(empiar_map_file, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter='\t')
        csv_writer.writerow(["EMDB_ID", "EMPIAR_ID", "PROVENANCE"])

        for xml_file in xml_files:
            root = ET.parse(xml_file).getroot()

            # ElementTree spells a namespaced tag as "{uri}name". Reuse the
            # root's "{uri}" prefix for the child lookups, and use no prefix
            # at all when the document declares no namespace (otherwise the
            # root tag name would be mistaken for a namespace URI).
            if root.tag.startswith('{'):
                tag_prefix = root.tag.partition('}')[0] + '}'
            else:
                tag_prefix = ''
            entry_path = './/{0}crossReferences/{0}relatedEMDBEntries/{0}emdbEntry'.format(tag_prefix)

            empiar_id = "EMPIAR-" + xml_file.stem

            for emdb_entry in root.iterfind(entry_path):
                # Skip empty elements instead of emitting a row with a blank EMDB_ID.
                emdb_id = (emdb_entry.text or "").strip()
                if emdb_id:
                    csv_writer.writerow([emdb_id, empiar_id, "EMPIAR"])
23+
24+
25+
if __name__ == "__main__":
    # Command-line entry point: parse the working and header directories,
    # then write the EMDB/EMPIAR mapping to <workDir>/emdb_empiar.log.
    prog = "EMPIAR (EMICSS)"
    usage = """
            EMICSS for EMPIAR
            Example:
            python fetch_empiar.py -w '[{"/path/to/working/folder"}]' -f '[{"/path/to/EMPIAR/header/folder"}]'
          """

    parser = argparse.ArgumentParser(prog=prog, usage=usage, add_help=False,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-h", "--help", action="help", help="Show this help message and exit.")
    # Both paths are required: without them the script would otherwise fail
    # later with an opaque TypeError when joining or globbing a None path.
    parser.add_argument('-w', '--workDir', type=Path, required=True, help="Main working directory path.")
    parser.add_argument('-f', '--headerDir', type=Path, required=True,
                        help="Directory path to the EMPIAR header files.")
    args = parser.parse_args()

    work_dir = args.workDir
    header_dir = args.headerDir
    empiar_map_file = os.path.join(work_dir, 'emdb_empiar.log')

    empiar_mapping(header_dir, empiar_map_file)
50+
51+
52+
53+
54+
55+
56+
57+

0 commit comments

Comments
 (0)