Skip to content

Commit 9f9d35a

Browse files
committed
fix data_info empty cols issue
1 parent a4cf3ec commit 9f9d35a

File tree

1 file changed

+33
-27
lines changed

1 file changed

+33
-27
lines changed

atomdb/migration/periodic/elements_data.py

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
{
5757
'property': 'eneg',
5858
'group': None,
59-
'table_name': 'eneg',
59+
'table_name': 'Energy',
6060
'description': 'Electronegativity'
6161
}
6262
]
@@ -87,8 +87,8 @@ class ElementsDataInfo(pt.IsDescription):
8787
source_key = pt.StringCol(30, pos=2)
8888
property_description = pt.StringCol(250, pos=3)
8989
reference = pt.StringCol(250, pos=4)
90-
doi = pt.StringCol(150, pos=4)
91-
notes = pt.StringCol(600, pos=5)
90+
doi = pt.StringCol(150, pos=5)
91+
notes = pt.StringCol(500, pos=6)
9292

9393

9494
def create_data_for_tables(hdf5_file, parent_folder, table_name, table_description, row_description, columns, row_data, sources_data, units_data):
@@ -155,7 +155,7 @@ def read_elements_data_csv(elements_data_csv):
155155
headers = [header.strip() for header in lines[0]] # first row as column headers
156156
sources = [source.strip() for source in lines[1]] # second row as sources
157157
units = [unit.strip() for unit in lines[2]] # third row as units
158-
data_rows = lines[3:] # remaining rows as data
158+
data_rows = lines[3:] # remaining rows as data
159159

160160
# Process headers to make them unique
161161
unique_headers = []
@@ -182,35 +182,43 @@ def read_elements_data_csv(elements_data_csv):
182182

183183
def read_data_info_csv(data_info_csv):
184184
"""
185-
Read and parse the data_info.csv file containing metadata.
185+
Read and parse the data_info.csv file containing metadata.
186186
187-
Args:
188-
data_info_csv: Path to the data_info.csv file.
189-
190-
Returns:
191-
data_info: List of dictionaries containing metadata for each property.
192-
"""
187+
Args:
188+
data_info_csv: Path to the data_info.csv file.
193189
194-
# # Opens the csv file, filters out comment lines (starting with #) and empty lines.
190+
Returns:
191+
data_info: List of dictionaries containing metadata for each property.
192+
"""
193+
# Open the CSV file, filtering out comment lines (starting with #) and empty lines.
195194
with open(data_info_csv, 'r') as f:
196-
reader = csv.reader(f)
197195
lines = []
198-
for line in reader:
199-
if line and not line[0].startswith('#'):
200-
lines.append([item.strip() for item in line])
201-
202-
# Get headers (first row)
203-
headers = [header.lstrip('#').strip() for header in lines[0]]
204-
data_rows = lines[1:]
205-
206-
data_info = []
207-
for row in data_rows:
208-
data_info.append(dict(zip(headers, row)))
196+
for line in f:
197+
stripped = line.strip()
198+
if stripped and not stripped.startswith('#'):
199+
lines.append(stripped)
200+
201+
# Hardcode the column headers rather than reading them from the file.
202+
data_info_headers = [
203+
'Property key',
204+
'Property name',
205+
'Source key',
206+
'Property description',
207+
'Reference',
208+
'doi',
209+
'Notes'
210+
]
211+
212+
reader = csv.reader(lines)
213+
data_rows = list(reader)
214+
215+
data_info = []
216+
for row in data_rows:
217+
data_info.append(dict(zip(data_info_headers, row)))
209218

210219
return data_info
211220

212221

213-
214222
def write_elements_data_to_hdf5(data, unique_headers, sources_data, units_data):
215223
"""
216224
Write element data to an HDF5 file.
@@ -304,12 +312,10 @@ def write_data_info_to_hdf5(data_info_list):
304312
table_row['doi'] = row.get('doi', '').encode('utf-8')
305313
table_row['notes'] = row.get('Notes', '').encode('utf-8')
306314
table_row.append()
307-
308315
property_info_table.flush()
309316

310317

311318

312-
313319
if __name__ == "__main__":
314320
# Read the elements data from the CSV file
315321
data, unique_headers, sources_data, units_data = read_elements_data_csv(elements_data_csv)

0 commit comments

Comments
 (0)