@@ -116,17 +116,31 @@ def get_experiences(self):
116
116
main_list = self .wait_for_element_to_load (name = "pvs-list__container" , base = main )
117
117
for position in main_list .find_elements (By .CLASS_NAME , "pvs-list__paged-list-item" ):
118
118
position = position .find_element (By .CSS_SELECTOR , "div[data-view-name='profile-component-entity']" )
119
- company_logo_elem , position_details = position .find_elements (By .XPATH , "*" )
119
+
120
+ # Fix: Handle case where more than 2 elements are returned
121
+ elements = position .find_elements (By .XPATH , "*" )
122
+ if len (elements ) < 2 :
123
+ continue # Skip if we don't have enough elements
124
+
125
+ company_logo_elem = elements [0 ]
126
+ position_details = elements [1 ]
120
127
121
128
# company elem
122
- company_linkedin_url = company_logo_elem .find_element (By .XPATH ,"*" ).get_attribute ("href" )
123
- if not company_linkedin_url :
129
+ try :
130
+ company_linkedin_url = company_logo_elem .find_element (By .XPATH ,"*" ).get_attribute ("href" )
131
+ if not company_linkedin_url :
132
+ continue
133
+ except NoSuchElementException :
124
134
continue
125
135
126
136
# position details
127
137
position_details_list = position_details .find_elements (By .XPATH ,"*" )
128
138
position_summary_details = position_details_list [0 ] if len (position_details_list ) > 0 else None
129
139
position_summary_text = position_details_list [1 ] if len (position_details_list ) > 1 else None
140
+
141
+ if not position_summary_details :
142
+ continue
143
+
130
144
outer_positions = position_summary_details .find_element (By .XPATH ,"*" ).find_elements (By .XPATH ,"*" )
131
145
132
146
if len (outer_positions ) == 4 :
@@ -147,50 +161,71 @@ def get_experiences(self):
147
161
location = outer_positions [2 ].find_element (By .TAG_NAME ,"span" ).text
148
162
else :
149
163
position_title = ""
150
- company = outer_positions [0 ].find_element (By .TAG_NAME ,"span" ).text
151
- work_times = ""
164
+ company = outer_positions [0 ].find_element (By .TAG_NAME ,"span" ).text if outer_positions else ""
165
+ work_times = outer_positions [ 1 ]. find_element ( By . TAG_NAME , "span" ). text if len ( outer_positions ) > 1 else ""
152
166
location = ""
153
167
154
-
155
- times = work_times .split ("·" )[0 ].strip () if work_times else ""
156
- duration = work_times .split ("·" )[1 ].strip () if len (work_times .split ("·" )) > 1 else None
168
+ # Safely extract times and duration
169
+ if work_times :
170
+ parts = work_times .split ("·" )
171
+ times = parts [0 ].strip () if parts else ""
172
+ duration = parts [1 ].strip () if len (parts ) > 1 else None
173
+ else :
174
+ times = ""
175
+ duration = None
157
176
158
177
from_date = " " .join (times .split (" " )[:2 ]) if times else ""
159
- to_date = " " .join (times .split (" " )[3 :]) if times else ""
160
- if position_summary_text and any (element .get_attribute ("pvs-list__container" ) for element in position_summary_text .find_elements (By .TAG_NAME , "*" )):
161
- inner_positions = (position_summary_text .find_element (By .CLASS_NAME ,"pvs-list__container" )
162
- .find_element (By .XPATH ,"*" ).find_element (By .XPATH ,"*" ).find_element (By .XPATH ,"*" )
163
- .find_elements (By .CLASS_NAME ,"pvs-list__paged-list-item" ))
178
+ to_date = " " .join (times .split (" " )[3 :]) if times and len (times .split (" " )) > 3 else ""
179
+
180
+ if position_summary_text and any (element .get_attribute ("class" ) == "pvs-list__container" for element in position_summary_text .find_elements (By .XPATH , "*" )):
181
+ try :
182
+ inner_positions = (position_summary_text .find_element (By .CLASS_NAME ,"pvs-list__container" )
183
+ .find_element (By .XPATH ,"*" ).find_element (By .XPATH ,"*" ).find_element (By .XPATH ,"*" )
184
+ .find_elements (By .CLASS_NAME ,"pvs-list__paged-list-item" ))
185
+ except NoSuchElementException :
186
+ inner_positions = []
164
187
else :
165
188
inner_positions = []
189
+
166
190
if len (inner_positions ) > 1 :
167
191
descriptions = inner_positions
168
192
for description in descriptions :
169
- res = description .find_element (By .TAG_NAME ,"a" ).find_elements (By .XPATH ,"*" )
170
- position_title_elem = res [0 ] if len (res ) > 0 else None
171
- work_times_elem = res [1 ] if len (res ) > 1 else None
172
- location_elem = res [2 ] if len (res ) > 2 else None
173
-
174
-
175
- location = location_elem .find_element (By .XPATH ,"*" ).text if location_elem else None
176
- position_title = position_title_elem .find_element (By .XPATH ,"*" ).find_element (By .TAG_NAME ,"*" ).text if position_title_elem else ""
177
- work_times = work_times_elem .find_element (By .XPATH ,"*" ).text if work_times_elem else ""
178
- times = work_times .split ("·" )[0 ].strip () if work_times else ""
179
- duration = work_times .split ("·" )[1 ].strip () if len (work_times .split ("·" )) > 1 else None
180
- from_date = " " .join (times .split (" " )[:2 ]) if times else ""
181
- to_date = " " .join (times .split (" " )[3 :]) if times else ""
182
-
183
- experience = Experience (
184
- position_title = position_title ,
185
- from_date = from_date ,
186
- to_date = to_date ,
187
- duration = duration ,
188
- location = location ,
189
- description = description ,
190
- institution_name = company ,
191
- linkedin_url = company_linkedin_url
192
- )
193
- self .add_experience (experience )
193
+ try :
194
+ res = description .find_element (By .TAG_NAME ,"a" ).find_elements (By .XPATH ,"*" )
195
+ position_title_elem = res [0 ] if len (res ) > 0 else None
196
+ work_times_elem = res [1 ] if len (res ) > 1 else None
197
+ location_elem = res [2 ] if len (res ) > 2 else None
198
+
199
+ location = location_elem .find_element (By .XPATH ,"*" ).text if location_elem else None
200
+ position_title = position_title_elem .find_element (By .XPATH ,"*" ).find_element (By .TAG_NAME ,"*" ).text if position_title_elem else ""
201
+ work_times = work_times_elem .find_element (By .XPATH ,"*" ).text if work_times_elem else ""
202
+
203
+ # Safely extract times and duration
204
+ if work_times :
205
+ parts = work_times .split ("·" )
206
+ times = parts [0 ].strip () if parts else ""
207
+ duration = parts [1 ].strip () if len (parts ) > 1 else None
208
+ else :
209
+ times = ""
210
+ duration = None
211
+
212
+ from_date = " " .join (times .split (" " )[:2 ]) if times else ""
213
+ to_date = " " .join (times .split (" " )[3 :]) if times and len (times .split (" " )) > 3 else ""
214
+
215
+ experience = Experience (
216
+ position_title = position_title ,
217
+ from_date = from_date ,
218
+ to_date = to_date ,
219
+ duration = duration ,
220
+ location = location ,
221
+ description = description ,
222
+ institution_name = company ,
223
+ linkedin_url = company_linkedin_url
224
+ )
225
+ self .add_experience (experience )
226
+ except (NoSuchElementException , IndexError ) as e :
227
+ # Skip this description if elements are missing
228
+ continue
194
229
else :
195
230
description = position_summary_text .text if position_summary_text else ""
196
231
@@ -215,47 +250,69 @@ def get_educations(self):
215
250
self .scroll_to_bottom ()
216
251
main_list = self .wait_for_element_to_load (name = "pvs-list__container" , base = main )
217
252
for position in main_list .find_elements (By .CLASS_NAME ,"pvs-list__paged-list-item" ):
218
- position = position .find_element (By .XPATH ,"//div[@data-view-name='profile-component-entity']" )
219
- institution_logo_elem , position_details = position .find_elements (By .XPATH ,"*" )
220
-
221
- # company elem
222
- institution_linkedin_url = institution_logo_elem .find_element (By .XPATH ,"*" ).get_attribute ("href" )
223
-
224
- # position details
225
- position_details_list = position_details .find_elements (By .XPATH ,"*" )
226
- position_summary_details = position_details_list [0 ] if len (position_details_list ) > 0 else None
227
- position_summary_text = position_details_list [1 ] if len (position_details_list ) > 1 else None
228
- outer_positions = position_summary_details .find_element (By .XPATH ,"*" ).find_elements (By .XPATH ,"*" )
229
-
230
- institution_name = outer_positions [0 ].find_element (By .TAG_NAME ,"span" ).text
231
- if len (outer_positions ) > 1 :
232
- degree = outer_positions [1 ].find_element (By .TAG_NAME ,"span" ).text
233
- else :
234
- degree = None
235
-
236
- if len (outer_positions ) > 2 :
237
- times = outer_positions [2 ].find_element (By .TAG_NAME ,"span" ).text
253
+ try :
254
+ position = position .find_element (By .CSS_SELECTOR , "div[data-view-name='profile-component-entity']" )
255
+
256
+ # Fix: Handle case where more than 2 elements are returned
257
+ elements = position .find_elements (By .XPATH ,"*" )
258
+ if len (elements ) < 2 :
259
+ continue # Skip if we don't have enough elements
260
+
261
+ institution_logo_elem = elements [0 ]
262
+ position_details = elements [1 ]
263
+
264
+ # institution elem
265
+ try :
266
+ institution_linkedin_url = institution_logo_elem .find_element (By .XPATH ,"*" ).get_attribute ("href" )
267
+ except NoSuchElementException :
268
+ institution_linkedin_url = None
269
+
270
+ # position details
271
+ position_details_list = position_details .find_elements (By .XPATH ,"*" )
272
+ position_summary_details = position_details_list [0 ] if len (position_details_list ) > 0 else None
273
+ position_summary_text = position_details_list [1 ] if len (position_details_list ) > 1 else None
274
+
275
+ if not position_summary_details :
276
+ continue
277
+
278
+ outer_positions = position_summary_details .find_element (By .XPATH ,"*" ).find_elements (By .XPATH ,"*" )
279
+
280
+ institution_name = outer_positions [0 ].find_element (By .TAG_NAME ,"span" ).text if outer_positions else ""
281
+ degree = outer_positions [1 ].find_element (By .TAG_NAME ,"span" ).text if len (outer_positions ) > 1 else None
238
282
239
- if times != "" :
240
- from_date = times .split (" " )[times .split (" " ).index ("-" )- 1 ] if len (times .split (" " ))> 3 else times .split (" " )[0 ]
241
- to_date = times .split (" " )[- 1 ]
242
- else :
243
283
from_date = None
244
284
to_date = None
285
+
286
+ if len (outer_positions ) > 2 :
287
+ try :
288
+ times = outer_positions [2 ].find_element (By .TAG_NAME ,"span" ).text
289
+
290
+ if times and "-" in times :
291
+ split_times = times .split (" " )
292
+ dash_index = split_times .index ("-" ) if "-" in split_times else - 1
293
+
294
+ if dash_index > 0 :
295
+ from_date = split_times [dash_index - 1 ]
296
+ if dash_index < len (split_times ) - 1 :
297
+ to_date = split_times [- 1 ]
298
+ except (NoSuchElementException , ValueError ):
299
+ from_date = None
300
+ to_date = None
245
301
302
+ description = position_summary_text .text if position_summary_text else ""
246
303
247
-
248
- description = position_summary_text . text if position_summary_text else ""
249
-
250
- education = Education (
251
- from_date = from_date ,
252
- to_date = to_date ,
253
- description = description ,
254
- degree = degree ,
255
- institution_name = institution_name ,
256
- linkedin_url = institution_linkedin_url
257
- )
258
- self . add_education ( education )
304
+ education = Education (
305
+ from_date = from_date ,
306
+ to_date = to_date ,
307
+ description = description ,
308
+ degree = degree ,
309
+ institution_name = institution_name ,
310
+ linkedin_url = institution_linkedin_url
311
+ )
312
+ self . add_education ( education )
313
+ except ( NoSuchElementException , IndexError ) as e :
314
+ # Skip this education entry if elements are missing
315
+ continue
259
316
260
317
def get_name_and_location (self ):
261
318
top_panel = self .driver .find_element (By .XPATH , "//*[@class='mt2 relative']" )
0 commit comments