@@ -42,25 +42,25 @@ def google_gen_query_url(keywords, face_only=False, safe_mode=False, image_type=
4242 base_url = "https://www.google.com/search?tbm=isch&hl=en"
4343 keywords_str = "&q=" + quote (keywords )
4444 query_url = base_url + keywords_str
45-
45+
4646 if safe_mode is True :
4747 query_url += "&safe=on"
4848 else :
4949 query_url += "&safe=off"
50-
50+
5151 filter_url = "&tbs="
5252
5353 if color is not None :
5454 if color == "bw" :
5555 filter_url += "ic:gray%2C"
5656 else :
5757 filter_url += "ic:specific%2Cisc:{}%2C" .format (color .lower ())
58-
58+
5959 if image_type is not None :
6060 if image_type .lower () == "linedrawing" :
6161 image_type = "lineart"
6262 filter_url += "itp:{}" .format (image_type )
63-
63+
6464 if face_only is True :
6565 filter_url += "itp:face"
6666
@@ -73,7 +73,10 @@ def google_image_url_from_webpage(driver, max_number, quiet=False):
7373 thumb_elements = []
7474 while True :
7575 try :
76- thumb_elements = driver .find_elements (By .CLASS_NAME , "rg_i" )
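76+ # Previous selector, kept for reference (matched thumbnails by class name "rg_i"):
77+ # thumb_elements = driver.find_elements(By.CLASS_NAME, "rg_i")
78+ # The Google image search page was updated; thumbnails are now located with the CSS selector below.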
79+ thumb_elements = driver .find_elements (By .CSS_SELECTOR , ".H8Rx8c > g-img > img" )
7780 my_print ("Find {} images." .format (len (thumb_elements )), quiet )
7881 if len (thumb_elements ) >= max_number :
7982 break
@@ -90,7 +93,7 @@ def google_image_url_from_webpage(driver, max_number, quiet=False):
9093 except Exception as e :
9194 print ("Exception " , e )
9295 pass
93-
96+
9497 if len (thumb_elements ) == 0 :
9598 return []
9699
@@ -109,16 +112,17 @@ def google_image_url_from_webpage(driver, max_number, quiet=False):
109112 print ("Error while clicking in thumbnail:" , e )
110113 retry_click .append (elem )
111114
112- if len (retry_click ) > 0 :
115+ if len (retry_click ) > 0 :
113116 my_print ("Retry some failed clicks ..." , quiet )
114117 for elem in retry_click :
115118 try :
116119 if elem .is_displayed () and elem .is_enabled ():
117120 elem .click ()
118121 except Exception as e :
119122 print ("Error while retrying click:" , e )
120-
121- image_elements = driver .find_elements (By .CLASS_NAME , "islib" )
123+
124+ # image_elements = driver.find_elements(By.CLASS_NAME, "islib")
125+ image_elements = driver .find_elements (By .CSS_SELECTOR , ".ob5Hkd > a" )
122126 image_urls = list ()
123127 url_pattern = r"imgurl=\S*&imgrefurl"
124128
@@ -138,10 +142,10 @@ def bing_gen_query_url(keywords, face_only=False, safe_mode=False, image_type=No
138142 filter_url = "&qft="
139143 if face_only is True :
140144 filter_url += "+filterui:face-face"
141-
145+
142146 if image_type is not None :
143147 filter_url += "+filterui:photo-{}" .format (image_type )
144-
148+
145149 if color is not None :
146150 if color == "bw" or color == "color" :
147151 filter_url += "+filterui:color2-{}" .format (color .lower ())
@@ -183,7 +187,7 @@ def bing_get_image_url_using_api(keywords, max_number=10000, face_only=False,
183187 proxies = None
184188 if proxy and proxy_type :
185189 proxies = {"http" : "{}://{}" .format (proxy_type , proxy ),
186- "https" : "{}://{}" .format (proxy_type , proxy )}
190+ "https" : "{}://{}" .format (proxy_type , proxy )}
187191 start = 1
188192 image_urls = []
189193 while start <= max_number :
@@ -309,7 +313,7 @@ def process_batch(batch_no, batch_size):
309313
310314
311315def crawl_image_urls (keywords , engine = "Google" , max_number = 10000 ,
312- face_only = False , safe_mode = False , proxy = None ,
316+ face_only = False , safe_mode = False , proxy = None ,
313317 proxy_type = "http" , quiet = False , browser = "chrome_headless" , image_type = None , color = None ):
314318 """
315319 Scrape image urls of keywords from Google Image Search
0 commit comments