@@ -41,6 +41,91 @@ def add_filetype(file_path: str):
41
41
eprint (err )
42
42
return 1
43
43
44
+
45
def process_image_size(val: str):
    """
    Translate a size keyword into its filter token.

    'large' -> "isz:l", 'medium' -> "isz:m", 'icon' -> "isz:i".
    Any other value produces an empty string.
    """
    size_tokens = (
        ("large", "isz:l"),
        ("medium", "isz:m"),
        ("icon", "isz:i"),
    )
    for size_name, token in size_tokens:
        if val == size_name:
            return token
    # Unknown keyword: signal "no filter" with an empty string.
    return ""
54
+
55
def process_image_color(val: str):
    """
    Translate a colour keyword into its filter token.

    "grayscale" and "transparent" map to the dedicated tokens "ic:gray"
    and "ic:trans". The named colours (red, orange, yellow, green, teal,
    blue, purple, pink, white, gray, black, brown) map to
    "ic:specific%2Cisc:<name>". Any other value produces an empty string.
    """
    # The two special modes have their own token shapes.
    if val == "grayscale":
        return "ic:gray"
    if val == "transparent":
        return "ic:trans"

    # Every named colour shares one prefix and differs only in the suffix.
    named_colors = (
        "red",
        "orange",
        "yellow",
        "green",
        "teal",
        "blue",
        "purple",
        "pink",
        "white",
        "gray",
        "black",
        "brown",
    )
    for color_name in named_colors:
        if val == color_name:
            return "ic:specific%2Cisc:" + color_name
    return ""
86
+
87
def process_image_type(val: str):
    """
    Translate an image-type keyword into its filter token.

    "clipart", "lineart" and "animated" map to "itp:<keyword>".
    Any other value produces an empty string.
    """
    for known_type in ("clipart", "lineart", "animated"):
        if val == known_type:
            return "itp:" + known_type
    return ""
96
+
97
def process_safesearch(val: str):
    """
    Validate a safesearch setting.

    Returns "on" or "off" when given exactly that keyword, and an empty
    string for anything else.
    """
    for accepted in ("on", "off"):
        if val == accepted:
            return accepted
    return ""
104
+
105
+
106
def setup_url(searchurl: str, imgsize: str, imgcolor: str, imgtype: str, safesearch: str):
    """
    Append image-filter query parameters to a search URL.

    Each of imgsize, imgcolor and imgtype may be None (filter not
    requested). The non-None ones are translated by process_image_size,
    process_image_color and process_image_type and combined, separated by
    "%2C", into a single "tbs=" parameter. safesearch, when not None, is
    translated by process_safesearch into a "safe=" parameter.

    Values the translators do not recognise come back as empty strings;
    those are dropped so the result never contains an empty "tbs=" /
    "safe=" parameter or a dangling "%2C" separator.

    Returns searchurl followed by the generated parameters, joined with "&".
    NOTE(review): joining with "&" presumes searchurl already contains a
    query string ("?...") -- confirm against the caller's base URL.
    """
    tbs_tokens = []
    if imgsize is not None:
        tbs_tokens.append(process_image_size(imgsize))
    if imgcolor is not None:
        tbs_tokens.append(process_image_color(imgcolor))
    if imgtype is not None:
        tbs_tokens.append(process_image_type(imgtype))
    # Unrecognised values translate to ""; keep only real tokens.
    tbs_tokens = [token for token in tbs_tokens if token]

    safe_value = ""
    if safesearch is not None:
        safe_value = process_safesearch(safesearch)

    url_parts = [searchurl]
    if tbs_tokens:
        url_parts.append("tbs=" + "%2C".join(tbs_tokens))
    if safe_value:
        url_parts.append("safe=" + safe_value)

    return "&".join(url_parts)
127
+
128
+
44
129
############################# scraping helpers ################################
45
130
46
131
def get_image_urls (query : str , page : int ):
@@ -159,7 +244,7 @@ def get_manifest(search_key: str, image_cnt: int):
159
244
160
245
################################# main api ####################################
161
246
162
- def scrape_images (search_key , image_cnt , directory , threads ):
247
+ def scrape_images (search_key , image_cnt , directory , threads , size , color , imgtype , safesearch ):
163
248
"""
164
249
Request manifest, generate paths, save files, get filetype.
165
250
This is the only function that should be called externally.
@@ -174,7 +259,8 @@ def scrape_images(search_key, image_cnt, directory, threads):
174
259
print ("savedir: {}" .format (directory ))
175
260
if not os .path .exists (directory ):
176
261
os .makedirs (directory )
177
-
262
+ global search_url
263
+ search_url = setup_url (search_url , size , color , imgtype , safesearch )
178
264
id_url_manifest = get_manifest (search_key , image_cnt )
179
265
with ThreadPoolExecutor (max_workers = threads ) as pool :
180
266
with tqdm (total = len (id_url_manifest )) as progress :
0 commit comments