Skip to content

Commit 29644a2

Browse files
author
rabbitflyer5
committed
Added search filter toolbar support for size, color, and type, as well as safesearch-setting functionality.
1 parent d1cac74 commit 29644a2

File tree

3 files changed

+116
-5
lines changed

3 files changed

+116
-5
lines changed

src/cli.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse, os, sys
2+
from enum import Enum
23

34
# Should revisit this to look for xdg_downloads in env
45
def get_download_path():
@@ -29,7 +30,6 @@ def check_pos_int(val: int):
2930
else:
3031
raise ValueError
3132

32-
3333
def get_arguments(argv=sys.argv):
3434
"""
3535
The cli front end for the scraper.
@@ -40,7 +40,7 @@ def get_arguments(argv=sys.argv):
4040
Returns:
4141
parser.parse_args() -- A struct with all required info to run the scraper
4242
"""
43-
parser = argparse.ArgumentParser(description="Scrape google for images")
43+
parser = argparse.ArgumentParser(description="Scrape Google for images")
4444
parser.add_argument("keyword",
4545
help="the phrase used to find images",
4646
type=str,
@@ -59,6 +59,31 @@ def get_arguments(argv=sys.argv):
5959
type=check_pos_int,
6060
nargs="?",
6161
default=1)
62+
parser.add_argument("-s", "--size",
63+
help="Restrict your search to a certain size of image. Can be 'large', 'medium', or 'icon'.",
64+
type=str,
65+
nargs="?",
66+
choices=['large','medium','icon'],
67+
default='')
68+
parser.add_argument("--color",
69+
help="Search for a certain color of image. Can be 'red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown', 'grayscale', or 'transparent'.",
70+
type=str,
71+
nargs="?",
72+
choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown', 'grayscale', 'transparent'],
73+
default='')
74+
parser.add_argument("-k", "--type",
75+
help="The type of image to search for. Can be 'clipart', 'lineart', or 'animated'.",
76+
type=str,
77+
nargs="?",
78+
choices=['clipart', 'lineart', 'animated'],
79+
dest="imgtype",
80+
default='')
81+
parser.add_argument("-p", "--safesearch",
82+
help="Force the use of a specific safesearch setting. Can be 'on' or 'off'.",
83+
type=str,
84+
nargs="?",
85+
choices=['on', 'off'],
86+
default='')
6287
args = parser.parse_args(argv[1:])
6388
# Set default directory
6489
if args.directory is None:

src/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
def main():
77
args = get_arguments(sys.argv)
8-
scrape_images(args.keyword[0], args.count, args.directory, args.threads)
8+
scrape_images(args.keyword[0], args.count, args.directory, args.threads, args.size, args.color, args.imgtype, args.safesearch)
99

1010
if __name__ == "__main__":
1111
main()

src/scraper.py

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,91 @@ def add_filetype(file_path: str):
4141
eprint(err)
4242
return 1
4343

44+
45+
def process_image_size(val: str):
    """
    Map a size name to its search-filter token.

    Arguments:
    val -- the requested size: 'large', 'medium', or 'icon'

    Returns:
    The corresponding "isz:*" token, or "" if val is not a known size.
    """
    size_tokens = {
        "large": "isz:l",
        "medium": "isz:m",
        "icon": "isz:i",
    }
    return size_tokens.get(val, "")
54+
55+
def process_image_color(val: str):
    """
    Map a color name to its search-filter token.

    Arguments:
    val -- the requested color; 'grayscale' and 'transparent' have their
           own tokens, every other supported name is a "specific" color

    Returns:
    "ic:gray" for 'grayscale', "ic:trans" for 'transparent',
    "ic:specific%2Cisc:<name>" for a named color, or "" if val is not
    a supported color.
    """
    # These two do not follow the "specific color" pattern.
    special_tokens = {
        "grayscale": "ic:gray",
        "transparent": "ic:trans",
    }
    named_colors = (
        "red", "orange", "yellow", "green", "teal", "blue",
        "purple", "pink", "white", "gray", "black", "brown",
    )

    if val in special_tokens:
        return special_tokens[val]
    if val in named_colors:
        # "%2C" is a URL-encoded comma separating the two sub-tokens.
        return "ic:specific%2Cisc:" + val
    return ""
86+
87+
def process_image_type(val: str):
    """
    Map an image-type name to its search-filter token.

    Arguments:
    val -- the requested type: 'clipart', 'lineart', or 'animated'

    Returns:
    "itp:<val>" for a supported type, or "" otherwise.
    """
    supported_types = ("clipart", "lineart", "animated")
    if val not in supported_types:
        return ""
    return "itp:" + val
96+
97+
def process_safesearch(val: str):
    """
    Validate a safesearch setting.

    Arguments:
    val -- the requested setting: 'on' or 'off'

    Returns:
    "on" or "off" when val matches one of them, or "" otherwise.
    """
    for setting in ("on", "off"):
        if val == setting:
            return setting
    return ""
104+
105+
106+
def setup_url(searchurl: str, imgsize: str, imgcolor: str, imgtype: str, safesearch: str):
    """
    Append the requested search filters to a base search url.

    Size, color and type tokens are combined into a single "tbs="
    parameter (joined with a URL-encoded comma); the safesearch setting
    goes into a separate "safe=" parameter. Parameters are appended to
    searchurl with "&".

    Arguments:
    searchurl  -- the base url to extend
    imgsize    -- size filter name, or None / "" for no filter
    imgcolor   -- color filter name, or None / "" for no filter
    imgtype    -- image-type filter name, or None / "" for no filter
    safesearch -- 'on' / 'off', or None / "" to leave it unspecified

    Returns:
    searchurl followed by "&tbs=..." and/or "&safe=..." for the filters
    that were actually requested; searchurl unchanged when none were.
    """
    # A filter that was not requested arrives as None or "" (the cli
    # default). Skip those up front instead of comparing against None only,
    # which used to let "" through and produced "tbs=%2C%2C&safe=".
    tbs_tokens = []
    if imgsize:
        tbs_tokens.append(process_image_size(imgsize))
    if imgcolor:
        tbs_tokens.append(process_image_color(imgcolor))
    if imgtype:
        tbs_tokens.append(process_image_type(imgtype))
    safe_token = process_safesearch(safesearch) if safesearch else ""

    # The process_* helpers return "" for values they do not recognise;
    # drop those so no empty token ends up in the url.
    tbs_tokens = [token for token in tbs_tokens if token]

    features = [searchurl]
    if tbs_tokens:
        features.append("tbs=" + "%2C".join(tbs_tokens))
    if safe_token:
        features.append("safe=" + safe_token)

    return "&".join(features)
127+
128+
44129
############################# scraping helpers ################################
45130

46131
def get_image_urls(query: str, page: int):
@@ -159,7 +244,7 @@ def get_manifest(search_key: str, image_cnt: int):
159244

160245
################################# main api ####################################
161246

162-
def scrape_images(search_key, image_cnt, directory, threads):
247+
def scrape_images(search_key, image_cnt, directory, threads, size, color, imgtype, safesearch):
163248
"""
164249
Request manifest, generate paths, save files, get filetype.
165250
This is the only function that should be called externally.
@@ -174,7 +259,8 @@ def scrape_images(search_key, image_cnt, directory, threads):
174259
print("savedir: {}".format(directory))
175260
if not os.path.exists(directory):
176261
os.makedirs(directory)
177-
262+
global search_url
263+
search_url = setup_url(search_url, size, color, imgtype, safesearch)
178264
id_url_manifest = get_manifest(search_key, image_cnt)
179265
with ThreadPoolExecutor(max_workers=threads) as pool:
180266
with tqdm(total=len(id_url_manifest)) as progress:

0 commit comments

Comments
 (0)