Skip to content

Commit 767c8a8

Browse files
author
rabbitflyer5
committed
Added most of the remaining advanced image search features.
1 parent 29644a2 commit 767c8a8

File tree

3 files changed

+96
-57
lines changed

3 files changed

+96
-57
lines changed

src/cli.py

+28-14
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import argparse, os, sys
2-
from enum import Enum
32

43
# Should revisit this to look for xdg_downloads in env
54
def get_download_path():
@@ -60,30 +59,45 @@ def get_arguments(argv=sys.argv):
6059
nargs="?",
6160
default=1)
6261
parser.add_argument("-s", "--size",
63-
help="Restrict your search to a certain size of image. Can be 'large', 'medium', or 'icon'.",
62+
help="Restrict your search to a certain size of image.",
6463
type=str,
6564
nargs="?",
66-
choices=['large','medium','icon'],
67-
default='')
68-
parser.add_argument("--color",
69-
help="Search for a certain color of image. Can be 'red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown', 'grayscale', or 'transparent'.",
65+
choices=['large','medium','icon', '400x300', '640x480', '800x600', '1024x768', '2mp', '4mp', '8mp', '10mp', '12mp', '15mp', '20mp', '40mp', '70mp'])
66+
parser.add_argument("-a", "--aspectratio",
67+
help="Restrict to specific aspect ratios.",
7068
type=str,
7169
nargs="?",
72-
choices=['red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown', 'grayscale', 'transparent'],
73-
default='')
70+
choices=['tall', 'square', 'wide', 'panoramic'])
71+
parser.add_argument("-i", "--color",
72+
help="Search for a certain color of image.",
73+
type=str,
74+
nargs="?",
75+
choices=['color', 'grayscale', 'transparent', 'red', 'orange', 'yellow', 'green', 'teal', 'blue', 'purple', 'pink', 'white', 'gray', 'black', 'brown'])
7476
parser.add_argument("-k", "--type",
75-
help="The type of image to search for. Can be 'clipart', 'lineart', or 'animated'.",
77+
help="The type of image to search for.",
78+
type=str,
79+
nargs="?",
80+
choices=['face', 'photo', 'clipart', 'lineart', 'animated'],
81+
dest="imgtype")
82+
parser.add_argument("-r", "--region",
83+
help="Get results from a specific region.",
84+
type=str,
85+
nargs="?")
86+
parser.add_argument("-f", "--filetype",
87+
help="Search for a specific file extension.",
88+
type=str,
89+
nargs="?",
90+
choices=['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico', 'raw'])
91+
parser.add_argument("-u", "--usage",
92+
help="Specify usage rights.",
7693
type=str,
7794
nargs="?",
78-
choices=['clipart', 'lineart', 'animated'],
79-
dest="imgtype",
80-
default='')
95+
choices=['cc', 'other'])
8196
parser.add_argument("-p", "--safesearch",
8297
help="Force the use of a specific safesearch setting. Can be 'on' or 'off'.",
8398
type=str,
8499
nargs="?",
85-
choices=['on', 'off'],
86-
default='')
100+
choices=['on', 'off'])
87101
args = parser.parse_args(argv[1:])
88102
# Set default directory
89103
if args.directory is None:

src/main.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
def main():
    """Parse the command line and forward every search option to the scraper."""
    args = get_arguments(sys.argv)
    # Positional order must match scrape_images' parameter order.
    scrape_images(
        args.keyword[0],
        args.count,
        args.directory,
        args.threads,
        args.size,
        args.aspectratio,
        args.color,
        args.imgtype,
        args.region,
        args.filetype,
        args.usage,
        args.safesearch,
    )
99

1010
if __name__ == "__main__":
1111
main()

src/scraper.py

+67-42
Original file line numberDiff line numberDiff line change
@@ -43,75 +43,100 @@ def add_filetype(file_path: str):
4343

4444

4545
def process_image_size(val: str):
    """Translate a ``--size`` choice into its ``tbs`` search-filter token.

    Three families of values are recognised:

    * named sizes: ``large``, ``medium``, ``icon``
    * resolution presets: ``400x300``, ``640x480``, ``800x600``, ``1024x768``
    * megapixel presets: ``2mp`` through ``70mp``

    Args:
        val: The size choice as given on the command line.

    Returns:
        The filter token (for example ``"isz:l"`` or
        ``"isz:lt%2Cislt:vga"``), or ``""`` when ``val`` is not recognised.
    """
    key = 'isz:'
    # Resolution and megapixel presets share this prefix; "%2C" is a
    # URL-encoded comma separating the two sub-fields.
    preset_key = key + 'lt%2Cislt:'

    named_sizes = {'large': 'l', 'medium': 'm', 'icon': 'i'}
    resolutions = {
        '400x300': 'qsvga',
        '640x480': 'vga',
        '800x600': 'svga',
        '1024x768': 'xga',
    }
    # Megapixel presets are passed through verbatim after the prefix.
    megapixels = ('2mp', '4mp', '6mp', '8mp', '10mp',
                  '12mp', '15mp', '20mp', '40mp', '70mp')

    if val in named_sizes:
        return key + named_sizes[val]
    if val in resolutions:
        return preset_key + resolutions[val]
    if val in megapixels:
        return preset_key + val
    return ""
5467

68+
def process_image_aspectratio(val: str):
    """Translate an ``--aspectratio`` choice into its ``tbs`` filter token.

    Args:
        val: One of ``tall``, ``square``, ``wide`` or ``panoramic``.

    Returns:
        ``"iar:"`` followed by the matching code, or ``""`` when ``val`` is
        not recognised. An empty string (never ``None``) is returned so the
        result can always be joined with the other filter tokens.
    """
    codes = {'tall': 't', 'square': 's', 'wide': 'w', 'panoramic': 'xw'}
    code = codes.get(val)
    if code is None:
        return ''
    return 'iar:' + code
78+
5579
def process_image_color(val: str):
    """Translate a ``--color`` choice into its ``tbs`` filter token.

    ``color``, ``grayscale`` and ``transparent`` each map to a fixed token;
    a named colour maps to ``ic:specific%2Cisc:<name>``. Anything else
    yields ``""``.
    """
    fixed_tokens = (
        ("color", "ic:color"),
        ("grayscale", "ic:gray"),
        ("transparent", "ic:trans"),
    )
    for name, token in fixed_tokens:
        if val == name:
            return token

    named_colors = ('red', 'orange', 'yellow', 'green', 'teal', 'blue',
                    'purple', 'pink', 'white', 'gray', 'black', 'brown')
    if val in named_colors:
        return "ic:specific%2Cisc:" + val
    return ""
8690

8791
def process_image_type(val: str):
    """Translate a ``--type`` choice into its ``tbs`` filter token.

    Returns ``"itp:<val>"`` for a recognised image type and ``""`` otherwise.
    """
    known_types = ('face', 'photo', 'clipart', 'lineart', 'animated')
    if val not in known_types:
        return ""
    return 'itp:' + val
9696

97+
def process_image_region(val: str):
    """Translate a ``--region`` value into its ``tbs`` filter token.

    Args:
        val: Region code as given on the command line. May be ``None`` or
            blank when no region was requested.

    Returns:
        ``"ctr:country"`` followed by the upper-cased code, or ``""`` when
        ``val`` is ``None``, empty or only whitespace, matching how the
        other converters report "no filter".
    """
    # Treat None like an empty value and drop stray surrounding whitespace
    # so it cannot end up inside the generated URL.
    code = (val or '').strip()
    if not code:
        return ''
    return 'ctr:country' + code.upper()
102+
103+
def process_image_filetype(val: str):
104+
if (val in ['jpg', 'gif', 'png', 'bmp', 'svg', 'webp', 'ico', 'raw']):
105+
return 'ift:' + val
106+
107+
def process_image_usage(val: str):
    """Translate a ``--usage`` choice into its ``tbs`` filter token.

    ``cc`` maps to ``sur:cl`` and ``other`` to ``sur:ol``; any other value
    yields ``''``.
    """
    for choice, code in (('cc', 'cl'), ('other', 'ol')):
        if val == choice:
            return 'sur:' + code
    return ''
115+
97116
def process_safesearch(val: str):
    """Validate a ``--safesearch`` choice for use as the ``safe`` URL value.

    Returns ``val`` unchanged when it is ``"on"`` or ``"off"``, and ``""``
    for anything else.
    """
    allowed = ("on", "off")
    return val if val in allowed else ""
104121

105122

106-
def setup_url(searchurl: str, imgsize: str, imgcolor: str, imgtype: str, safesearch: str):
123+
def setup_url(searchurl: str, imgsize: str, imgaspectratio: str, imgcolor: str, imgtype: str, imgregion: str, imgfiletype: str, imgusage: str, safesearch: str):
107124
features = [searchurl]
108125
subfeatures = [[],[]]
109126
if (imgsize != None):
110127
subfeatures[0] += [process_image_size(imgsize)]
128+
if (imgaspectratio != None):
129+
subfeatures[0] += [process_image_aspectratio(imgaspectratio)]
111130
if (imgcolor != None):
112131
subfeatures[0] += [process_image_color(imgcolor)]
113132
if (imgtype != None):
114133
subfeatures[0] += [process_image_type(imgtype)]
134+
if (imgregion != None):
135+
subfeatures[0] += [process_image_region(imgregion)]
136+
if (imgfiletype != None):
137+
subfeatures[0] += [process_image_filetype(imgfiletype)]
138+
if (imgusage != None):
139+
subfeatures[0] += [process_image_usage(imgusage)]
115140
if (safesearch != None):
116141
subfeatures[1] += [process_safesearch(safesearch)]
117142

@@ -122,7 +147,7 @@ def setup_url(searchurl: str, imgsize: str, imgcolor: str, imgtype: str, safesea
122147
features += ["tbs=" + delim2.join(subfeatures[0])]
123148
if (subfeatures[1] != []):
124149
features += ["safe=" + delim2.join(subfeatures[1])]
125-
150+
print(delim1.join(features))
126151
return delim1.join(features)
127152

128153

@@ -244,7 +269,7 @@ def get_manifest(search_key: str, image_cnt: int):
244269

245270
################################# main api ####################################
246271

247-
def scrape_images(search_key, image_cnt, directory, threads, size, color, imgtype, safesearch):
272+
def scrape_images(search_key, image_cnt, directory, threads, size, aspectratio, color, imgtype, region, filetype, usage, safesearch):
248273
"""
249274
Request manifest, generate paths, save files, get filetype.
250275
This is the only function that should be called externally.
@@ -260,7 +285,7 @@ def scrape_images(search_key, image_cnt, directory, threads, size, color, imgtyp
260285
if not os.path.exists(directory):
261286
os.makedirs(directory)
262287
global search_url
263-
search_url = setup_url(search_url, size, color, imgtype, safesearch)
288+
search_url = setup_url(search_url, size, aspectratio, color, imgtype, region, filetype, usage, safesearch)
264289
id_url_manifest = get_manifest(search_key, image_cnt)
265290
with ThreadPoolExecutor(max_workers=threads) as pool:
266291
with tqdm(total=len(id_url_manifest)) as progress:

0 commit comments

Comments
 (0)