1
1
package cfprxchecker
2
2
3
3
import (
4
+ "context"
4
5
"github.com/gocolly/colly"
5
6
"github.com/gocolly/colly/extensions"
6
7
"log"
7
8
"net/http"
9
+ "net/url"
8
10
"os"
9
11
"strings"
10
12
"sync"
@@ -21,6 +23,16 @@ type Args struct {
21
23
muGood sync.Mutex
22
24
}
23
25
26
+ // ProxyURL returns a proxy function (for use in a Transport)
27
+ // that always returns the same URL.
28
+ func ProxyURL (fixedURL * url.URL ) func (* http.Request ) (* url.URL , error ) {
29
+ return func (pr * http.Request ) (* url.URL , error ) {
30
+ ctx := context .WithValue (pr .Context (), colly .ProxyURLKey , fixedURL .String ())
31
+ * pr = * pr .WithContext (ctx )
32
+ return fixedURL , nil
33
+ }
34
+ }
35
+
24
36
func (a * Args ) writeGoodProxy (proxy string ) {
25
37
if proxy != "" {
26
38
@@ -39,9 +51,7 @@ func (a *Args) writeGoodProxy(proxy string) {
39
51
40
52
func (a * Args ) writeBadProxy (proxy string ) {
41
53
if proxy != "" {
42
-
43
54
var b strings.Builder
44
-
45
55
a .muBad .Lock ()
46
56
defer a .muBad .Unlock ()
47
57
@@ -54,13 +64,7 @@ func (a *Args) writeBadProxy(proxy string) {
54
64
}
55
65
}
56
66
57
- func CheckProxiesAgainstCloudFlare (args * Args ) {
58
- // Rotate the proxies
59
- rp , err := RoundRobinProxySwitcher (args .ProxyList ... )
60
- if err != nil {
61
- log .Fatal (err )
62
- }
63
-
67
+ func NewCollector (args * Args ) * colly.Collector {
64
68
// Instantiate default collector
65
69
c := colly .NewCollector (
66
70
colly .Async (true ),
@@ -71,40 +75,55 @@ func CheckProxiesAgainstCloudFlare(args *Args) {
71
75
c .CacheDir = ""
72
76
73
77
c .WithTransport (& http.Transport {
74
- Proxy : rp ,
75
78
DisableKeepAlives : true ,
76
79
MaxIdleConns : 100 ,
77
80
MaxIdleConnsPerHost : 100 ,
78
81
})
79
82
80
- // Limit the maximum parallelism to 24
81
- err = c .Limit (& colly.LimitRule {DomainGlob : "*" , Parallelism : 24 })
82
-
83
- if err != nil {
84
- log .Printf ("error while doing c.Limit %s" , err )
85
- }
83
+ c .SetRequestTimeout (args .TimeoutProxy )
86
84
87
85
extensions .RandomUserAgent (c )
88
86
89
- if args .BadProxiesOutputFile != nil {
90
- c .OnError (func (response * colly.Response , err error ) {
91
- log .Printf ("[DEBUG] Bad proxy found error status %d [%s] [%s]" , response .StatusCode , response .Request .ProxyURL , err )
92
- args .writeBadProxy (response .Request .ProxyURL )
93
- })
87
+ return c
88
+ }
89
+
90
+ func CheckProxiesAgainstCloudFlare (args * Args ) {
91
+ var onErrorCallback = func (response * colly.Response , err error ) {
92
+ log .Printf ("[DEBUG] Bad proxy found error status %d [%s] [%s]" , response .StatusCode , response .Request .ProxyURL , err )
93
+ args .writeBadProxy (response .Request .ProxyURL )
94
94
}
95
95
96
- c . OnResponse ( func (response * colly.Response ) {
96
+ var onResponseCallback = func (response * colly.Response ) {
97
97
log .Printf ("[DEBUG] Good proxy found [%s]" , response .Request .ProxyURL )
98
98
args .writeGoodProxy (response .Request .ProxyURL )
99
- })
99
+ }
100
+
101
+ var collectors []* colly.Collector
100
102
101
103
for _ , proxy := range args .ProxyList {
102
104
log .Printf ("[DEBUG] Doing %s" , proxy )
103
- if err = c .Visit (args .WebsiteToCrawl ); err != nil {
104
- log .Printf ("Error happening doing Visit %s" , err )
105
+ newCollector := NewCollector (args )
106
+ newCollector .OnResponse (onResponseCallback )
107
+ newCollector .OnError (onErrorCallback )
108
+ u , err := url .Parse (proxy )
109
+ if err != nil {
110
+ log .Printf ("error while parseing proxy %s %s" , proxy , err )
111
+ newCollector = nil
112
+ continue
113
+ }
114
+ newCollector .SetProxyFunc (ProxyURL (u ))
115
+ if err = newCollector .Visit (args .WebsiteToCrawl ); err != nil {
116
+ log .Printf ("error happening doing Visit %s" , err )
117
+ newCollector = nil
118
+ continue
119
+
105
120
}
121
+ collectors = append (collectors , newCollector )
122
+ log .Printf ("[DEBUG] end Doing %v" , len (collectors ))
123
+ }
124
+
125
+ for _ , collec := range collectors {
126
+ collec .Wait ()
106
127
}
107
128
108
- // Wait until threads are finished
109
- c .Wait ()
110
129
}
0 commit comments