22package scraper
33
44import (
5+ "errors"
56 "fmt"
67 "net/http"
78 "net/url"
@@ -18,9 +19,11 @@ const (
1819
// AnimefireClient handles interactions with Animefire.plus.
type AnimefireClient struct {
	// client is the HTTP client used to send every request.
	client *http.Client
	// baseURL is the site root that request URLs are built from.
	baseURL string
	// userAgent is the value sent in the User-Agent header.
	userAgent string
	// maxRetries is the number of additional attempts made after the first
	// one fails.
	maxRetries int
	// retryDelay is how long to pause between attempts; values <= 0 disable
	// the pause.
	retryDelay time.Duration
}
2528
2629// NewAnimefireClient creates a new Animefire client
@@ -29,84 +32,164 @@ func NewAnimefireClient() *AnimefireClient {
2932 client : & http.Client {
3033 Timeout : 30 * time .Second ,
3134 },
32- baseURL : AnimefireBase ,
33- userAgent : UserAgent ,
35+ baseURL : AnimefireBase ,
36+ userAgent : UserAgent ,
37+ maxRetries : 2 ,
38+ retryDelay : 350 * time .Millisecond ,
3439 }
3540}
3641
3742// SearchAnime searches for anime on Animefire.plus using the original logic
3843func (c * AnimefireClient ) SearchAnime (query string ) ([]* models.Anime , error ) {
39- searchURL := fmt .Sprintf ("%s/pesquisar/%s" , c .baseURL , url .QueryEscape (query ))
44+ searchURL := fmt .Sprintf ("%s/pesquisar/%s" , c .baseURL , url .PathEscape (query ))
4045
41- req , err := http .NewRequest ("GET" , searchURL , nil )
42- if err != nil {
43- return nil , fmt .Errorf ("failed to create request: %w" , err )
46+ var lastErr error
47+ attempts := c .maxRetries + 1
48+
49+ for attempt := 0 ; attempt < attempts ; attempt ++ {
50+ req , err := http .NewRequest ("GET" , searchURL , nil )
51+ if err != nil {
52+ return nil , fmt .Errorf ("failed to create request: %w" , err )
53+ }
54+
55+ c .decorateRequest (req )
56+
57+ resp , err := c .client .Do (req )
58+ if err != nil {
59+ lastErr = fmt .Errorf ("failed to make request: %w" , err )
60+ if c .shouldRetry (attempt ) {
61+ c .sleep ()
62+ continue
63+ }
64+ return nil , lastErr
65+ }
66+
67+ if resp .StatusCode != http .StatusOK {
68+ lastErr = c .handleStatusError (resp )
69+ _ = resp .Body .Close ()
70+ if c .shouldRetry (attempt ) {
71+ c .sleep ()
72+ continue
73+ }
74+ return nil , lastErr
75+ }
76+
77+ doc , err := goquery .NewDocumentFromReader (resp .Body )
78+ _ = resp .Body .Close ()
79+ if err != nil {
80+ lastErr = fmt .Errorf ("failed to parse HTML: %w" , err )
81+ if c .shouldRetry (attempt ) {
82+ c .sleep ()
83+ continue
84+ }
85+ return nil , lastErr
86+ }
87+
88+ if c .isChallengePage (doc ) {
89+ lastErr = errors .New ("animefire returned a challenge page (try VPN or wait)" )
90+ if c .shouldRetry (attempt ) {
91+ c .sleep ()
92+ continue
93+ }
94+ return nil , lastErr
95+ }
96+
97+ animes := c .extractSearchResults (doc )
98+ if len (animes ) == 0 {
99+ // Legitimate empty result set – return without error
100+ return []* models.Anime {}, nil
101+ }
102+
103+ return animes , nil
104+ }
105+
106+ if lastErr != nil {
107+ return nil , lastErr
44108 }
109+ return nil , errors .New ("failed to retrieve results from AnimeFire" )
110+ }
45111
112+ func (c * AnimefireClient ) decorateRequest (req * http.Request ) {
46113 req .Header .Set ("User-Agent" , c .userAgent )
114+ req .Header .Set ("Accept" , "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" )
115+ req .Header .Set ("Accept-Language" , "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7" )
116+ req .Header .Set ("Cache-Control" , "no-cache" )
117+ req .Header .Set ("Pragma" , "no-cache" )
118+ req .Header .Set ("Referer" , c .baseURL + "/" )
119+ }
47120
48- resp , err := c . client . Do ( req )
49- if err != nil {
50- return nil , fmt .Errorf ("failed to make request: %w" , err )
121+ func ( c * AnimefireClient ) handleStatusError ( resp * http. Response ) error {
122+ if resp . StatusCode == http . StatusForbidden {
123+ return fmt .Errorf ("access restricted: VPN may be required" )
51124 }
52- defer func () { _ = resp .Body .Close () }()
125+ return fmt .Errorf ("server returned: %s" , resp .Status )
126+ }
53127
54- if resp .StatusCode != http .StatusOK {
55- if resp .StatusCode == http .StatusForbidden {
56- return nil , fmt .Errorf ("access restricted: VPN may be required" )
57- }
58- return nil , fmt .Errorf ("server returned: %s" , resp .Status )
128+ func (c * AnimefireClient ) shouldRetry (attempt int ) bool {
129+ return attempt < c .maxRetries
130+ }
131+
132+ func (c * AnimefireClient ) sleep () {
133+ if c .retryDelay <= 0 {
134+ return
135+ }
136+ time .Sleep (c .retryDelay )
137+ }
138+
139+ func (c * AnimefireClient ) isChallengePage (doc * goquery.Document ) bool {
140+ title := strings .ToLower (strings .TrimSpace (doc .Find ("title" ).First ().Text ()))
141+ if strings .Contains (title , "just a moment" ) {
142+ return true
59143 }
60144
61- doc , err := goquery .NewDocumentFromReader (resp .Body )
62- if err != nil {
63- return nil , fmt .Errorf ("failed to parse HTML: %w" , err )
145+ if doc .Find ("#cf-wrapper" ).Length () > 0 || doc .Find ("#challenge-form" ).Length () > 0 {
146+ return true
64147 }
65148
149+ body := strings .ToLower (doc .Text ())
150+ return strings .Contains (body , "cf-error" ) || strings .Contains (body , "cloudflare" )
151+ }
152+
153+ func (c * AnimefireClient ) extractSearchResults (doc * goquery.Document ) []* models.Anime {
66154 var animes []* models.Anime
67155
68- // Use the same parsing logic as the original system
69156 doc .Find (".row.ml-1.mr-1 a" ).Each (func (i int , s * goquery.Selection ) {
70157 if urlPath , exists := s .Attr ("href" ); exists {
71158 name := strings .TrimSpace (s .Text ())
72159 if name != "" {
73- fullURL := c .resolveURL (c .baseURL , urlPath )
74- anime := & models.Anime {
160+ animes = append (animes , & models.Anime {
75161 Name : name ,
76- URL : fullURL ,
77- }
78- animes = append (animes , anime )
162+ URL : c .resolveURL (c .baseURL , urlPath ),
163+ })
79164 }
80165 }
81166 })
82167
83- // If no results with the primary selector, try the card-based selector as fallback
84- if len (animes ) == 0 {
85- doc .Find (".card_ani" ).Each (func (i int , s * goquery.Selection ) {
86- titleElem := s .Find (".ani_name a" )
87- title := strings .TrimSpace (titleElem .Text ())
88- link , exists := titleElem .Attr ("href" )
89-
90- if exists && title != "" {
91- // Get image URL
92- imgElem := s .Find (".div_img img" )
93- imgURL , _ := imgElem .Attr ("src" )
94- if imgURL != "" {
95- imgURL = c .resolveURL (c .baseURL , imgURL )
96- }
97-
98- anime := & models.Anime {
99- Name : title ,
100- URL : c .resolveURL (c .baseURL , link ),
101- ImageURL : imgURL ,
102- }
103-
104- animes = append (animes , anime )
105- }
106- })
168+ if len (animes ) > 0 {
169+ return animes
107170 }
108171
109- return animes , nil
172+ doc .Find (".card_ani" ).Each (func (i int , s * goquery.Selection ) {
173+ titleElem := s .Find (".ani_name a" )
174+ title := strings .TrimSpace (titleElem .Text ())
175+ link , exists := titleElem .Attr ("href" )
176+
177+ if exists && title != "" {
178+ imgElem := s .Find (".div_img img" )
179+ imgURL , _ := imgElem .Attr ("src" )
180+ if imgURL != "" {
181+ imgURL = c .resolveURL (c .baseURL , imgURL )
182+ }
183+
184+ animes = append (animes , & models.Anime {
185+ Name : title ,
186+ URL : c .resolveURL (c .baseURL , link ),
187+ ImageURL : imgURL ,
188+ })
189+ }
190+ })
191+
192+ return animes
110193}
111194
112195// resolveURL resolves relative URLs to absolute URLs
0 commit comments