diff --git a/colly.go b/colly.go index ea9a2f321..5b1082446 100644 --- a/colly.go +++ b/colly.go @@ -660,12 +660,15 @@ func (c *Collector) fetch(u, method string, depth int, requestData io.Reader, ct hTrace = &HTTPTrace{} req = hTrace.WithTrace(req) } - checkHeadersFunc := func(statusCode int, headers http.Header) bool { + origURL := req.URL + checkHeadersFunc := func(req *http.Request, statusCode int, headers http.Header) bool { + if req.URL != origURL { + request.URL = req.URL + request.Headers = &req.Header + } c.handleOnResponseHeaders(&Response{Ctx: ctx, Request: request, StatusCode: statusCode, Headers: &headers}) return !request.abort } - - origURL := req.URL response, err := c.backend.Cache(req, c.MaxBodySize, checkHeadersFunc, c.CacheDir) if proxyURL, ok := req.Context().Value(ProxyURLKey).(string); ok { request.ProxyURL = proxyURL @@ -673,10 +676,6 @@ func (c *Collector) fetch(u, method string, depth int, requestData io.Reader, ct if err := c.handleOnError(response, err, request, ctx); err != nil { return err } - if req.URL != origURL { - request.URL = req.URL - request.Headers = &req.Header - } atomic.AddUint32(&c.responseCount, 1) response.Ctx = ctx response.Request = request diff --git a/colly_test.go b/colly_test.go index bb3f3305f..9b1ce5863 100644 --- a/colly_test.go +++ b/colly_test.go @@ -778,9 +778,16 @@ func TestRedirect(t *testing.T) { t.Error("Invalid URL after redirect: " + u) } }) + + c.OnResponseHeaders(func(r *Response) { + if !strings.HasSuffix(r.Request.URL.String(), "/redirected/") { + t.Error("Invalid URL in Request after redirect (OnResponseHeaders): " + r.Request.URL.String()) + } + }) + c.OnResponse(func(r *Response) { if !strings.HasSuffix(r.Request.URL.String(), "/redirected/") { - t.Error("Invalid URL in Request after redirect: " + r.Request.URL.String()) + t.Error("Invalid URL in Request after redirect (OnResponse): " + r.Request.URL.String()) } }) c.Visit(ts.URL + "/redirect") diff --git a/http_backend.go b/http_backend.go index fe96c9a6d..26a847986 100644 --- a/http_backend.go +++ b/http_backend.go @@ -40,7 +40,7 @@ type httpBackend struct { lock *sync.RWMutex } -type checkHeadersFunc func(statusCode int, header http.Header) bool +type checkHeadersFunc func(req *http.Request, statusCode int, header http.Header) bool // LimitRule provides connection restrictions for domains. // Both DomainRegexp and DomainGlob can be used to specify @@ -188,7 +188,7 @@ func (h *httpBackend) Do(request *http.Request, bodySize int, checkHeadersFunc c if res.Request != nil { *request = *res.Request } - if !checkHeadersFunc(res.StatusCode, res.Header) { + if !checkHeadersFunc(request, res.StatusCode, res.Header) { // closing res.Body (see defer above) without reading it aborts // the download return nil, ErrAbortedAfterHeaders