[pacman-dev] [PATCH 1/4] dload: handle irregular URLs

Dave Reisner d at falconindy.com
Sun Jul 3 19:04:06 EDT 2011


On Sun, Jul 03, 2011 at 01:56:05PM -0500, Dan McGee wrote:
> On Fri, Jul 1, 2011 at 7:59 AM, Dave Reisner <d at falconindy.com> wrote:
> > From: Dave Reisner <d at falconindy.com>
> >
> > URLs might end with a slash and follow redirects, or could be a
> > generated by a script such as /getpkg.php?id=12345. In both cases, we
> > may have a better filename that we can write to, taken from either
> > content-disposition header, or the effective URL.
> >
> > Specific to the first case, we write to a temporary file of the format
> > 'alpmtmp.XXXXXX', where XXXXXX is randomized by mkstemp(3). Since this
> > is a randomly generated file, we cannot support resuming and the file is
> > unlinked in the event of an interrupt.
> >
> > We also run into the possibility of changing out the filename from under
> > alpm on a -U operation, so callers of _alpm_download can optionally pass
> > a pointer to a *char to be filled in by curl_download_internal with the
> > actual filename we wrote to. Any sync operation will pass a NULL pointer
> > here, as we rely on specific names for packages from a mirror.
> >
> > Fixes FS#22645.
> >
> > Signed-off-by: Dave Reisner <d at falconindy.com>
> > ---
> >  lib/libalpm/be_sync.c |    4 +-
> >  lib/libalpm/dload.c   |  129 +++++++++++++++++++++++++++++++++++++++++--------
> >  lib/libalpm/dload.h   |    3 +-
> >  lib/libalpm/sync.c    |    2 +-
> >  4 files changed, 114 insertions(+), 24 deletions(-)
> >
> > diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
> > index ce418d5..16351f8 100644
> > --- a/lib/libalpm/be_sync.c
> > +++ b/lib/libalpm/be_sync.c
> > @@ -184,7 +184,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
> >                CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> >                snprintf(fileurl, len, "%s/%s.db", server, db->treename);
> >
> > -               ret = _alpm_download(handle, fileurl, syncpath, force, 0, 0);
> > +               ret = _alpm_download(handle, fileurl, syncpath, NULL, force, 0, 0);
> >
> >                if(ret == 0 && (check_sig == PM_PGP_VERIFY_ALWAYS ||
> >                                        check_sig == PM_PGP_VERIFY_OPTIONAL)) {
> > @@ -201,7 +201,7 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db)
> >                        /* if we downloaded a DB, we want the .sig from the same server */
> >                        snprintf(fileurl, len, "%s/%s.db.sig", server, db->treename);
> >
> > -                       sig_ret = _alpm_download(handle, fileurl, syncpath, 1, 0, errors_ok);
> > +                       sig_ret = _alpm_download(handle, fileurl, syncpath, NULL, 1, 0, errors_ok);
> >                        /* errors_ok suppresses error messages, but not the return code */
> >                        sig_ret = errors_ok ? 0 : sig_ret;
> >                }
> > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> > index fd83aac..eefa285 100644
> > --- a/lib/libalpm/dload.c
> > +++ b/lib/libalpm/dload.c
> > @@ -148,16 +148,46 @@ static int utimes_long(const char *path, long seconds)
> >        return 0;
> >  }
> >
> > +static size_t parse_headers(void *ptr, size_t size, size_t nmemb, void *user)
> > +{
> > +       size_t realsize = size * nmemb;
> > +       const char *fptr, *endptr = NULL;
> > +       const char * const cd_header = "Content-Disposition:";
> > +       const char * const fn_key = "filename=";
> > +       struct fileinfo **dlfile = (struct fileinfo**)user;
> > +
> > +       if(strncasecmp(cd_header, ptr, strlen(cd_header)) == 0) {
> Locale issues come into play? This string has an 'i'. If cURL
> normalizes headers one way or the other we should probably do a direct
> compare, but the strncasemp() isn't explicit regarding locale or
> anything, so it may just map lower/upper ASCII to the opposite ASCII
> and that is it.
> 

Fixed by borrowing some of curl's rawstr.c and adding it to our sources.

> > +               if((fptr = strstr(ptr, fn_key))) {
> > +                       fptr += strlen(fn_key);
> > +
> > +                       /* find the end of the field, which is either a semi-colon, or the end of
> > +                        * the data. As per curl_easy_setopt(3), we cannot count on headers being
> > +                        * null terminated, so we look for the closing \r\n */
> > +                       endptr = fptr + strcspn(fptr, ";\r\n") - 1;
> > +
> > +                       /* remove quotes */
> > +                       if(*fptr == '"' && *endptr == '"') {
> > +                               fptr++;
> > +                               endptr--;
> > +                       }
> > +
> > +                       STRNDUP((*dlfile)->cd_filename, fptr, endptr - fptr + 1,
> > +                                       RET_ERR((*dlfile)->handle, PM_ERR_MEMORY, realsize));
> > +               }
> > +       }
> >
> > -static int curl_download_internal(alpm_handle_t *handle,
> > -               const char *url, const char *localpath,
> > -               int force, int allow_resume, int errors_ok)
> > +       return realsize;
> > +}
> > +
> > +static int curl_download_internal(alpm_handle_t *handle, const char *url,
> > +               const char *localpath, char **final_file, int force, int allow_resume,
> > +               int errors_ok)
> I believe this hideous function signature gets addressed later, so I
> won't worry about it now.
> 
> >  {
> > -       int ret = -1;
> > +       int ret = -1, should_unlink = 0;
> >        FILE *localf = NULL;
> >        const char *useragent;
> >        const char *open_mode = "wb";
> > -       char *destfile, *tempfile;
> > +       char *destfile = NULL, *tempfile = NULL, *effective_url;
> >        /* RFC1123 states applications should support this length */
> >        char hostname[256];
> >        char error_buffer[CURL_ERROR_SIZE];
> > @@ -170,15 +200,39 @@ static int curl_download_internal(alpm_handle_t *handle,
> >        dlfile.handle = handle;
> >        dlfile.initial_size = 0.0;
> >        dlfile.filename = get_filename(url);
> > +       dlfile.cd_filename = NULL;
> >        if(!dlfile.filename || curl_gethost(url, hostname) != 0) {
> >                _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), url);
> >                RET_ERR(handle, PM_ERR_SERVER_BAD_URL, -1);
> >        }
> >
> > -       destfile = get_fullpath(localpath, dlfile.filename, "");
> > -       tempfile = get_fullpath(localpath, dlfile.filename, ".part");
> > -       if(!destfile || !tempfile) {
> > -               goto cleanup;
> > +       if(strlen(dlfile.filename) > 0 && strcmp(dlfile.filename, ".sig") != 0) {
> > +               destfile = get_fullpath(localpath, dlfile.filename, "");
> > +               tempfile = get_fullpath(localpath, dlfile.filename, ".part");
> > +               if(!destfile || !tempfile) {
> > +                       goto cleanup;
> > +               }
> > +       } else {
> > +               /* URL isn't to a file and ended with a slash */
> > +               int fd;
> > +               char randpath[PATH_MAX];
> > +
> > +               /* we can't support resuming this kind of download, so a partial transfer
> > +                * will be destroyed */
> > +               should_unlink = 1;
> > +
> > +               /* create a random filename, which is opened with O_EXCL */
> > +               snprintf(randpath, PATH_MAX, "%salpmtmp.XXXXXX", localpath);
> > +               if((fd = mkstemp(randpath)) == -1 || !(localf = fdopen(fd, open_mode))) {
> > +                       unlink(randpath);
> > +                       close(fd);
> > +                       _alpm_log(handle, PM_LOG_ERROR,
> > +                                       _("failed to create temporary file for download\n"));
> > +                       goto cleanup;
> > +               }
> > +               /* localf now points to our alpmtmp.XXXXXX */
> > +               STRDUP(tempfile, randpath, RET_ERR(handle, PM_ERR_MEMORY, -1));
> > +               dlfile.filename = strrchr(randpath, '/') + 1;
> >        }
> >
> >        error_buffer[0] = '\0';
> > @@ -197,6 +251,8 @@ static int curl_download_internal(alpm_handle_t *handle,
> >        curl_easy_setopt(handle->curl, CURLOPT_PROGRESSDATA, (void *)&dlfile);
> >        curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_LIMIT, 1024L);
> >        curl_easy_setopt(handle->curl, CURLOPT_LOW_SPEED_TIME, 10L);
> > +       curl_easy_setopt(handle->curl, CURLOPT_HEADERFUNCTION, parse_headers);
> > +       curl_easy_setopt(handle->curl, CURLOPT_WRITEHEADER, &dlfile);
> >
> >        useragent = getenv("HTTP_USER_AGENT");
> >        if(useragent != NULL) {
> > @@ -215,9 +271,11 @@ static int curl_download_internal(alpm_handle_t *handle,
> >                dlfile.initial_size = (double)st.st_size;
> >        }
> >
> > -       localf = fopen(tempfile, open_mode);
> >        if(localf == NULL) {
> > -               goto cleanup;
> > +               localf = fopen(tempfile, open_mode);
> > +               if(localf == NULL) {
> > +                       goto cleanup;
> > +               }
> >        }
> >
> >        curl_easy_setopt(handle->curl, CURLOPT_WRITEDATA, localf);
> > @@ -264,6 +322,7 @@ static int curl_download_internal(alpm_handle_t *handle,
> >        curl_easy_getinfo(handle->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &remote_size);
> >        curl_easy_getinfo(handle->curl, CURLINFO_SIZE_DOWNLOAD, &bytes_dl);
> >        curl_easy_getinfo(handle->curl, CURLINFO_CONDITION_UNMET, &timecond);
> > +       curl_easy_getinfo(handle->curl, CURLINFO_EFFECTIVE_URL, &effective_url);
> >
> >        /* time condition was met and we didn't download anything. we need to
> >         * clean up the 0 byte .part file that's left behind. */
> > @@ -284,6 +343,26 @@ static int curl_download_internal(alpm_handle_t *handle,
> >                goto cleanup;
> >        }
> >
> > +       if(dlfile.cd_filename) {
> > +               /* content-disposition header has a better name for our file */
> > +               free(destfile);
> > +               destfile = get_fullpath(localpath, dlfile.cd_filename, "");
> > +       } else {
> > +               const char *effective_filename = strrchr(effective_url, '/');
> > +               if(effective_filename) {
> > +                       effective_filename++;
> > +
> > +                       /* if destfile was never set, we wrote to a tempfile. even if destfile is
> > +                        * set, we may have followed some redirects and the effective url may
> > +                        * have a better suggestion as to what to name our file. in either case,
> > +                        * refactor destfile to this newly derived name. */
> > +                       if(!destfile || strcmp(effective_filename, strrchr(destfile, '/') + 1) != 0) {
> Any chance of strrchr returning NULL here?
> 

Not as far as I can tell. destfile includes localpath, which is our
CacheDir. It'd have to be pretty fucked up not to include a /. Agree?

> > +                               free(destfile);
> > +                               destfile = get_fullpath(localpath, effective_filename, "");
> > +                       }
> > +               }
> > +       }
> > +
> >        ret = 0;
> >
> >  cleanup:
> > @@ -294,10 +373,18 @@ cleanup:
> >
> >        if(ret == 0) {
> >                rename(tempfile, destfile);
> > +               if(final_file) {
> > +                       *final_file = strdup(strrchr(destfile, '/') + 1);
> Same concern as above with strrchr(). STRDUP would be nice too.
> 

lack of STRDUP is laziness. Same as above wrt strrchr and NULL.

> > +               }
> > +       }
> > +
> > +       if(dload_interrupted && should_unlink) {
> > +               unlink(tempfile);
> >        }
> >
> >        FREE(tempfile);
> >        FREE(destfile);
> > +       FREE(dlfile.cd_filename);
> >
> >        /* restore the old signal handlers */
> >        sigaction(SIGINT, &sig_int[OLD], NULL);
> > @@ -316,18 +403,19 @@ cleanup:
> >  * @param handle the context handle
> >  * @param url the file's URL
> >  * @param localpath the directory to save the file in
> > + * @param final_file the real name of the downloaded file (may be NULL)
> >  * @param force force download even if there is an up-to-date local copy
> >  * @param allow_resume allow a partial download to be resumed
> >  * @param errors_ok do not log errors (but still return them)
> >  * @return 0 on success, -1 on error (pm_errno is set accordingly if errors_ok == 0)
> >  */
> >  int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> > -               int force, int allow_resume, int errors_ok)
> > +               char **final_file, int force, int allow_resume, int errors_ok)
> >  {
> >        if(handle->fetchcb == NULL) {
> >  #ifdef HAVE_LIBCURL
> > -               return curl_download_internal(handle, url, localpath,
> > -                               force, allow_resume, errors_ok);
> > +               return curl_download_internal(handle, url, localpath, final_file, force,
> > +                               allow_resume, errors_ok);
> >  #else
> >                RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1);
> >  #endif
> > @@ -344,18 +432,17 @@ int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath
> >  char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
> >  {
> >        char *filepath;
> > -       const char *filename, *cachedir;
> > +       const char *cachedir;
> > +       char *final_file = NULL;
> >        int ret;
> >
> >        CHECK_HANDLE(handle, return NULL);
> >
> > -       filename = get_filename(url);
> > -
> >        /* find a valid cache dir to download to */
> >        cachedir = _alpm_filecache_setup(handle);
> >
> >        /* download the file */
> > -       ret = _alpm_download(handle, url, cachedir, 0, 1, 0);
> > +       ret = _alpm_download(handle, url, cachedir, &final_file, 0, 1, 0);
> >        if(ret == -1) {
> >                _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), url);
> >                return NULL;
> > @@ -373,7 +460,7 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
> >                CALLOC(sig_url, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, NULL));
> >                snprintf(sig_url, len, "%s.sig", url);
> >
> > -               ret = _alpm_download(handle, sig_url, cachedir, 1, 0, errors_ok);
> > +               ret = _alpm_download(handle, sig_url, cachedir, &final_file, 1, 0, errors_ok);
> >                if(ret == -1 && !errors_ok) {
> >                        _alpm_log(handle, PM_LOG_WARNING, _("failed to download %s\n"), sig_url);
> >                        /* Warn now, but don't return NULL. We will fail later during package
> > @@ -385,7 +472,9 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t *handle, const char *url)
> >        }
> >
> >        /* we should be able to find the file the second time around */
> > -       filepath = _alpm_filecache_find(handle, filename);
> > +       filepath = _alpm_filecache_find(handle, final_file);
> > +       FREE(final_file);
> > +
> >        return filepath;
> >  }
> >
> > diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h
> > index 0cdd900..351b2b3 100644
> > --- a/lib/libalpm/dload.h
> > +++ b/lib/libalpm/dload.h
> > @@ -29,11 +29,12 @@
> >  struct fileinfo {
> >        alpm_handle_t *handle;
> >        const char *filename;
> > +       char *cd_filename;
> >        double initial_size;
> >  };
> >
> >  int _alpm_download(alpm_handle_t *handle, const char *url, const char *localpath,
> > -               int force, int allow_resume, int errors_ok);
> > +               char **final_file, int force, int allow_resume, int errors_ok);
> >
> >  #endif /* _ALPM_DLOAD_H */
> >
> > diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> > index 8f03840..8df8a17 100644
> > --- a/lib/libalpm/sync.c
> > +++ b/lib/libalpm/sync.c
> > @@ -797,7 +797,7 @@ static int download_files(alpm_handle_t *handle, alpm_list_t **deltas)
> >                                        CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1));
> >                                        snprintf(fileurl, len, "%s/%s", server_url, filename);
> >
> > -                                       ret = _alpm_download(handle, fileurl, cachedir, 0, 1, 0);
> > +                                       ret = _alpm_download(handle, fileurl, cachedir, NULL, 0, 1, 0);
> >                                        FREE(fileurl);
> >                                        if(ret != -1) {
> >                                                break;
> > --
> > 1.7.6
> 


More information about the pacman-dev mailing list