[pacman-dev] [RFC] Iterator interface for reading databases

Rémy Oudompheng remyoudompheng at gmail.com
Sat Jul 16 12:32:57 EDT 2011


Hello,

I am thinking an iterator-like interface for database reading could be
useful. The most obvious example use is the pkgfile functionality.
It could be a convenient tool to do one-pass work on databases
without loading them completely in memory, which can be impossible for
databases with files info (I don't think everyone can load the whole
[community] database using libalpm standard structures).

Here's a patch that roughly shows what I am thinking about: only the code
for the "local" backend is shown.

diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
index 6e1e4bc..986e9a1 100644
--- a/lib/libalpm/alpm.h
+++ b/lib/libalpm/alpm.h
@@ -119,6 +119,7 @@ typedef enum _alpm_sigstatus_t {
 
 typedef struct __alpm_handle_t alpm_handle_t;
 typedef struct __alpm_db_t alpm_db_t;
+typedef struct __alpm_dbreader_t alpm_dbreader_t;
 typedef struct __alpm_pkg_t alpm_pkg_t;
 typedef struct __alpm_trans_t alpm_trans_t;
 
@@ -371,6 +372,9 @@ int alpm_option_set_default_siglevel(alpm_handle_t *handle, alpm_siglevel_t leve
  * @{
  */
 
+alpm_dbreader_t *alpm_open_dbdir(alpm_handle_t *handle, const char *dbpath);
+alpm_dbreader_t *alpm_open_dbarchive(alpm_handle_t *handle, const char *dbpath);
+
 /** Get the database of locally installed packages.
  * The returned pointer points to an internal structure
  * of libalpm which should only be manipulated through
diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c
index 70f242d..74d6886 100644
--- a/lib/libalpm/be_local.c
+++ b/lib/libalpm/be_local.c
@@ -41,15 +41,22 @@
 #include "package.h"
 #include "deps.h"
 
-static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq);
+static int local_db_read(const char *path, alpm_pkg_t *info, alpm_dbinfrq_t inforeq);
 
 #define LAZY_LOAD(info, errret) \
 	do { \
 		if(!(pkg->infolevel & info)) { \
-			local_db_read(pkg, info); \
+			local_db_read(NULL, pkg, info); \
 		} \
 	} while(0)
 
+struct local_dbreader_t {
+	alpm_handle_t *handle;
+	alpm_pkg_t *(*getitem) (alpm_dbreader_t*);
+	DIR* dbdir;
+	const char *dbpath;
+	char finished;
+};
 
 /* Cache-specific accessor functions. These implementations allow for lazy
  * loading by the files backend when a data member is actually needed
@@ -235,7 +242,7 @@ static int _cache_changelog_close(const alpm_pkg_t UNUSED *pkg, void *fp)
 
 static int _cache_force_load(alpm_pkg_t *pkg)
 {
-	return local_db_read(pkg, INFRQ_ALL);
+	return local_db_read(NULL, pkg, INFRQ_ALL);
 }
 
 
@@ -376,9 +383,10 @@ static int local_db_populate(alpm_db_t *db)
 	size_t est_count;
 	int count = 0;
 	struct stat buf;
-	struct dirent *ent = NULL;
 	const char *dbpath;
 	DIR *dbdir;
+	alpm_dbreader_t *reader;
+	alpm_pkg_t *pkg;
 
 	dbpath = _alpm_db_path(db);
 	if(dbpath == NULL) {
@@ -408,7 +416,6 @@ static int local_db_populate(alpm_db_t *db)
 		while(readdir(dbdir) != NULL) {
 			est_count++;
 		}
-		rewinddir(dbdir);
 	}
 	if(est_count >= 2) {
 		/* subtract the two extra pointers to get # of children */
@@ -418,35 +425,13 @@ static int local_db_populate(alpm_db_t *db)
 	/* initialize hash at 50% full */
 	db->pkgcache = _alpm_pkghash_create(est_count * 2);
 	if(db->pkgcache == NULL){
-		closedir(dbdir);
 		RET_ERR(db->handle, ALPM_ERR_MEMORY, -1);
 	}
+	closedir(dbdir);
 
-	while((ent = readdir(dbdir)) != NULL) {
-		const char *name = ent->d_name;
-
-		alpm_pkg_t *pkg;
-
-		if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
-			continue;
-		}
-		if(!is_dir(dbpath, ent)) {
-			continue;
-		}
-
-		pkg = _alpm_pkg_new();
-		if(pkg == NULL) {
-			closedir(dbdir);
-			RET_ERR(db->handle, ALPM_ERR_MEMORY, -1);
-		}
-		/* split the db entry name */
-		if(_alpm_splitname(name, &(pkg->name), &(pkg->version),
-					&(pkg->name_hash)) != 0) {
-			_alpm_log(db->handle, ALPM_LOG_ERROR, _("invalid name for database entry '%s'\n"),
-					name);
-			_alpm_pkg_free(pkg);
-			continue;
-		}
+	reader = alpm_open_dbdir(db->handle, dbpath);
+	while((pkg = reader->getitem(reader))) {
+		pkg->origin_data.db = db;
 
 		/* duplicated database entries are not allowed */
 		if(_alpm_pkghash_find(db->pkgcache, pkg->name)) {
@@ -455,18 +440,6 @@ static int local_db_populate(alpm_db_t *db)
 			continue;
 		}
 
-		pkg->origin = PKG_FROM_LOCALDB;
-		pkg->origin_data.db = db;
-		pkg->ops = &local_pkg_ops;
-		pkg->handle = db->handle;
-
-		/* explicitly read with only 'BASE' data, accessors will handle the rest */
-		if(local_db_read(pkg, INFRQ_BASE) == -1) {
-			_alpm_log(db->handle, ALPM_LOG_ERROR, _("corrupted database entry '%s'\n"), name);
-			_alpm_pkg_free(pkg);
-			continue;
-		}
-
 		/* add to the collection */
 		_alpm_log(db->handle, ALPM_LOG_FUNCTION, "adding '%s' to package cache for db '%s'\n",
 				pkg->name, db->treename);
@@ -474,7 +447,6 @@ static int local_db_populate(alpm_db_t *db)
 		count++;
 	}
 
-	closedir(dbdir);
 	if(count > 0) {
 		db->pkgcache->list = alpm_list_msort(db->pkgcache->list, (size_t)count, _alpm_pkg_cmp);
 	}
@@ -516,13 +488,13 @@ static char *get_pkgpath(alpm_db_t *db, alpm_pkg_t *info)
 	f = alpm_list_add(f, linedup); \
 } while(1) /* note the while(1) and not (0) */
 
-static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
+static int local_db_read(const char* pkgpath, alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 {
 	FILE *fp = NULL;
 	char path[PATH_MAX];
 	char line[1024];
-	char *pkgpath = NULL;
-	alpm_db_t *db = info->origin_data.db;
+  alpm_db_t *db = info->origin_data.db;
+	const char *treename = db ? db->treename : "NULL";
 
 	/* bitmask logic here:
 	 * infolevel: 00001111
@@ -533,18 +505,20 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 		/* already loaded all of this info, do nothing */
 		return 0;
 	}
-	_alpm_log(db->handle, ALPM_LOG_FUNCTION, "loading package data for %s : level=0x%x\n",
+	_alpm_log(info->handle, ALPM_LOG_FUNCTION, "loading package data for %s : level=0x%x\n",
 			info->name, inforeq);
 
 	/* clear out 'line', to be certain - and to make valgrind happy */
 	memset(line, 0, sizeof(line));
 
-	pkgpath = get_pkgpath(db, info);
+	if (pkgpath == NULL) {
+		pkgpath = get_pkgpath(db, info);
+	}
 
 	if(access(pkgpath, F_OK)) {
 		/* directory doesn't exist or can't be opened */
-		_alpm_log(db->handle, ALPM_LOG_DEBUG, "cannot find '%s-%s' in db '%s'\n",
-				info->name, info->version, db->treename);
+		_alpm_log(info->handle, ALPM_LOG_DEBUG, "cannot find '%s-%s' in db '%s'\n",
+				info->name, info->version, db ? treename : "NULL");
 		goto error;
 	}
 
@@ -552,7 +526,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 	if(inforeq & INFRQ_DESC && !(info->infolevel & INFRQ_DESC)) {
 		snprintf(path, PATH_MAX, "%sdesc", pkgpath);
 		if((fp = fopen(path, "r")) == NULL) {
-			_alpm_log(db->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno));
+			_alpm_log(info->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno));
 			goto error;
 		}
 		while(!feof(fp)) {
@@ -560,14 +534,14 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 			if(strcmp(line, "%NAME%") == 0) {
 				READ_NEXT();
 				if(strcmp(line, info->name) != 0) {
-					_alpm_log(db->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: name "
-								"mismatch on package %s\n"), db->treename, info->name);
+					_alpm_log(info->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: name "
+								"mismatch on package %s\n"), treename, info->name);
 				}
 			} else if(strcmp(line, "%VERSION%") == 0) {
 				READ_NEXT();
 				if(strcmp(line, info->version) != 0) {
-					_alpm_log(db->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: version "
-								"mismatch on package %s\n"), db->treename, info->name);
+					_alpm_log(info->handle, ALPM_LOG_ERROR, _("%s database is inconsistent: version "
+								"mismatch on package %s\n"), treename, info->name);
 				}
 			} else if(strcmp(line, "%DESC%") == 0) {
 				READ_AND_STORE(info->desc);
@@ -625,7 +599,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 	if(inforeq & INFRQ_FILES && !(info->infolevel & INFRQ_FILES)) {
 		snprintf(path, PATH_MAX, "%sfiles", pkgpath);
 		if((fp = fopen(path, "r")) == NULL) {
-			_alpm_log(db->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno));
+			_alpm_log(info->handle, ALPM_LOG_ERROR, _("could not open file %s: %s\n"), path, strerror(errno));
 			goto error;
 		}
 		while(fgets(line, sizeof(line), fp)) {
@@ -664,11 +638,9 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 	/* internal */
 	info->infolevel |= inforeq;
 
-	free(pkgpath);
 	return 0;
 
 error:
-	free(pkgpath);
 	if(fp) {
 		fclose(fp);
 	}
@@ -913,4 +885,96 @@ alpm_db_t *_alpm_db_register_local(alpm_handle_t *handle)
 	return db;
 }
 
+static alpm_pkg_t *local_dbreader_getitem(struct local_dbreader_t *reader) {
+	struct dirent *ent = NULL;
+	reader->handle->pm_errno = 0;
+	if (reader->finished) {
+		/* iteration has already finished */
+		RET_ERR(reader->handle, ALPM_ERR_WRONG_ARGS, NULL);
+	}
+	if (reader->dbdir == NULL) {
+		reader->finished = 1;
+		return NULL;
+	}
+
+	while(1) {
+		ent = readdir(reader->dbdir);
+		if (ent == NULL) {
+			closedir(reader->dbdir);
+			reader->finished = 1;
+			return NULL;
+		}
+		const char *name = ent->d_name;
+		char fullpath[PATH_MAX];
+		snprintf(fullpath, PATH_MAX, "%s/%s", reader->dbpath, name);
+
+		alpm_pkg_t *pkg;
+
+		if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
+			continue;
+		}
+		if(!is_dir(reader->dbpath, ent)) {
+			continue;
+		}
+
+		pkg = _alpm_pkg_new();
+		if(pkg == NULL) {
+			closedir(reader->dbdir);
+			reader->finished = 1;
+			RET_ERR(reader->handle, ALPM_ERR_MEMORY, NULL);
+		}
+		pkg->handle = reader->handle;
+		/* split the db entry name */
+		if(_alpm_splitname(name, &(pkg->name), &(pkg->version),
+					&(pkg->name_hash)) != 0) {
+			_alpm_log(reader->handle, ALPM_LOG_ERROR, _("invalid name for database entry '%s'\n"),
+					name);
+			_alpm_pkg_free(pkg);
+			continue;
+		}
+
+		pkg->origin = PKG_FROM_LOCALDB;
+		pkg->origin_data.db = NULL;
+		pkg->ops = &local_pkg_ops;
+
+		/* explicitly read with only 'BASE' data, accessors will handle the rest */
+		if(local_db_read(fullpath, pkg, INFRQ_BASE) == -1) {
+			_alpm_log(reader->handle, ALPM_LOG_ERROR, _("corrupted database entry '%s'\n"), name);
+			_alpm_pkg_free(pkg);
+			continue;
+		}
+		return pkg;
+	}
+}
+
+alpm_dbreader_t SYMEXPORT *alpm_open_dbdir(alpm_handle_t *handle, const char *dbpath) {
+	struct local_dbreader_t *reader = malloc(sizeof(struct local_dbreader_t));
+	DIR *dbdir;
+	if(handle == NULL) {
+		return NULL;
+	}
+	if(reader == NULL) {
+		RET_ERR(handle, ALPM_ERR_MEMORY, NULL);
+	}
+	if(dbpath == NULL) {
+		RET_ERR(handle, ALPM_ERR_WRONG_ARGS, NULL);
+	}
+
+	/* start reading */
+	dbdir = opendir(dbpath);
+	if(dbdir == NULL) {
+		if(errno != ENOENT) {
+			/* no database existing yet is not an error */
+			free(reader);
+			return NULL;
+		}
+	}
+	reader->getitem = &local_dbreader_getitem;
+	reader->handle = handle;
+	reader->dbdir = dbdir;
+	reader->dbpath = dbpath;
+	reader->finished = 0;
+	return (alpm_dbreader_t*)reader;
+}
+
 /* vim: set ts=2 sw=2 noet: */
diff --git a/lib/libalpm/db.h b/lib/libalpm/db.h
index a950130..3ce5889 100644
--- a/lib/libalpm/db.h
+++ b/lib/libalpm/db.h
@@ -56,6 +56,11 @@ struct db_operations {
 	void (*unregister) (alpm_db_t *);
 };
 
+struct __alpm_dbreader_t {
+	alpm_handle_t *handle;
+	alpm_pkg_t *(*getitem) (alpm_dbreader_t*);
+};
+
 /* Database */
 struct __alpm_db_t {
 	alpm_handle_t *handle;

-- 
Rémy.


More information about the pacman-dev mailing list