[arch-general] patch integrity check to compare tree to dbs - was: Integrity Check i686 of core, extra 28-08-2009

Henning Garus henning.garus at googlemail.com
Tue Sep 1 18:28:33 EDT 2009


On Sun, Aug 30, 2009 at 04:57:52PM +0200, Henning Garus wrote:
> On Sun, Aug 30, 2009 at 01:18:32PM +0200, Xavier wrote:
> > On Sun, Aug 30, 2009 at 12:56 PM, Henning
> > Garus<henning.garus at googlemail.com> wrote:
> > > On Sun, Aug 30, 2009 at 01:56:23AM +0200, Xavier wrote:
> > >>
> > >> Great, thanks! It indeed found all the problems I had noticed, and much more.
> > >>
> > >> It would be nice if this script could be automatically run as well,
> > >> once per week or so.
> > >>
> > >> Can you share the script used? Then we need to figure out if it can be
> > >> run in the same place as the other script.
> > >
> > > Since my script is largely based on check_packages.py that should be fairly
> > > straightforward. In fact my script expects parse_pkgbuilds.sh in the same
> > > directory.
> > >
> > > I have uploaded the script to codepad: http://codepad.org/tSmNwYNI
> > >
> > 
> > I see. Then I am not sure whether we want to keep this check separate
> > or just include it in check_packages.py
> 
> I kept it separate, because it deals with DBs and the ABS tree, while
> check_packages.py deals with the ABS tree only. On the other hand, integrating
> it should speed things up a bit (you run parse_pkgbuilds.sh only once) and we
> get rid of some duplicated code. On the downside, the output can be quite long
> with --vercmp activated, but I am not sure if that is even useful.
> 
> Somehow integrating feels like the better idea, I will look into it.

Here it is. It seems a bit shorter this way. I also changed the handling of the
"any" arch: checking "any" alone does not seem very useful, so I allowed multiple
abs roots to be specified.
-------------- next part --------------
>From 9f1d948bf3bd61f45e737c2b67cb4ae28cae9184 Mon Sep 17 00:00:00 2001
From: Henning Garus <henning.garus at gmail.com>
Date: Tue, 1 Sep 2009 23:54:47 +0200
Subject: [PATCH 1/2] check_packages.py: Allow multiple abs-trees

By parsing multiple abs trees we can include the 'any' tree when parsing the
other trees; checking 'any' standalone doesn't make much sense.

Signed-off-by: Henning Garus <henning.garus at gmail.com>
---
 cron-jobs/check_archlinux/check_packages.py |   32 ++++++++++++++------------
 cron-jobs/integrity-check                   |    2 +-
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/cron-jobs/check_archlinux/check_packages.py b/cron-jobs/check_archlinux/check_packages.py
index f2a9601..e4798a0 100755
--- a/cron-jobs/check_archlinux/check_packages.py
+++ b/cron-jobs/check_archlinux/check_packages.py
@@ -51,10 +51,11 @@ class Depend:
 		self.mod = mod
 
 def parse_pkgbuilds(repos,arch):
-	for repo in repos:
-		data = commands.getoutput(os.path.dirname(sys.argv[0]) + '/parse_pkgbuilds.sh '
-				+ arch + ' ' + absroot + '/' +  repo)
-		parse_data(repo,data)
+    for absroot in absroots:
+		for repo in repos:
+			data = commands.getoutput(os.path.dirname(sys.argv[0]) + '/parse_pkgbuilds.sh '
+					+ arch + ' ' + absroot + '/' +  repo)
+			parse_data(repo,data)
 
 def parse_data(repo,data):
 	attrname = None
@@ -332,9 +333,9 @@ def print_usage():
 	print "Usage: ./check_packages.py [OPTION]"
 	print ""
 	print "Options:"
-	print "  --abs-tree=<path>             Check the specified tree (default : /var/abs)"
+	print "  --abs-tree=<path[,path]>      Check the specified tree(s) (default : /var/abs)"
 	print "  --repos=<r1,r2,...>           Check the specified repos (default : core,extra)"
-	print "  --arch=<any|i686|x86_64>      Check the specified arch (default : i686)"
+	print "  --arch=<i686|x86_64>          Check the specified arch (default : i686)"
 	print "  -h, --help                    Show this help and exit"
 	print ""
 	print "Examples:"
@@ -345,7 +346,7 @@ def print_usage():
 	print ""
 
 ## Default path to the abs root directory
-absroot = "/var/abs"
+absroots = ["/var/abs"]
 ## Default list of repos to check
 repos = ['core', 'extra']
 ## Default arch
@@ -359,7 +360,7 @@ except getopt.GetoptError:
 if opts != []:
 	for o, a in opts:
 		if o in ("--abs-tree"):
-			absroot = a
+			absroots = a.split(',')
 		elif o in ("--repos"):
 			repos = a.split(",")
 		elif o in ("--arch"):
@@ -371,14 +372,15 @@ if opts != []:
 			print_usage()
 			sys.exit()
 
-if not os.path.isdir(absroot):
-	print "Error : the abs tree " + absroot + " does not exist"
-	sys.exit()
-for repo in repos:
-	repopath = absroot + "/" + repo
-	if not os.path.isdir(repopath):
-		print "Error : the repository " + repo + " does not exist in " + absroot
+for absroot in absroots:
+	if not os.path.isdir(absroot):
+		print "Error : the abs tree " + absroot + " does not exist"
 		sys.exit()
+	for repo in repos:
+		repopath = absroot + "/" + repo
+		if not os.path.isdir(repopath):
+			print "Error : the repository " + repo + " does not exist in " + absroot
+			sys.exit()
 # repos which need to be loaded
 loadrepos = set([])
 for repo in repos:
diff --git a/cron-jobs/integrity-check b/cron-jobs/integrity-check
index 0b59064..b3185ec 100755
--- a/cron-jobs/integrity-check
+++ b/cron-jobs/integrity-check
@@ -9,5 +9,5 @@ fi
 
 $basedir/check_archlinux/check_packages.py \
     --repos="$1" \
-    --abs-tree="/srv/abs/rsync/$2" --arch="$2" |\
+    --abs-tree="/srv/abs/rsync/$2,/srv/abs/rsync/any" --arch="$2" |\
     $basedir/devlist-mailer "Integrity Check $2: $1" "$3"
-- 
1.6.4.1

-------------- next part --------------
>From ab0d57072f2e02d0664f31ffdbeff58d42091667 Mon Sep 17 00:00:00 2001
From: Henning Garus <henning.garus at gmail.com>
Date: Tue, 1 Sep 2009 23:57:39 +0200
Subject: [PATCH 2/2] Add db comparison to integrity check

Compare the abs tree with the repo dbs to check if we have a PKGBUILD for
each package in the dbs and vice versa.

Signed-off-by: Henning Garus <henning.garus at gmail.com>
---
 cron-jobs/check_archlinux/check_packages.py |   55 ++++++++++++++++++++++++++-
 1 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/cron-jobs/check_archlinux/check_packages.py b/cron-jobs/check_archlinux/check_packages.py
index e4798a0..faa8c2f 100755
--- a/cron-jobs/check_archlinux/check_packages.py
+++ b/cron-jobs/check_archlinux/check_packages.py
@@ -16,9 +16,11 @@
 #      a non-core package)
 #   8. Circular dependencies
 
-import os,re,commands,getopt,sys,alpm
+import os,re,commands,getopt,sys,tarfile,alpm
 import pdb
 
+DBEXT='.db.tar.gz'
+
 packages = {} # pkgname : PacmanPackage
 provisions = {} # provision : PacmanPackage
 pkgdeps,makepkgdeps = {},{} # pkgname : list of the PacmanPackage dependencies
@@ -26,6 +28,9 @@ invalid_pkgbuilds = []
 missing_pkgbuilds = []
 dups = []
 
+dbonly = []
+absonly = []
+
 mismatches = []
 missing_deps = []
 missing_makedeps = []
@@ -102,6 +107,17 @@ def parse_data(repo,data):
 				provisions[provname] = []
 			provisions[provname].append(pkg)
 
+def parse_dbs(repos,arch):
+	dbpkgs = {}
+	for repo in repos:
+		pkgs = set([])
+		db = tarfile.open(os.path.join(repodir,repo,'os',arch,repo + DBEXT))
+		for line in db.getnames():
+			if not '/' in line:
+				pkgs.add(line.rsplit('-',2)[0])
+		dbpkgs[repo] = pkgs
+	return(dbpkgs)
+
 def splitdep(dep):
 	name = dep
 	version = ""
@@ -317,6 +333,8 @@ def print_results():
 	print_result(dep_hierarchy, "Repo Hierarchy for Dependencies")
 	print_result(makedep_hierarchy, "Repo Hierarchy for Makedepends")
 	print_result(circular_deps, "Circular Dependencies")
+	print_result(dbonly, "Packages found in db, but not in tree")
+	print_result(absonly,"Packages found in tree, but not in db")
 	print_subheading("Summary")
 	print "Missing PKGBUILDs:                    ", len(missing_pkgbuilds)
 	print "Invalid PKGBUILDs:                    ", len(invalid_pkgbuilds)
@@ -326,6 +344,8 @@ def print_results():
 	print "Missing (make)dependencies:           ", len(missing_deps)+len(missing_makedeps)
 	print "Repo hierarchy problems:              ", len(dep_hierarchy)+len(makedep_hierarchy)
 	print "Circular dependencies:                ", len(circular_deps)
+	print "In db, but not in tree:               ", len(dbonly)
+	print "In tree, but not in db:               ", len(absonly)
 	print ""
 
 def print_usage():
@@ -336,6 +356,7 @@ def print_usage():
 	print "  --abs-tree=<path[,path]>      Check the specified tree(s) (default : /var/abs)"
 	print "  --repos=<r1,r2,...>           Check the specified repos (default : core,extra)"
 	print "  --arch=<i686|x86_64>          Check the specified arch (default : i686)"
+	print "  --repo-dir=<path>             Check the dbs at the specified path (default : /srv/ftp)"
 	print "  -h, --help                    Show this help and exit"
 	print ""
 	print "Examples:"
@@ -351,9 +372,12 @@ absroots = ["/var/abs"]
 repos = ['core', 'extra']
 ## Default arch
 arch = "i686"
+## Default repodir
+repodir = "/srv/ftp"
 
 try:
-	opts, args = getopt.getopt(sys.argv[1:], "", ["abs-tree=", "repos=", "arch="])
+	opts, args = getopt.getopt(sys.argv[1:], "", ["abs-tree=", "repos=",
+	"arch=", "repo-dir="])
 except getopt.GetoptError:
 	print_usage()
 	sys.exit()
@@ -365,6 +389,8 @@ if opts != []:
 			repos = a.split(",")
 		elif o in ("--arch"):
 			arch = a
+		elif o in ("--repo-dir"):
+			repodir = a
 		else:
 			print_usage()
 			sys.exit()
@@ -381,6 +407,17 @@ for absroot in absroots:
 		if not os.path.isdir(repopath):
 			print "Error : the repository " + repo + " does not exist in " + absroot
 			sys.exit()
+if not os.path.isdir(repodir):
+	print "Error: the repository directory %s does not exist" % repodir
+	sys.exit()
+for repo in repos:
+	path = os.path.join(repodir,repo,'os',arch,repo + DBEXT)
+	if not os.path.isfile(path):
+		print "Error : repo DB %s : File not found" % path
+		sys.exit()
+	if not tarfile.is_tarfile(path):
+		print "Error : Cant open repo DB %s, not a valid tar file" % path
+		sys.exit()
 # repos which need to be loaded
 loadrepos = set([])
 for repo in repos:
@@ -397,6 +434,9 @@ for name,pkg in packages.iteritems():
 	if pkg.repo in repos:
 		repopkgs[name] = pkg
 
+print "==> parsing db files"
+dbpkgs = parse_dbs(repos,arch)
+
 print "==> checking mismatches"
 for name,pkg in repopkgs.iteritems():
 	pkgdirname = pkg.path.split("/")[-1]
@@ -441,4 +481,15 @@ for name,pkg in packages.iteritems():
 		pkgdeps[pkg] = deps
 find_scc(repopkgs.values())
 
+print "==> checking for differences between db files and pkgbuilds"
+for repo in repos:
+	for pkg in dbpkgs[repo]:
+		if not (pkg in repopkgs.keys() and repopkgs[pkg].repo == repo):
+			dbonly.append("%s/%s" % (repo,pkg))
+	dbonly.sort()
+for name,pkg in repopkgs.iteritems():
+	if not name in dbpkgs[pkg.repo]:
+		absonly.append("%s/%s" % (pkg.repo,name))
+absonly.sort()
+
 print_results()
-- 
1.6.4.1



More information about the arch-general mailing list