[aur-dev] [PATCH 1/4] make gendummydata script more friendly
elij
elij.mx at gmail.com
Tue Apr 5 20:54:09 EDT 2011
- no need to use mysql
- treat categories as a plain integer range, sized to match the number of
categories defined in the aur-schema.
- use the logging module instead of writing directly to stderr;
this makes the code cleaner, as there is now only one test for the value
of DBUG.
---
support/schema/gendummydata.py | 100 +++++++++++++---------------------------
1 files changed, 32 insertions(+), 68 deletions(-)
diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py
index 7b1d0cf..47d9bd5 100755
--- a/support/schema/gendummydata.py
+++ b/support/schema/gendummydata.py
@@ -15,7 +15,8 @@ import os
import sys
import cStringIO
import commands
-
+import logging
+import re
DBUG = 1
SEED_FILE = "/usr/share/dict/words"
@@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package
PKG_DEPS = (1, 5) # min/max depends a package has
PKG_SRC = (1, 3) # min/max sources a package has
PKG_CMNTS = (1, 5) # min/max number of comments a package has
+CATEGORIES_COUNT = 17 # the number of categories from aur-schema
VOTING = (0, .30) # percentage range for package voting
RANDOM_PATHS = ( # random path locations for package files
"/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib",
@@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
FORTUNE_CMD = "/usr/bin/fortune -l"
+# setup logging
+logformat = "%(levelname)s: %(message)s"
+if DBUG != 0:
+ level = logging.DEBUG
+else:
+ level = logging.INFO
+logging.basicConfig(format=logformat, level=level)
+log = logging.getLogger()
if len(sys.argv) != 2:
- sys.stderr.write("Missing output filename argument");
+ log.error("Missing output filename argument")
raise SystemExit
# make sure the seed file exists
#
if not os.path.exists(SEED_FILE):
- sys.stderr.write("Please install the 'words' Arch package\n");
- raise SystemExit
-
-# Make sure database access will be available
-#
-try:
- import MySQLdb
-except:
- sys.stderr.write("Please install the 'mysql-python' Arch package\n");
+ log.error("Please install the 'words' Arch package")
raise SystemExit
-# try to connect to database
-#
-try:
- db = MySQLdb.connect(host = DB_HOST, user = DB_USER,
- db = DB_NAME, passwd = DB_PASS)
- dbc = db.cursor()
-except:
- sys.stderr.write("Could not connect to database\n");
- raise SystemExit
-
-esc = db.escape_string
-
-
# track what users/package names have been used
#
seen_users = {}
seen_pkgs = {}
-categories = {}
-category_keys = []
user_keys = []
# some functions to generate random data
@@ -95,14 +82,14 @@ def genVersion():
ver.append("%d" % random.randrange(0,100))
return ".".join(ver) + "-u%d" % random.randrange(1,11)
def genCategory():
- return categories[category_keys[random.randrange(0,len(category_keys))]]
+ return random.randrange(0,CATEGORIES_COUNT)
def genUID():
return seen_users[user_keys[random.randrange(0,len(user_keys))]]
# load the words, and make sure there are enough words for users/pkgs
#
-if DBUG: print "Grabbing words from seed file..."
+log.debug("Grabbing words from seed file...")
fp = open(SEED_FILE, "r")
contents = fp.readlines()
fp.close()
@@ -117,7 +104,7 @@ else:
# select random usernames
#
-if DBUG: print "Generating random user names..."
+log.debug("Generating random user names...")
user_id = USER_ID
while len(seen_users) < MAX_USERS:
user = random.randrange(0, len(contents))
@@ -130,7 +117,7 @@ user_keys = seen_users.keys()
# select random package names
#
-if DBUG: print "Generating random package names..."
+log.debug("Generating random package names...")
num_pkgs = PKG_ID
while len(seen_pkgs) < MAX_PKGS:
pkg = random.randrange(0, len(contents))
@@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS:
#
contents = None
-# Load package categories from database
-#
-if DBUG: print "Loading package categories..."
-q = "SELECT * FROM PackageCategories"
-dbc.execute(q)
-row = dbc.fetchone()
-while row:
- categories[row[1]] = row[0]
- row = dbc.fetchone()
-category_keys = categories.keys()
-
-# done with the database
-#
-dbc.close()
-db.close()
-
# developer/tu IDs
#
developers = []
@@ -179,7 +150,7 @@ out.write("BEGIN;\n")
# Begin by creating the User statements
#
-if DBUG: print "Creating SQL statements for users.",
+log.debug("Creating SQL statements for users.")
count = 0
for u in user_keys:
account_type = 1 # default to normal user
@@ -201,22 +172,20 @@ for u in user_keys:
# a normal user account
#
pass
-
+
s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u)
out.write(s)
if count % 10 == 0:
- if DBUG: print ".",
+ log.debug("working...")
count += 1
-if DBUG: print "."
-if DBUG:
- print "Number of developers:", len(developers)
- print "Number of trusted users:", len(trustedusers)
- print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers))
- print "Number of packages:", MAX_PKGS
+log.debug("Number of developers: %d" % len(developers))
+log.debug("Number of trusted users: %d" % len(trustedusers))
+log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
+log.debug("Number of packages: %d" % MAX_PKGS)
# Create the package statements
#
-if DBUG: print "Creating SQL statements for packages.",
+log.debug("Creating SQL statements for packages.")
count = 0
for p in seen_pkgs.keys():
NOW = int(time.time())
@@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
out.write(s)
if count % 100 == 0:
- if DBUG: print ".",
+ log.debug("working...")
count += 1
# create random comments for this package
#
num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
for i in range(0, num_comments):
- fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'",""))
+ fortune = commands.getoutput(FORTUNE_CMD).replace("'","")
now = NOW + random.randrange(400, 86400*3)
s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now)
out.write(s)
-if DBUG: print "."
-
# Cast votes
#
track_votes = {}
-if DBUG: print "Casting votes for packages.",
+log.debug("Casting votes for packages.")
count = 0
for u in user_keys:
num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
@@ -271,7 +238,7 @@ for u in user_keys:
track_votes[pkg] += 1
out.write(s)
if count % 100 == 0:
- if DBUG: print ".",
+ log.debug("working...")
count += 1
# Update statements for package votes
@@ -282,7 +249,7 @@ for p in track_votes.keys():
# Create package dependencies and sources
#
-if DBUG: print "."; print "Creating statements for package depends/sources.",
+log.debug("Creating statements for package depends/sources.")
count = 0
for p in seen_pkgs.keys():
num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
@@ -308,7 +275,7 @@ for p in seen_pkgs.keys():
out.write(s)
if count % 100 == 0:
- if DBUG: print ".",
+ log.debug("working...")
count += 1
@@ -317,7 +284,4 @@ for p in seen_pkgs.keys():
out.write("COMMIT;\n")
out.write("\n")
out.close()
-
-if DBUG: print "."
-if DBUG: print "Done."
-
+log.debug("Done.")
--
1.7.4.1
More information about the aur-dev
mailing list