Commit 8ec43d71 authored by Lisa's avatar Lisa
Browse files

Clean repo script

parent 8c267ea4
# A script to clean duplicated package files from a repository, keeping only the newest one
# Copyright (C) 2013 Lisa Vitolo <>
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
import sys
import os
def extractName(pkgfile):
    """Return the package name from a "<name>-<version>-<release>" string.

    The name is everything before the second-to-last hyphen, so names that
    contain hyphens themselves (e.g. "kde-base") are returned intact.

    Raises ValueError if pkgfile contains fewer than two hyphens.
    """
    # Splitting from the right keeps hyphens inside the name untouched.
    parts = pkgfile.rsplit("-", 2)
    if len(parts) != 3:
        raise ValueError("expected <name>-<version>-<release>, got %r" % (pkgfile,))
    return parts[0]
def extractVersion(pkgfile):
    """Return the version from a "<name>-<version>-<release>" string.

    The version is the text between the last two hyphens.

    Raises ValueError if pkgfile contains fewer than two hyphens.
    """
    # Splitting from the right keeps hyphens inside the name untouched.
    parts = pkgfile.rsplit("-", 2)
    if len(parts) != 3:
        raise ValueError("expected <name>-<version>-<release>, got %r" % (pkgfile,))
    return parts[1]
def extractRelease(pkgfile):
    """Return the release from a "<name>-<version>-<release>" string.

    The release is everything after the last hyphen; it is the empty string
    when pkgfile ends with a hyphen.

    Raises ValueError (from str.rindex) if pkgfile contains no hyphen.
    """
    return pkgfile[pkgfile.rindex("-") + 1:]
# Version comparison, based on the one performed by Akabei minus the epoch comparison
def isMoreRecent(pkg1, pkg2):
    """Return True if pkg1 is strictly more recent than pkg2.

    Both arguments are "<name>-<version>-<release>" strings. Versions are
    split on "." and compared component by component as integers. If all
    shared components are equal, the version with more components wins.
    Only when the versions are identical are the releases compared, also as
    integers.

    Returns False when the two packages are equally recent, and when the
    versions are identical but at least one package has no release string.

    Raises ValueError if a version component or a release is not an integer.
    """
    components1 = extractVersion(pkg1).split(".")
    components2 = extractVersion(pkg2).split(".")
    rel1 = extractRelease(pkg1)
    rel2 = extractRelease(pkg2)

    # Compare the components both versions have in common.
    for part1, part2 in zip(components1, components2):
        number1 = int(part1)
        number2 = int(part2)
        if number1 != number2:
            return number1 > number2

    # Equal so far: the version with extra components is the newer one
    # (1.2.1 is more recent than 1.2).
    if len(components1) != len(components2):
        return len(components1) > len(components2)

    # Releases can only be compared when both are present. The previous
    # check, `len(rel1) == 0 ^ len(rel2) == 0`, was parsed as the chained
    # comparison `len(rel1) == (0 ^ len(rel2)) == 0`, which is true only
    # when BOTH are empty and let a single empty release reach int("").
    if not rel1 or not rel2:
        return False

    return int(rel1) > int(rel2)
def addSorted(sameNameList, pkg):
    """Insert pkg into sameNameList, keeping it sorted most recent first.

    sameNameList holds package file names that share a package name and is
    modified in place. pkg is placed before the first entry it is strictly
    more recent than; if there is none, it is appended at the end.
    """
    candidate = removeExtensions(pkg)
    for position, existing in enumerate(sameNameList):
        if isMoreRecent(candidate, removeExtensions(existing)):
            sameNameList.insert(position, pkg)
            # Stop right away: continuing would compare pkg against the
            # entries shifted by the insert and insert it again and again.
            return
    # Not more recent than any existing entry: it is the oldest so far.
    sameNameList.append(pkg)
def removeExtensions(pkg):
    """Return pkg with its last hyphen and everything after it removed.

    For a package file name this drops the trailing architecture and file
    extension part, e.g. "foo-1.0-1-x86_64.pkg.tar.xz" becomes "foo-1.0-1".

    Raises ValueError (from str.rindex) if pkg contains no hyphen.
    """
    return pkg[:pkg.rindex("-")]
# The dictionary returned contains entries shaped like this:
# "package name" -> list of package files with that name, sorted by version (most recent first)
def duplicates(pkglist):
    """Group package file names by package name.

    Returns a dictionary mapping each package name to the list of files in
    pkglist carrying that name, sorted by version with the most recent
    first. Names that occur only once map to a one-element list.
    """
    duplicateDictionary = {}
    for pkg in pkglist:
        # Remove file extension and architecture string before parsing.
        pkgname = extractName(removeExtensions(pkg))
        try:
            sameNameList = duplicateDictionary[pkgname]
        except KeyError:
            # First file seen with this name: start a new list.
            duplicateDictionary[pkgname] = [pkg]
        else:
            # Kept outside the try so that a KeyError raised while sorting
            # is not mistaken for a missing dictionary entry.
            addSorted(sameNameList, pkg)
    return duplicateDictionary
def printDups(dups):
    """Print every file that would be removed, without deleting anything.

    dups maps a package name to its list of files, most recent first. Every
    file except the first of each list is reported, together with the
    newest file that would be kept.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(":: The package files which would be removed are: ")
    for name in dups:
        fileList = dups[name]
        for f in fileList[1:]:
            print(f + " (newest is " + fileList[0] + ")")
def removeDups(dups, path):
    """Delete every outdated package file from path after confirmation.

    dups maps a package name to its list of files, most recent first; the
    first file of each list is kept and all the others are deleted from the
    directory path. The user must answer "y" on standard input, otherwise
    nothing is deleted.

    Raises OSError if a file cannot be removed.
    """
    print(":: WARNING! You are about to permanently delete files from the disk. Are you sure you want to continue? (y/n)")
    # Read from sys.stdin directly so the prompt works on Python 2 and 3.
    answer = sys.stdin.readline().strip()
    if answer != "y":
        print(":: Nothing done, exiting...")
        # Stop here: falling through would delete without consent.
        return

    for name in dups:
        fileList = dups[name]
        for f in fileList[1:]:
            completePath = os.path.join(path, f)
            os.remove(completePath)
    print(":: Files removed successfully")
def help():
    """Print the list of supported command-line options."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("-d <dir> Perform check on <dir>. If no argument is supplied, defaults to the current working directory.")
    print("-f Prints the file that would be removed, without touching them.")
    print("-h This message")
if __name__ == "__main__":
    # Command-line entry point: parse the options, collect the package
    # files of the chosen directory and either list or delete the outdated
    # ones.
    dirname = ""
    fake = False
    for arg in sys.argv:
        if arg == "-d":
            i = sys.argv.index("-d")
            if i < len(sys.argv) - 1:
                dirname = sys.argv[i + 1]
            else:
                print(":: Error: directory name is missing after -d")
                # Abort instead of silently falling back to the current
                # directory and deleting files the user did not point at.
                sys.exit(1)
        elif arg == "-f":
            fake = True
        elif arg == "-h":
            help()
            sys.exit(0)

    # No argument supplied
    if len(dirname) == 0:
        dirname = os.getcwd()

    pkglist = [entry for entry in os.listdir(dirname) if entry.endswith(".tar.xz")]
    dup = duplicates(pkglist)

    # -f is the dry run: it must only list the files and never delete them.
    if fake:
        printDups(dup)
    else:
        removeDups(dup, dirname)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment