You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
312 lines
11 KiB
312 lines
11 KiB
#!/usr/bin/env python3
|
|
'''
|
|
Remove the lines from the from the input file in Bucket_Game-base that are not in the matching list file in Bucket_Game-branches.
|
|
The resulting modified list is written to standard output.
|
|
|
|
The provided filename must exist in both the Bucket_Game-base directory and the parallel Bucket_Game-branches directory.
|
|
|
|
The file must contain output such as from ls or find executing ls. Examples:
|
|
find -type f -name "*.ogg" -exec ls -lh {} \;
|
|
find -type f -exec ls -lh {} \;
|
|
|
|
Usage:
|
|
./trimpatchstats.py <filename>
|
|
'''
|
|
import sys
|
|
import os
|
|
import json
|
|
|
|
def error(msg):
|
|
sys.stderr.write("{}\n".format(msg))
|
|
sys.stderr.flush()
|
|
|
|
def usage():
|
|
error("")
|
|
error("trimpatchstats.py")
|
|
error("-----------------")
|
|
error(__doc__)
|
|
|
|
|
|
def splitFirst(line, delimiter):
|
|
'''
|
|
Only split once. Return a tuple of both parts excluding delimiter.
|
|
If delimiter isn't present, return the line, None
|
|
'''
|
|
delI = line.find(delimiter)
|
|
if delI < 0:
|
|
return line, None
|
|
return line[:delI], line[delI+1:]
|
|
|
|
|
|
def splitLast(line, delimiter):
|
|
'''
|
|
Only split once. Return a tuple of both parts excluding delimiter.
|
|
If delimiter isn't present, return the line, None
|
|
'''
|
|
delI = line.rfind(delimiter)
|
|
if delI < 0:
|
|
return line, None
|
|
return line[:delI], line[delI+1:]
|
|
|
|
|
|
def parse_ls(line):
|
|
'''
|
|
Parse the output from "ls -l" or "ls -lh" (human-readable size).
|
|
You can use 'n' instead of 'l' in either case, but the owner and
|
|
group will be numbers instead of names.
|
|
|
|
Returns: a dictionary containing 'permissions',
|
|
'hardlinks' (count), 'owner', 'group', 'size', 'date',
|
|
'name' where each entry is a string. The format of size is
|
|
in bytes, unless the 'h' option was used with ls, in which
|
|
case it will be human-readable (such as "8.3K"").
|
|
The owner and group will be number strings rather than
|
|
name strings if the 'n' option was used with ls.
|
|
|
|
Sequential arguments:
|
|
line -- The line must be like "perms hardlinks user group size date"
|
|
such as any of the following:
|
|
-rw-r--r-- 1 owner owner 8.3K Dec 14 2018 fire_extinguish.ogg
|
|
-rw-r--r-- 1 1000 1000 8.3K Dec 14 2018 'fire extinguish.ogg'
|
|
-rw-r--r-- 1 owner owner 8300 Dec 14 2018 "Poikilos' fire.ogg"
|
|
'''
|
|
if line.startswith("total "):
|
|
raise ValueError("Expected ls -l file lines but got the"
|
|
"ls -l total line: {}".format(line))
|
|
results = {}
|
|
results['name'] = None
|
|
chopped = line
|
|
|
|
# Gradually chop off parts of line until only the date
|
|
# remains, since that is the least predictable as far as
|
|
# delimiters.
|
|
|
|
if chopped.endswith("'") or chopped.endswith('"'):
|
|
# the filename has spaces
|
|
# (or has "'" if has double quotes).
|
|
inQ = chopped[-1]
|
|
chopped = chopped[:-1]
|
|
firstQI = chopped.rfind(inQ)
|
|
if firstQI < 0:
|
|
raise ValueError("There is a missing '{}' before the"
|
|
" filename that ends with '{}'"
|
|
" in \"{}\""
|
|
"".format(inQ, inQ, chopped + inQ))
|
|
name = chopped[firstQI+1:]
|
|
chopped = chopped[:firstQI-1] # -1 to remove the space
|
|
else:
|
|
chopped, name = splitLast(chopped, " ")
|
|
results['permissions'], chopped = splitFirst(chopped, " ")
|
|
results['hardlinks'], chopped = splitFirst(chopped, " ")
|
|
results['owner'], chopped = splitFirst(chopped, " ")
|
|
results['group'], chopped = splitFirst(chopped, " ")
|
|
results['size'], chopped = splitFirst(chopped, " ")
|
|
results['date'] = chopped # Only the date should remain by now.
|
|
# results['name'] = os.path.split(path)[0]
|
|
# results['path'] = path
|
|
results['name'] = name
|
|
return results
|
|
|
|
|
|
def fill_file_dict_path(fileInfoDict, baseDir):
|
|
'''
|
|
Fill in the 'path' of the fileInfoDict with the baseDir, or if not
|
|
specified, the current working directory IF the file exists in it,
|
|
otherwise set 'path' to None if not set already.
|
|
'''
|
|
|
|
name = fileInfoDict.get('name')
|
|
if name is None:
|
|
raise ValueError("The 'name' key must contain a filename"
|
|
" in the dictionary provided to"
|
|
" fill_file_dict_path.")
|
|
baseDirMsg = "specified baseDir"
|
|
if baseDir is None:
|
|
baseDirMsg = "current directory"
|
|
baseDir = os.getcwd()
|
|
path = os.path.join(baseDir, name)
|
|
|
|
fileInfoDict['path'] = os.path.abspath(name)
|
|
if not os.path.exists(path):
|
|
if os.path.exists(fileInfoDict['path']):
|
|
print("WARNING: missing \"{}\" so using {} to form the path"
|
|
" \"{}\""
|
|
"".format(path, baseDirMsg, fileInfoDict['path']))
|
|
else:
|
|
print("WARNING: missing \"{}\" so using {} to form the path"
|
|
" \"{}\" which also doesn't exist"
|
|
"".format(path, baseDirMsg, fileInfoDict['path']))
|
|
fileInfoDict['name'] = os.path.split(fileInfoDict['path'])[-1]
|
|
|
|
|
|
def key_matches_for_any(haystacks, key, needle):
|
|
'''
|
|
See if any dict in haystacks such as haystacks[0][key] is needle.
|
|
'''
|
|
for haystack in haystacks:
|
|
if haystack.get(key) == needle:
|
|
return True
|
|
else:
|
|
return False
|
|
|
|
|
|
class LSFileInfo:
|
|
|
|
def __init__(self, line, baseDir):
|
|
'''
|
|
Parse the output from "ls -l" or "ls -lh" (human-readable size).
|
|
You can use 'n' instead of 'l' in either case, but the owner and
|
|
group will be numbers instead of names.
|
|
|
|
Sequential arguments:
|
|
line -- The line must be like (ls -lh in this example):
|
|
-rw-r--r-- 1 owner owner 8.3K Dec 14 2018 fire_extinguish.ogg
|
|
perms hardlinks user group size date
|
|
baseDir -- Provide the directory from which the ls command was
|
|
originally run so the LSFileInfo can get the full path.
|
|
Otherwise, the current working directory will be tried.
|
|
'''
|
|
results = parse_ls(line)
|
|
# self.name = results['name']
|
|
# self.path = None
|
|
self.permissions = results['permissions']
|
|
self.hardlinks = results['hardlinks']
|
|
self.owner = results['owner']
|
|
self.group = results['group']
|
|
self.size = results['size']
|
|
self.date = results['date']
|
|
fill_file_dict_path(results, baseDir)
|
|
self.name = results.get('name')
|
|
self.path = results.get('path')
|
|
|
|
def to_dict(self):
|
|
results = {}
|
|
# result['name'] = self.name
|
|
results['path'] = self.path
|
|
results['permissions'] = self.permissions
|
|
results['hardlinks'] = self.hardlinks
|
|
results['owner'] = self.owner
|
|
results['group'] = self.group
|
|
results['date'] = self.date
|
|
return result
|
|
|
|
def __repr__(self):
|
|
return json.dumps(self.to_dict())
|
|
|
|
def __str__(self):
|
|
return json.dumps(self.to_dict())
|
|
|
|
def getFileNumber(path, num):
|
|
'''
|
|
Get a file from a listfile that contains ls -l or ls -n output.
|
|
|
|
Sequential arguments:
|
|
path -- The file must be output from 'ls -l' or 'ls -n' or other
|
|
ls commands with 'l' or 'n'.
|
|
num -- The file number, such as 1 for the first file, skipping a
|
|
line that is blank or starts with "total ", "#", or "$".
|
|
'''
|
|
lineN = 0
|
|
with open(path, 'r') as ins:
|
|
for rawL in ins:
|
|
line = rawL.strip()
|
|
if len(line) == 0:
|
|
continue
|
|
if line.startswith("total "):
|
|
continue
|
|
if line.startswith("#"):
|
|
continue
|
|
if line.startswith("$"):
|
|
continue
|
|
lineN += 1
|
|
if lineN == num:
|
|
return parse_ls(line)
|
|
return None
|
|
|
|
|
|
def printOnlyPatched(baseListPath):
|
|
baseListPath = os.path.abspath(baseListPath)
|
|
basePath, listName = os.path.split(baseListPath)
|
|
parentPath = os.path.split(basePath)[0]
|
|
patchedPath = os.path.join(parentPath, "Bucket_Game-branches")
|
|
patchedListPath = os.path.join(patchedPath, listName)
|
|
if not os.path.isfile(patchedListPath):
|
|
raise ValueError("{} is missing.".format(patchedListPath))
|
|
|
|
dotI = listName.find(".")
|
|
targetDirName = None
|
|
targetDirPath = None
|
|
if dotI > -1:
|
|
# See if the filename is named after the directory.
|
|
targetDirName = os.path.split(listName[:dotI])[1]
|
|
tryPath = os.path.join(patchedPath, targetDirName)
|
|
if os.path.isdir(tryPath):
|
|
targetDirPath = tryPath
|
|
else:
|
|
targetDirPath = os.getcwd()
|
|
print("WARNING: missing \"{}\" so trying current directory"
|
|
"".format(tryPath, None))
|
|
|
|
fid = getFileNumber(patchedListPath, 1)
|
|
if fid is None:
|
|
print("Error: \"{}\" contained no filenames."
|
|
"".format(patchedListPath))
|
|
exit(1)
|
|
tryFilePath = os.path.join(targetDirPath, fid['name'])
|
|
if not os.path.isfile(tryFilePath):
|
|
print("Error: \"{}\" doesn't exist. Run again from the"
|
|
" directory containing \"{}\", otherwise name the patch"
|
|
" file so the part before the first dot matches a"
|
|
" directory name in the \"{}\" directory and run in a"
|
|
" directory parallel to that."
|
|
"".format(tryFilePath, fid['name'], patchedPath))
|
|
exit(1)
|
|
|
|
error("* analyzing \"{}\"".format(patchedListPath))
|
|
patchedFIs = []
|
|
rawNames = []
|
|
with open(patchedListPath, 'r') as ins:
|
|
for rawL in ins:
|
|
line = rawL.strip()
|
|
if len(line) < 1:
|
|
continue
|
|
if line.startswith("total "):
|
|
continue
|
|
if line.startswith("$"):
|
|
continue
|
|
if line.startswith("#"):
|
|
continue
|
|
fid = parse_ls(line)
|
|
rawNames.append(fid['name'])
|
|
# fill_file_dict_path(fid, targetDirPath)
|
|
# error(fid)
|
|
|
|
error("* analyzing \"{}\"".format(baseListPath))
|
|
allCount = 0
|
|
matchCount = 0
|
|
with open(baseListPath, 'r') as ins:
|
|
for rawL in ins:
|
|
line = rawL.strip()
|
|
if len(line) < 1:
|
|
continue
|
|
if line.startswith("total "):
|
|
continue
|
|
if line.startswith("$"):
|
|
continue
|
|
if line.startswith("#"):
|
|
continue
|
|
fid = parse_ls(line)
|
|
allCount += 1
|
|
if fid['name'] in rawNames:
|
|
print(line)
|
|
matchCount += 1
|
|
error("{} of {} in {} were also in {}"
|
|
"".format(matchCount, allCount, baseListPath,
|
|
patchedListPath))
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) < 2:
|
|
usage()
|
|
error("Error: You are missing the required list filename argument.\n")
|
|
exit(1)
|
|
printOnlyPatched(sys.argv[1])
|
|
|