#!/usr/bin/env python3 ''' Remove the lines from the from the input file in Bucket_Game-base that are not in the matching list file in Bucket_Game-branches. The resulting modified list is written to standard output. The provided filename must exist in both the Bucket_Game-base directory and the parallel Bucket_Game-branches directory. The file must contain output such as from ls or find executing ls. Examples: find -type f -name "*.ogg" -exec ls -lh {} \\; find -type f -exec ls -lh {} \\; Usage: ./trimpatchstats.py ''' import sys import os import json def echo0(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) def usage(): echo0("") echo0("trimpatchstats.py") echo0("-----------------") echo0(__doc__) def splitFirst(line, delimiter): ''' Only split once. Return a tuple of both parts excluding delimiter. If delimiter isn't present, return the line, None ''' delI = line.find(delimiter) if delI < 0: return line, None return line[:delI], line[delI+1:] def splitLast(line, delimiter): ''' Only split once. Return a tuple of both parts excluding delimiter. If delimiter isn't present, return the line, None ''' delI = line.rfind(delimiter) if delI < 0: return line, None return line[:delI], line[delI+1:] def parse_ls(line): ''' Parse the output from "ls -l" or "ls -lh" (human-readable size). You can use 'n' instead of 'l' in either case, but the owner and group will be numbers instead of names. Returns: a dictionary containing 'permissions', 'hardlinks' (count), 'owner', 'group', 'size', 'date', 'name' where each entry is a string. The format of size is in bytes, unless the 'h' option was used with ls, in which case it will be human-readable (such as "8.3K""). The owner and group will be number strings rather than name strings if the 'n' option was used with ls. Sequential arguments: line -- The line must be like "perms hardlinks user group size date" such as any of the following: -rw-r--r-- 1 owner owner 8.3K Dec 14 2018 fire_extinguish.ogg -rw-r--r-- 1 1000 1000 8.3K Dec 14 2018 'fire extinguish.ogg' -rw-r--r-- 1 owner owner 8300 Dec 14 2018 "Poikilos' fire.ogg" ''' if line.startswith("total "): raise ValueError("Expected ls -l file lines but got the" "ls -l total line: {}".format(line)) results = {} results['name'] = None chopped = line # Gradually chop off parts of line until only the date # remains, since that is the least predictable as far as # delimiters. if chopped.endswith("'") or chopped.endswith('"'): # the filename has spaces # (or has "'" if has double quotes). inQ = chopped[-1] chopped = chopped[:-1] firstQI = chopped.rfind(inQ) if firstQI < 0: raise ValueError("There is a missing '{}' before the" " filename that ends with '{}'" " in \"{}\"" "".format(inQ, inQ, chopped + inQ)) name = chopped[firstQI+1:] chopped = chopped[:firstQI-1] # -1 to remove the space else: chopped, name = splitLast(chopped, " ") results['permissions'], chopped = splitFirst(chopped, " ") results['hardlinks'], chopped = splitFirst(chopped, " ") results['owner'], chopped = splitFirst(chopped, " ") results['group'], chopped = splitFirst(chopped, " ") results['size'], chopped = splitFirst(chopped, " ") results['date'] = chopped # Only the date should remain by now. # results['name'] = os.path.split(path)[0] # results['path'] = path results['name'] = name return results def fill_file_dict_path(fileInfoDict, baseDir): ''' Fill in the 'path' of the fileInfoDict with the baseDir, or if not specified, the current working directory IF the file exists in it, otherwise set 'path' to None if not set already. ''' name = fileInfoDict.get('name') if name is None: raise ValueError("The 'name' key must contain a filename" " in the dictionary provided to" " fill_file_dict_path.") baseDirMsg = "specified baseDir" if baseDir is None: baseDirMsg = "current directory" baseDir = os.getcwd() path = os.path.join(baseDir, name) fileInfoDict['path'] = os.path.abspath(name) if not os.path.exists(path): if os.path.exists(fileInfoDict['path']): print("WARNING: missing \"{}\" so using {} to form the path" " \"{}\"" "".format(path, baseDirMsg, fileInfoDict['path'])) else: print("WARNING: missing \"{}\" so using {} to form the path" " \"{}\" which also doesn't exist" "".format(path, baseDirMsg, fileInfoDict['path'])) fileInfoDict['name'] = os.path.split(fileInfoDict['path'])[-1] def key_matches_for_any(haystacks, key, needle): ''' See if any dict in haystacks such as haystacks[0][key] is needle. ''' for haystack in haystacks: if haystack.get(key) == needle: return True else: return False class LSFileInfo: def __init__(self, line, baseDir): ''' Parse the output from "ls -l" or "ls -lh" (human-readable size). You can use 'n' instead of 'l' in either case, but the owner and group will be numbers instead of names. Sequential arguments: line -- The line must be like (ls -lh in this example): -rw-r--r-- 1 owner owner 8.3K Dec 14 2018 fire_extinguish.ogg perms hardlinks user group size date baseDir -- Provide the directory from which the ls command was originally run so the LSFileInfo can get the full path. Otherwise, the current working directory will be tried. ''' results = parse_ls(line) # self.name = results['name'] # self.path = None self.permissions = results['permissions'] self.hardlinks = results['hardlinks'] self.owner = results['owner'] self.group = results['group'] self.size = results['size'] self.date = results['date'] fill_file_dict_path(results, baseDir) self.name = results.get('name') self.path = results.get('path') def to_dict(self): results = {} # result['name'] = self.name results['path'] = self.path results['permissions'] = self.permissions results['hardlinks'] = self.hardlinks results['owner'] = self.owner results['group'] = self.group results['date'] = self.date return result def __repr__(self): return json.dumps(self.to_dict()) def __str__(self): return json.dumps(self.to_dict()) def getFileNumber(path, num): ''' Get a file from a listfile that contains ls -l or ls -n output. Sequential arguments: path -- The file must be output from 'ls -l' or 'ls -n' or other ls commands with 'l' or 'n'. num -- The file number, such as 1 for the first file, skipping a line that is blank or starts with "total ", "#", or "$". ''' lineN = 0 with open(path, 'r') as ins: for rawL in ins: line = rawL.strip() if len(line) == 0: continue if line.startswith("total "): continue if line.startswith("#"): continue if line.startswith("$"): continue lineN += 1 if lineN == num: return parse_ls(line) return None def printOnlyPatched(baseListPath): baseListPath = os.path.abspath(baseListPath) basePath, listName = os.path.split(baseListPath) parentPath = os.path.split(basePath)[0] patchedPath = os.path.join(parentPath, "Bucket_Game-branches") patchedListPath = os.path.join(patchedPath, listName) if not os.path.isfile(patchedListPath): raise ValueError("{} is missing.".format(patchedListPath)) dotI = listName.find(".") targetDirName = None targetDirPath = None if dotI > -1: # See if the filename is named after the directory. targetDirName = os.path.split(listName[:dotI])[1] tryPath = os.path.join(patchedPath, targetDirName) if os.path.isdir(tryPath): targetDirPath = tryPath else: targetDirPath = os.getcwd() print("WARNING: missing \"{}\" so trying current directory" "".format(tryPath, None)) fid = getFileNumber(patchedListPath, 1) if fid is None: print("Error: \"{}\" contained no filenames." "".format(patchedListPath)) return 1 tryFilePath = os.path.join(targetDirPath, fid['name']) if not os.path.isfile(tryFilePath): print("Error: \"{}\" doesn't exist. Run again from the" " directory containing \"{}\", otherwise name the patch" " file so the part before the first dot matches a" " directory name in the \"{}\" directory and run in a" " directory parallel to that." "".format(tryFilePath, fid['name'], patchedPath)) return 2 echo0("* analyzing \"{}\"".format(patchedListPath)) patchedFIs = [] rawNames = [] with open(patchedListPath, 'r') as ins: for rawL in ins: line = rawL.strip() if len(line) < 1: continue if line.startswith("total "): continue if line.startswith("$"): continue if line.startswith("#"): continue fid = parse_ls(line) rawNames.append(fid['name']) # fill_file_dict_path(fid, targetDirPath) # echo0(fid) echo0("* analyzing \"{}\"".format(baseListPath)) allCount = 0 matchCount = 0 with open(baseListPath, 'r') as ins: for rawL in ins: line = rawL.strip() if len(line) < 1: continue if line.startswith("total "): continue if line.startswith("$"): continue if line.startswith("#"): continue fid = parse_ls(line) allCount += 1 if fid['name'] in rawNames: print(line) matchCount += 1 echo0("{} of {} in {} were also in {}" "".format(matchCount, allCount, baseListPath, patchedListPath)) return 0 def main(): if len(sys.argv) < 2: usage() echo0("Error: You are missing the required list filename argument.\n") return 1 return printOnlyPatched(sys.argv[1]) if __name__ == "__main__": sys.exit(main())