From 1b335d63074d8a8215f64a32b848a8de913abfb0 Mon Sep 17 00:00:00 2001 From: poikilos <7557867+poikilos@users.noreply.github.com> Date: Tue, 30 Nov 2021 21:20:39 -0500 Subject: [PATCH] Store last_src for reporting and detecting whether cache or url was loaded. Outline code for using a token. Improve documentation. Handle options and usage more predictably. --- utilities/enissue.py | 42 +++++-- utilities/pyissuesyncd | 260 ++++++++++++++++++++++++++++++++--------- 2 files changed, 242 insertions(+), 60 deletions(-) diff --git a/utilities/enissue.py b/utilities/enissue.py index d1e0de7..728a15c 100755 --- a/utilities/enissue.py +++ b/utilities/enissue.py @@ -25,11 +25,20 @@ Options: pg_dump dbname > outfile --test Run unit tests then exit (0) if passed. -Partial API documentation: -options keys: +## Partial API documentation: +This part covers the use of enissue itself as a Python API such as via +`import enissue` (For using a web API, you must view the API +documentation at the website of the specific web API). + +### options keys: - default_dt_parser: This must be a method that returns a python datetime object by accepting a single argument, which is a string from the style of the Repo's API. +- token: This should be set to a token that you've generated using the + web interface of your repo. The token will cause you to "see your + rate limit bumped to 5,000 requests an hour" on Github according to + + 2021-11-30. ''' from __future__ import print_function import sys @@ -345,10 +354,11 @@ class Repo: ): ''' Keyword arguments: - options -- The options dict have any of the following keys (any - that aren't set will be detected based on the URL--if - there is an api name that corresponds to your site's - API in the apis global dict): + options -- Set self.options. Any that aren't set will be + detected based on the URL, otherwise from + options['api_id'] (if there is an api name that + corresponds to your site's API in the apis global dict). 
+ The options dict may have any of the following keys: repo_url -- This is required. It can be an API or web URL as long as it ends with username/reponame (except where there is no username in the URL). @@ -402,7 +412,11 @@ class Repo: if you set a different single_cache for each repo! api_id -- a key in the global apis dict which determines the defaults for accessing the web API. + ''' + + if options is None: + raise ValueError("options cannot be None.") repo_url = options.get('repo_url') debug("* using URL {}".format(repo_url)) if repo_url is None: @@ -833,6 +847,7 @@ class Repo: if not quiet: print(p+"Cache expires: {}".format(expires_s)) with open(c_path) as json_file: + self.last_src = c_path result = json.load(json_file) max_issue = None results = result @@ -876,7 +891,7 @@ class Repo: print(p+"There is no cache for \"{}\"".format( c_path )) - + self.last_src = query_s try: debug(p+"Query URL (query_s): {}".format(query_s)) response = request.urlopen(query_s) @@ -952,6 +967,13 @@ class Repo: The reactions to a timeline event are from a URL such as: https://api.github.com/repos/poikilos/EnlivenMinetest/issues/comments/968357490/reactions + This method uses the following options from self.options: + token -- If set, then the header gets a new line like: + "Authorization: token ghp_16C7e42F292c6912E7710c838347Ae178B4a" + where ghp_16C7e42F292c6912E7710c838347Ae178B4a is just an + example from the GitHub documentation but you must set a + real token you've generated. + Keyword arguments: quiet -- Set to True to hide messages (verbose mode will override this). @@ -963,6 +985,10 @@ class Repo: length as the max results count). Therefore, this method refuses to handle such URLs. 
''' + if self.options is None: + raise RuntimeError("You must set options before running" + " getCachedJsonDict") + token = self.options.get('token') result = None p = self.log_prefix # The known API URLs are already set as follows: @@ -1066,6 +1092,7 @@ class Repo: + max_cache_delta)) with open(c_path) as json_file: try: + self.last_src = c_path result = json.load(json_file) except json.decoder.JSONDecodeError as ex: error("") @@ -1079,6 +1106,7 @@ class Repo: if result is not None: return result, None + self.last_src = url try: res = request.urlopen(url) data_s = decode_safe(res.read()) diff --git a/utilities/pyissuesyncd b/utilities/pyissuesyncd index 56eb60f..3dee8a4 100755 --- a/utilities/pyissuesyncd +++ b/utilities/pyissuesyncd @@ -14,34 +14,45 @@ See the license file in the included EnlivenMinetest directory or at [EnlivenMinetest](https://github.com/poikilos/EnlivenMinetest) -Outputs: -data_directory: The data directory for this service daemon is -os.path.join(profile, ".cache", "pyissuesyncd"). +Definitions: +- data_directory: The data directory for this service daemon is + os.path.join(profile, ".cache", "pyissuesyncd"). +- repo: In this context, repo refers to a project management system with + a web API. +Examples: +DST_REPO=https://example.com/git/repo pyissuesyncd +pyissuesyncd --dst-repo https://example.com/git/repo + +The two _CACHE directories below are used as the single_cache option +for the Repo (see enissue.py's Repo class for documentation). + +''' + +__old_doc__ = ''' required arguments: --dst-repo (or set the DST_REPO environment variable) Issues and dependent data will be overwritten at this API URL. - optional arguments: Environment variables get be set, but a CLI argument will override the corresponding variable noted below in all caps. -The two _CACHE directories below are used as the single_cache option -for the Repo (see enissue.py's Repo class for documentation). 
- --src-cache: Set the directory to store a cached version of the source repo's data. * defaults to SRC_CACHE or os.path.join(data_directory, "source") --dst-cache: Set the directory to store a cached version of the destination repo's data. * defaults to DST_CACHE or os.path.join(data_directory, "destination") +--src-repo-token: Set the token you generated using the repo's web interface. +* defaults to SRC_REPO_TOKEN or None -Examples: -DST_REPO=https://example.com/git/repo pyissuesyncd -pyissuesyncd --dst-repo https://example.com/git/repo +--src-min-issue: Set what issue number to check first. +* defaults to SRC_MIN_ISSUE or None +--src-max-issue: Set what issue number to check last. +* defaults to SRC_MAX_ISSUE or None ''' import os import sys @@ -140,6 +151,23 @@ def get_issue(repo, options, issue_no): def start_issuesyncd(src_options, dst_options): + src_s_c = src_options.get("single_cache") + dst_s_c = dst_options.get("single_cache") + if src_s_c is None: + usage() + raise ValueError("single_cache is None in src_options but" + " it should be set to a directory that will" + " contain the issues directory.") + if dst_s_c is None: + usage() + raise ValueError("single_cache is None in dst_options but" + " it should be set to a directory that will" + " contain the issues directory.") + if src_s_c == dst_s_c: + usage() + raise ValueError("single_cache for dst and src are both" + " {} but must be different." + "".format(src_s_c)) # src_never_expire = src_options.get('never_expire') is True max_issue = src_options.get('max_issue') if max_issue is None: @@ -149,7 +177,11 @@ def start_issuesyncd(src_options, dst_options): else: max_issue = int(max_issue) - issue_no = 0 # This is incremented to 1 before use. + min_issue = src_options.get("min_issue") + if min_issue is None: + min_issue = 1 + + issue_no = min_issue - 1 # This is incremented to min_issue before use. 
# issue_no = max_issue - 1 # debug only src_res_code = 0 end_codes = [404, 403] @@ -216,12 +248,25 @@ def start_issuesyncd(src_options, dst_options): continue else: error("However, an issue was returned.") - error("Got issue {}".format(issue_no)) + got_fmt = "#{}: got (source not recorded)" + if src_repo.last_src is not None: + if src_repo.last_src.startswith("http:"): + got_fmt = "#{} downloaded" + elif os.path.isfile(src_repo.last_src): + got_fmt = "#{} loaded from cache" + error(got_fmt.format(issue_no)) # Example: ~/.cache/pyissuesyncd/source/issues/1.json src_dt_parser = src_repo.options['default_dt_parser'] src_created_dt_s = src_repo.getKnown(src_issue, 'created_at') src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at') - src_updated_dt = src_dt_parser(src_updated_dt_s) + try: + src_updated_dt = src_dt_parser(src_updated_dt_s) + except ValueError as ex: + error("Error in {}".format(src_repo.last_src)) + error(ex) + error("If you changed repos and used the same cache dir," + " manually delete the cache file or directory above.") + sys.exit(1) src_updated_ts = int(src_updated_dt.strftime("%s")) # ^ See @@ -294,70 +339,179 @@ def start_issuesyncd(src_options, dst_options): pass # dst_repo.update_issue(src_issue, src_repo) +''' +manual_args = ['--dst-repo', '--src-repo', '--src-cache', + '--dst-cache', '--src-max-issue', '--src-repo-token'] +''' + +prev_arg = None + +collect_src_keys = { + 'SRC_REPO': "repo_url", + 'SRC_CACHE': "single_cache", + 'SRC_MAX_ISSUE': "max_issue", + 'SRC_REPO_TOKEN': "token", + 'SRC_MIN_ISSUE': "min_issue", +} + +collect_dst_keys = { + 'DST_REPO': "repo_url", + 'DST_CACHE': "token", +} +src_args = {} +for envVarName, option in collect_src_keys.items(): + _arg = "--{}".format(envVarName).replace("_", "-").lower() + src_args[_arg] = option +dst_args = {} +for envVarName, option in collect_dst_keys.items(): + _arg = "--{}".format(envVarName).replace("_", "-").lower() + dst_args[_arg] = option + +env_help = { + 'DST_REPO': "Issues 
and dependent data will be overwritten at this API URL.", + 'SRC_CACHE': "Set the directory to store a cached version of the source repo's data.", + 'DST_CACHE': "Set the directory to store a cached version of the destination repo's data.", + 'SRC_REPO_TOKEN': "Set the token you generated using the repo's web interface.", + 'SRC_MIN_ISSUE': "Set what issue number to check first.", + 'SRC_MAX_ISSUE': "Set what issue number to check last.", +} +src_option_defaults = { + 'repo_url': 'https://github.com/poikilos/EnlivenMinetest', + 'api_id': 'GitHub', +} +env_default_help = { + 'SRC_CACHE': '(default = os.path.join(data_directory, "source"))', + 'DST_CACHE': 'os.path.join(data_directory, "destination")', + 'SRC_REPO_TOKEN': 'None', + 'SRC_MIN_ISSUE': 'None', + 'SRC_MAX_ISSUE': 'None', + 'SRC_REPO': src_option_defaults['repo_url'], +} +required_env_names = ['DST_REPO'] def usage(): print(__doc__) + print("All options:") + arg_w = 17 + env_w = 27 + line_fmt = "{:"+str(arg_w)+"} {:"+str(env_w)+"} {}" + p = " " # option help prefix + for _arg, option in src_args.items(): + help_msg = "Set src_options['{}']".format(option) + envVarName = None + envVarMsg = None + for k, v in collect_src_keys.items(): + if v == option: + envVarName = k + envVarMsg = "(or env var {})".format(envVarName) + break + # arg_msg = _arg + # if envVarName in required_env_names: + # arg_msg += " (required)" + print(line_fmt.format(_arg, envVarMsg, help_msg)) + if envVarName is not None: + env_help_msg = env_help.get(envVarName) + if env_help_msg is not None: + print(p+env_help_msg) + env_default_help_msg = env_default_help.get(envVarName) + if env_default_help_msg is not None: + print(p+"(default: {})".format(env_default_help_msg)) + if envVarName in required_env_names: + print(p+"(required)") + + for _arg, option in dst_args.items(): + help_msg = "Set dst_options['{}']".format(option) + envVarName = None + envVarMsg = None + for k, v in collect_dst_keys.items(): + if v == option: + envVarName = k + 
envVarMsg = "(or env var {})".format(envVarName) + break + print(line_fmt.format(_arg, envVarMsg, help_msg)) + if envVarName is not None: + env_help_msg = env_help.get(envVarName) + if env_help_msg is not None: + print(p+env_help_msg) + env_default_help_msg = env_default_help.get(envVarName) + if env_default_help_msg is not None: + print(p+"(default: {})".format(env_default_help_msg)) + if envVarName in required_env_names: + print(p+"(required)") + + print("") + if __name__ == "__main__": src_options = { - 'repo_url': "https://github.com/poikilos/EnlivenMinetest", + 'repo_url': src_option_defaults['repo_url'], 'never_expire': True, 'quiet': True, - 'api_id': "GitHub", + 'api_id': src_option_defaults['api_id'], } dst_options = { 'never_expire': True, 'quiet': True, 'api_id': "Gitea", } - DST_REPO = os.environ.get('DST_REPO') - if DST_REPO is not None: - dst_options['repo_url'] = DST_REPO - del DST_REPO - SRC_REPO = os.environ.get('SRC_REPO') - if DST_REPO is not None: - src_options['repo_url'] = SRC_REPO - del SRC_REPO - - SRC_CACHE = os.environ.get('SRC_CACHE') - if SRC_CACHE is None: - SRC_CACHE = os.path.join(data_directory, "source") - - DST_CACHE = os.environ.get('DST_CACHE') - if DST_CACHE is None: - DST_CACHE = os.path.join(data_directory, "destination") - - SRC_MAX_ISSUE = os.environ.get('SRC_MAX_ISSUE') - - prev_arg = None - - manual_args = ['--dst-repo', '--src-repo', '--src-cache', - '--dst-cache', '--src-max-issue'] + for envVarName, option in collect_src_keys.items(): + _VAL = os.environ.get(envVarName) + if _VAL is not None: + src_args[option] = _VAL + error("* environment set src_options['{}'] to {} via {}" + "".format(option, _VAL, envVarName)) + + for envVarName, option in collect_dst_keys.items(): + _VAL = os.environ.get(envVarName) + if _VAL is not None: + dst_args[option] = _VAL + error("* environment set dst_options['{}'] to {} via {}" + "".format(option, _VAL, envVarName)) + set_obj_name = None + set_key = None for arg in sys.argv[1:]: - if 
prev_arg == "--dst-repo": - dst_options['repo_url'] = arg - elif prev_arg == "--src-repo": - src_options['repo_url'] = arg - elif prev_arg == "--src_cache": - SRC_CACHE = arg - elif prev_arg == "--dst_cache": - DST_CACHE = arg - elif prev_arg == "--src-max-issue": - SRC_MAX_ISSUE = int(arg) - elif arg in manual_args: + if set_key is not None: + if set_obj_name == "dst": + dst_options[set_key] = arg + error("* set dst_options['{}'] to {}" + "".format(set_key, arg)) + elif set_obj_name == "src": + src_options[set_key] = arg + error("* set src_options['{}'] to {}" + "".format(set_key, arg)) + else: + raise RuntimeError("set_obj_name must be dst or src.") + set_key = None + set_obj_name = None + elif arg in ['-h', '--help']: + usage() + sys.exit(0) + elif arg in src_args.keys(): + set_key = src_args[arg] + set_obj_name = "src" + pass + elif arg in dst_args.keys(): + set_key = dst_args[arg] + set_obj_name = "dst" pass else: usage() error("Error: The argument is not valid: {}".format(arg)) sys.exit(1) prev_arg = arg + if set_key is not None: + usage() + error("Error: You must provide a value after {}" + "".format(set_key)) + sys.exit(1) - src_options['single_cache'] = SRC_CACHE - src_options['max_issue'] = SRC_MAX_ISSUE - # ^ INFO: start_issuesyncd warns if SRC_MAX_ISSUE is None. - dst_options['single_cache'] = DST_CACHE + # INFO: start_issuesyncd warns if SRC_MAX_ISSUE is None. + if src_options.get('single_cache') is None: + src_options['single_cache'] = os.path.join(data_directory, "source") + if dst_options.get('single_cache') is None: + dst_options['single_cache'] = os.path.join(data_directory, "destination") error("SRC_REPO (--src-repo) is {}" "".format(src_options.get('repo_url')))