From 4096b5cab15c7156d1297ec0747358070b8fadbe Mon Sep 17 00:00:00 2001 From: poikilos <7557867+poikilos@users.noreply.github.com> Date: Tue, 30 Nov 2021 19:33:40 -0500 Subject: [PATCH] Get issues until you can't. Further improve reporting and documentation. --- utilities/enissue.py | 63 ++++++++++++++++--- utilities/pyissuesyncd | 137 +++++++++++++++++++++++++++++++++++------ 2 files changed, 172 insertions(+), 28 deletions(-) diff --git a/utilities/enissue.py b/utilities/enissue.py index 0251efe..d1e0de7 100755 --- a/utilities/enissue.py +++ b/utilities/enissue.py @@ -400,8 +400,9 @@ class Repo: structure (except for issues and other subdirectories which mimic the web API routes). Only use this option if you set a different single_cache for each repo! + api_id -- a key in the global apis dict which determines the + defaults for accessing the web API. ''' - self.ERROR_410 = "Error 410" repo_url = options.get('repo_url') debug("* using URL {}".format(repo_url)) if repo_url is None: @@ -413,6 +414,10 @@ class Repo: self.api_id = options.get('api_id') if urlParts[-2] == "repo.or.cz": self.remote_user = "almikes@aol.com" # Wuzzy2 + if self.api_id is not None: + if self.api_id != 'git_instaweb': + error("WARNING: URL has {} but self.api_id was {}" + "".format(urlParts[-2], self.api_id)) self.api_id = "git_instaweb" # Such as https://repo.or.cz/minetest_treasurer.git # - locally, git instaweb is controlled via: @@ -448,7 +453,10 @@ class Repo: debug("* using specified API: {}".format(self.api_id)) if self.api_id is None: self.api_id = "Gitea" - error(" * assuming API is {}".format(self.api_id)) + if "github.com" in repo_url.lower(): + error("WARNING: assuming Gitea but URL has github.com.") + error(" * assuming API is {} for {}" + "".format(self.api_id, repo_url)) if self.api_id is None: raise RuntimeError("api_id is not set") api_meta = apis.get(self.api_id) @@ -566,11 +574,16 @@ class Repo: name -- a well-known issue key such as 'body' that will be translated to an
API-specific key. ''' - key = self.options['known_issue_keys'].get(name) + known_issue_keys = self.options.get('known_issue_keys') + if known_issue_keys is None: + raise RuntimeError("known_issue_keys shouldn't be None.") + key = known_issue_keys.get(name) if key is None: raise KeyError("{} is not a well-known key in" " known_issue_keys. Try _getIssueValue to" - " forcefully get a value.") + " forcefully get a value but only if you" + " ran load_issues first--otherwise use" + " getKnown.") return key def _getIssueValue(self, index, key): @@ -581,16 +594,37 @@ class Repo: ''' return self.issues[index][key] - def getKnown(self, index, name): + def _getKnownAt(self, index, name): ''' Sequential arguments: index -- an index in self.issues name -- a well-known issue key such as 'body' that will be translated to an API-specific key. ''' + if self.issues is None: + raise RuntimeError("You cannot use _getKnownAt when there" + " no issues loaded (try getKnown).") key = self.getKnownKey(name) + if key is None: + raise RuntimeError("getKnownKey should not be None.") return self._getIssueValue(index, key) + def getKnown(self, issue, name): + ''' + Sequential arguments: + issue -- a full issue dict such as obtained via get_issue + name -- a well-known issue key such as 'body' that will be + translated to an API-specific key. + ''' + if issue is None: + raise ValueError("issue is None but must be an issue dict" + " such as obtained via get_issue.") + if not isinstance(issue, dict): + raise ValueError("issue must be an issue dict such as" + " obtained via get_issue.") + key = self.getKnownKey(name) + return issue[key] + def setCachesPath(self, path, flat=True): ''' @@ -653,7 +687,10 @@ class Repo: search_terms -- Search for each of these terms. Returns: - A 2-long tuple of: (results, error string (None if no error)). 
+ A 2-long tuple of: (results, error_dict) where error_dict is + None if there is no error, otherwise contains a 'reason', + possibly a 'code' (standard website error code), and possibly a + 'url'. Raises: ValueError if query is not None and issue_no is not None. @@ -847,12 +884,14 @@ class Repo: msg = ex.reason if ex.code == 410: msg = ("The issue was apparently deleted ({})." - "".format(self.ERROR_410)) + "".format(ex.reason)) return ( None, { 'code': ex.code, 'reason': msg, + 'headers': ex.headers, + 'url': query_s, } ) # msg = str(ex) + ": " + self.rateLimitFmt.format(query_s) @@ -861,12 +900,15 @@ class Repo: { 'code': ex.code, 'reason': msg, + 'headers': ex.headers, + 'url': query_s, } ) response_s = decode_safe(response.read()) if not os.path.isdir(self.c_repo_path): os.makedirs(self.c_repo_path) - print(p+"Saving issues cache: {}".format(c_path)) + if not quiet: + print(p+"Saving issues cache: {}".format(c_path)) with open(c_path, "w") as outs: outs.write(response_s) result = json.loads(response_s) @@ -1055,6 +1097,7 @@ class Repo: 'code': ex.code, 'reason': ex.reason, 'headers': ex.headers, + 'url': url, } ) @@ -1372,14 +1415,14 @@ class Repo: " only one issue because a single" " issue has its own URL with only" " one result (not a list).") - results, msg = self._get_issues( + results, err = self._get_issues( options, query=query, issue_no=issue_no, search_terms=search_terms, ) self.issues = results - return results, msg + return results, err def get_match(self, mode, issue_no=None, match_all_labels_lower=[]): ''' diff --git a/utilities/pyissuesyncd b/utilities/pyissuesyncd index 10365bc..56eb60f 100755 --- a/utilities/pyissuesyncd +++ b/utilities/pyissuesyncd @@ -141,25 +141,86 @@ def get_issue(repo, options, issue_no): def start_issuesyncd(src_options, dst_options): # src_never_expire = src_options.get('never_expire') is True - non_issue = 1 - issue_no = non_issue - 1 - # while True: + max_issue = src_options.get('max_issue') + if max_issue is None: + 
max_issue = 1000 + error("WARNING: SRC_MAX_ISSUE set to default: {}" + "".format(max_issue)) + else: + max_issue = int(max_issue) + + issue_no = 0 # This is incremented to 1 before use. + # issue_no = max_issue - 1 # debug only src_res_code = 0 - # while issue_no < non_issue: # for debug only - while src_res_code != 404: + end_codes = [404, 403] + # while src_res_code not in end_codes: + while True: + # while (issue_no + 1) <= max_issue: # for debug only issue_no += 1 + if max_issue is not None: + if issue_no > max_issue: + error("* ending due to setting: --src-max-issue={}" + " (can also be set by SRC_MAX_ISSUE env var)" + "".format(max_issue)) + break src_repo = Repo(src_options) src_issue, err = get_issue(src_repo, src_options, issue_no) + deleted = False if err is not None: + ''' error("Error accessing source issue {}: {}: {}" "".format(issue_no, err.get('code'), err.get('reason'))) - continue - + ''' + src_res_code = err.get('code') + url = err.get('url') + if src_res_code in end_codes: + if src_res_code == 403: + error("#{}: stopping due to error {} ({})" + "".format(issue_no, err.get('code'), + err.get('reason'))) + # error(" * reason: {}".format()) + # error(" * headers: {}".format(err.get('headers'))) + break + elif src_res_code == 404: + error("#{}: Error 404: There is no {}" + " so the end of the issues may have been" + " reached.".format(issue_no, url)) + error(" * reason: {}".format(err.get('reason'))) + # error(" * headers: {}".format(err.get('headers'))) + continue + elif src_res_code == 410: + error("#{}: The issue seems to have been deleted." + "".format(issue_no)) + error(" * reason: {}".format(err.get('reason'))) + # error(" * headers: {}".format(err.get('headers'))) + deleted = True + # TODO: delete on dest (carefully!)
+ continue + else: + error("#{}: stopping due to error code {}" + "".format(issue_no, src_res_code)) + break + else: + error("#{}: continuing anyway but got error code {}" + "".format(issue_no, src_res_code)) + + + if src_issue is None: + if src_res_code not in end_codes: + error("#{}: Skipping due to unprocessed error {}" + "".format(issue_no, src_res_code)) + else: + error("#{}: Stopping due to unprocessed error {}" + "".format(issue_no, src_res_code)) + continue + else: + error("However, an issue was returned.") + error("Got issue {}".format(issue_no)) # Example: ~/.cache/pyissuesyncd/source/issues/1.json src_dt_parser = src_repo.options['default_dt_parser'] - src_created_dt_s = src_repo.getKnown(0, 'created_at') - src_updated_dt_s = src_repo.getKnown(0, 'updated_at') + src_created_dt_s = src_repo.getKnown(src_issue, 'created_at') + src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at') src_updated_dt = src_dt_parser(src_updated_dt_s) src_updated_ts = int(src_updated_dt.strftime("%s")) # ^ See