From ae38adb88e4727665f784555ac65d5da6b5008e7 Mon Sep 17 00:00:00 2001 From: poikilos <7557867+poikilos@users.noreply.github.com> Date: Tue, 30 Nov 2021 23:03:46 -0500 Subject: [PATCH] Handle non-standard and standard website errors better. --- utilities/enissue.py | 157 ++++++++++++++++++++++++++++++++--------- utilities/pyissuesyncd | 57 ++++++++++----- 2 files changed, 163 insertions(+), 51 deletions(-) diff --git a/utilities/enissue.py b/utilities/enissue.py index 728a15c..e26ba18 100755 --- a/utilities/enissue.py +++ b/utilities/enissue.py @@ -65,6 +65,13 @@ try: from urllib.parse import quote from urllib.parse import unquote from urllib.error import HTTPError + try: + import requests + except ImportError: + sys.stderr.write("If you try to use a token, you must have the" + " requests package for python3 such as via:\n" + " sudo apt-get install python3-requests\n") + sys.stderr.flush() except ImportError: # Python 2 # See @@ -75,6 +82,13 @@ except ImportError: from urllib import unquote from urllib2 import HTTPError # ^ urllib.error.HTTPError doesn't exist in Python 2 + try: + import requests + except ImportError: + sys.stderr.write("If you try to use a token, you must have the" + " requests package for python2 such as via:\n" + " sudo apt-get install python-requests\n") + sys.stderr.flush() @@ -319,6 +333,19 @@ def toSubQueryValue(value): return value +def to_error(results): + if hasattr(results, 'items'): + if results.get('documentation_url') is not None: + msg = results.get('message') + if msg is None: + msg = "There is nothing at the address." + return { + 'code': 404, # Silly GitHub, I will force a 404 of course since you didn't. + 'reason': msg, + } + return None + + def usage(): print("") print("Commands:") @@ -689,6 +716,10 @@ class Repo: from the internet and re-save the cached data). - 'never_expire': Never download unless 'refresh' is set or there is no cache file. 
+ - 'token': The token such as generated by the web GUI of + your repo management system mitigates rate limiting to a + level that is probably not noticeable (5000/hr according + to GitHub API docs 2021-11-30). Keyword arguments: @@ -824,6 +855,7 @@ class Repo: debug(" There was no custom query.") self.last_query_s = query_s + # ^ Differs from self.last_src, which can be a file. if os.path.isfile(c_path): @@ -836,6 +868,7 @@ class Repo: is_fresh = filetime > cache_delta max_cache_d_s = "{}".format(max_cache_delta) expires_s = "{}".format(filetime + max_cache_delta) + self.last_src = c_path # Changed later if doesn't return if never_expire: max_cache_d_s = "never_expire" expires_s = "never_expire" @@ -847,11 +880,19 @@ class Repo: if not quiet: print(p+"Cache expires: {}".format(expires_s)) with open(c_path) as json_file: - self.last_src = c_path result = json.load(json_file) max_issue = None results = result - if results_key is not None: + err = to_error(result) + if err is not None: + error("WARNING: a website error was saved" + " as an issue, so it will be deleted:" + " \"{}\"" + "".format(c_path)) + result = None + os.remove(c_path) + err = None + elif results_key is not None: if hasattr(results, results_key): debug(" loaded result[{}]" "".format(results_key)) @@ -859,23 +900,25 @@ class Repo: else: error("WARNING: expected {} in dict" "".format(results_key)) - if hasattr(results, 'keys'): - debug(" issue not page: converting to list") - results = [result] - debug(p+"The cache file has" - " {} issue(s).".format(len(results))) - for issue in results: - issue_n = issue.get("number") - # debug("issue_n: {}".format(issue_n)) - if issue_n is not None: - if (max_issue is None) or (issue_n > max_issue): - max_issue = issue_n - if issue_no is None: - # Only mention this if more than one issue - debug(" The highest cached issue# (this run)" - " is {}.".format(max_issue)) - debug(" returning {} issue(s)".format(len(results))) - return results, None + if result is not 
None: + if hasattr(results, 'keys'): + debug(" issue not page: converting to list") + results = [result] + debug(p+"The cache file has" + " {} issue(s).".format(len(results))) + for issue in results: + issue_n = issue.get("number") + # debug("issue_n: {}".format(issue_n)) + if issue_n is not None: + if (max_issue is None) or (issue_n > max_issue): + max_issue = issue_n + if issue_no is None: + # Only mention this if more than one issue + debug(" The highest cached issue# (this run)" + " is {}.".format(max_issue)) + debug(" returning {} issue(s)".format(len(results))) + return results, None + # else load from URL (See os.remove() above for why) else: if refresh is True: if not quiet: @@ -891,10 +934,26 @@ class Repo: print(p+"There is no cache for \"{}\"".format( c_path )) + self.last_src = query_s + # ^ If didn't return yet, the source is a URL. + req_is_complex = False try: debug(p+"Query URL (query_s): {}".format(query_s)) - response = request.urlopen(query_s) + headers = {} + token = self.options.get('token') + if token is not None: + headers['Authorization'] = "token " + token + if len(headers) > 0: + req_is_complex = True + response = requests.get(query_s, headers=headers) + # response = req.urlopen(query_s) + res_text = response.text + # NOTE: In python3, response.content is in bytes + # (). 
+ else: + response = request.urlopen(query_s) + res_text = decode_safe(response.read()) except HTTPError as ex: msg = ex.reason if ex.code == 410: @@ -919,14 +978,22 @@ class Repo: 'url': query_s, } ) - response_s = decode_safe(response.read()) + + + if not os.path.isdir(self.c_repo_path): os.makedirs(self.c_repo_path) - if not quiet: - print(p+"Saving issues cache: {}".format(c_path)) + # if not quiet: + # print(p+"Saving issues cache: {}".format(c_path)) + # with open(c_path, "w") as outs: + # outs.write(res_text) + result = json.loads(res_text) + err = to_error(result) + if err is not None: + return None, err with open(c_path, "w") as outs: - outs.write(response_s) - result = json.loads(response_s) + json.dump(result, outs, indent=2) + debug(p+"Wrote {}".format(c_path)) if results_key is not None: result = result[results_key] @@ -1072,6 +1139,7 @@ class Repo: "".format(url)) if os.path.isfile(c_path): + self.last_src = c_path # changed later if doesn't return # See max_cache_delta = timedelta(hours=12) @@ -1092,7 +1160,6 @@ class Repo: + max_cache_delta)) with open(c_path) as json_file: try: - self.last_src = c_path result = json.load(json_file) except json.decoder.JSONDecodeError as ex: error("") @@ -1103,20 +1170,42 @@ class Repo: # Do NOT set err NOR set to a tuple (A result # of None means it will load from the web # below)! + err = to_error(result) + if err is not None: + result = None + error("Error: An error was saved as an issue" + " so it will be deleted: {}" + "".format(c_path)) + os.remove(c_path) if result is not None: return result, None self.last_src = url try: - res = request.urlopen(url) - data_s = decode_safe(res.read()) + headers = {} + token = self.options.get('token') + if token is not None: + headers['Authorization'] = "token " + token + if len(headers) > 0: + res = requests.get(url, headers=headers) + # ^ requests (not urlopen) is used so the token header is sent. + res_text = res.text + # NOTE: In python3, res.content is in bytes + # (res.text is the decoded str). 
+ else: + res = request.urlopen(url) + res_text = decode_safe(res.read()) parent = os.path.split(c_path)[0] if not os.path.isdir(parent): os.makedirs(parent) - data = json.loads(data_s) + data = json.loads(res_text) + err = to_error(data) + if err is not None: + return None, err # Only save if loads didn't raise an exception. with open(c_path, 'w') as outs: - outs.write(data_s) + # outs.write(res_text) + json.dump(data, outs, indent=2) debug(p+"Wrote {}".format(c_path)) except HTTPError as ex: return ( @@ -1128,7 +1217,9 @@ class Repo: 'url': url, } ) - + err = to_error(data) + if err is not None: + return None, err return data, None @@ -1482,8 +1573,8 @@ class Repo: "dump-issues.json") with open(dumpPath, 'w') as outs: json.dump(self.issues, outs, indent=2) - print("Error: dumped self.issues as {}" - "".format(dumpPath)) + print("Error: dumped self.issues as {}" + "".format(dumpPath)) raise ex for label in issue["labels"]: self.label_ids.append(label["id"]) diff --git a/utilities/pyissuesyncd b/utilities/pyissuesyncd index d248da7..d2ebcad 100755 --- a/utilities/pyissuesyncd +++ b/utilities/pyissuesyncd @@ -218,12 +218,19 @@ def start_issuesyncd(src_options, dst_options): # error(" * headers: {}".format(err.get('headers'))) break elif src_res_code == 404: - error("#{}: Error 404: There is no {}" - " so the end of the issues may have been" - " reached.".format(issue_no, url)) - error(" * reason: {}".format(err.get('reason'))) - # error(" * headers: {}".format(err.get('headers'))) - continue + reason_msg = err.get('reason') + if reason_msg is None: + reason_msg = "" + if "deleted" in reason_msg: + error("#{}: Error 404: \"{}\"" + "".format(issue_no, reason_msg)) + continue + else: + error("#{}: Error 404: \"{}\" (Are there no more?)" + "".format(issue_no, reason_msg)) + # error(" * reason: {}".format(err.get('reason'))) + # error(" * headers: {}".format(err.get('headers'))) + break elif src_res_code == 410: error("#{}: The issue seems to have been deleted." 
"".format(issue_no)) @@ -251,26 +258,40 @@ def start_issuesyncd(src_options, dst_options): continue else: error("However, an issue was returned.") + elif src_issue is None: + raise RuntimeError("The issue was None but the error_dict was None") got_fmt = "#{}: got (source not recorded)" if src_repo.last_src is not None: - if src_repo.last_src.startswith("http:"): + if src_repo.last_src.startswith("http"): got_fmt = "#{} downloaded" elif os.path.isfile(src_repo.last_src): - got_fmt = "#{} loaded from cache" + got_fmt = "#{} loaded from cache file" + else: + got_fmt = "#{} got " + src_repo.last_src error(got_fmt.format(issue_no)) # Example: ~/.cache/pyissuesyncd/source/issues/1.json src_dt_parser = src_repo.options['default_dt_parser'] - src_created_dt_s = src_repo.getKnown(src_issue, 'created_at') - src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at') + src_created_dt_s = None try: - src_updated_dt = src_dt_parser(src_updated_dt_s) - except ValueError as ex: - error("Error in {}".format(src_repo.last_src)) - error(ex) - error("If you changed repos and used the same cache dir," - " manually delete the cache file or directory above.") - sys.exit(1) - src_updated_ts = int(src_updated_dt.strftime("%s")) + src_created_dt_s = src_repo.getKnown(src_issue, 'created_at') + except KeyError: + pass + src_updated_dt_s = None + try: + src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at') + try: + src_updated_dt = src_dt_parser(src_updated_dt_s) + except ValueError as ex: + error("Error in {}".format(src_repo.last_src)) + error(ex) + error("If you changed repos and used the same cache dir," + " manually delete the cache file or directory above.") + sys.exit(1) + src_updated_ts = int(src_updated_dt.strftime("%s")) + except KeyError as ex: + error("Missing key: {}".format(ex)) + error("src_issue: {}" + "".format(json.dumps(src_issue, indent=2))) # ^ See '''