
Handle non-standard and standard website errors better.

master
poikilos 3 years ago
parent commit ae38adb88e
2 changed files:
  1. utilities/enissue.py (157 lines changed)
  2. utilities/pyissuesyncd (57 lines changed)

utilities/enissue.py (157 lines changed)

@@ -65,6 +65,13 @@ try:
     from urllib.parse import quote
     from urllib.parse import unquote
     from urllib.error import HTTPError
+    try:
+        import requests
+    except ImportError:
+        sys.stderr.write("If you try to use a token, you must have the"
+                         " requests package for python3 such as via:\n"
+                         "  sudo apt-get install python3-requests")
+        sys.stderr.flush()
 except ImportError:
     # Python 2
     # See <https://docs.python.org/2/howto/urllib2.html>
@@ -75,6 +82,13 @@ except ImportError:
     from urllib import unquote
     from urllib2 import HTTPError
     # ^ urllib.error.HTTPError doesn't exist in Python 2
+    try:
+        import requests
+    except ImportError:
+        sys.stderr.write("If you try to use a token, you must have the"
+                         " requests package for python2 such as via:\n"
+                         "  sudo apt-get install python-requests")
+        sys.stderr.flush()
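
Both branches tolerate a missing requests package, so code that later needs it (token auth) has to degrade gracefully. A minimal sketch of that pattern, assuming a HAVE_REQUESTS flag (the flag is illustrative; this commit instead relies on the import having succeeded):

    # Illustrative guard; this commit does not define HAVE_REQUESTS.
    try:
        import requests
        HAVE_REQUESTS = True
    except ImportError:
        HAVE_REQUESTS = False

    def require_requests_for_token(token):
        # Fail early with a clear message instead of a NameError later.
        if token is not None and not HAVE_REQUESTS:
            raise RuntimeError(
                "Using a token requires the requests package"
                " (e.g. sudo apt-get install python3-requests)."
            )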
@@ -319,6 +333,19 @@ def toSubQueryValue(value):
     return value


+def to_error(results):
+    if hasattr(results, 'items'):
+        if results.get('documentation_url') is not None:
+            msg = results.get('message')
+            if msg is None:
+                msg = "There is nothing at the address."
+            return {
+                'code': 404,  # Silly GitHub, I will force a 404 of course since you didn't.
+                'reason': msg,
+            }
+    return None


 def usage():
     print("")
     print("Commands:")
@@ -689,6 +716,10 @@ class Repo:
           from the internet and re-save the cached data).
         - 'never_expire': Never download unless 'refresh' is set
           or there is no cache file.
+        - 'token': A token, such as one generated by the web GUI of
+          your repo management system; it mitigates rate limiting to
+          a level that is probably not noticeable (5000/hr according
+          to GitHub API docs, 2021-11-30).

         Keyword arguments:
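
As a rough sketch of what the option enables (matching the download code added further down), the token is sent as an Authorization header (fetch_issue_json is a hypothetical name, not a function in this repo):

    import requests

    def fetch_issue_json(url, token=None):
        headers = {}
        if token is not None:
            # GitHub-style token scheme, as used below in this commit.
            headers['Authorization'] = "token " + token
        return requests.get(url, headers=headers).json()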
@@ -824,6 +855,7 @@ class Repo:
             debug("  There was no custom query.")
         self.last_query_s = query_s
+        # ^ Differs from self.last_src, which can be a file.
         if os.path.isfile(c_path):
@@ -836,6 +868,7 @@ class Repo:
             is_fresh = filetime > cache_delta
             max_cache_d_s = "{}".format(max_cache_delta)
             expires_s = "{}".format(filetime + max_cache_delta)
+            self.last_src = c_path  # Changed later if doesn't return
             if never_expire:
                 max_cache_d_s = "never_expire"
                 expires_s = "never_expire"
@@ -847,11 +880,19 @@ class Repo:
                 if not quiet:
                     print(p+"Cache expires: {}".format(expires_s))
                 with open(c_path) as json_file:
-                    self.last_src = c_path
                     result = json.load(json_file)
                 max_issue = None
                 results = result
-                if results_key is not None:
+                err = to_error(result)
+                if err is not None:
+                    error("WARNING: a website error was saved"
+                          " as an issue, so it will be deleted:"
+                          " \"{}\""
+                          "".format(c_path))
+                    result = None
+                    os.remove(c_path)
+                    err = None
+                elif results_key is not None:
                     if hasattr(results, results_key):
                         debug("  loaded result[{}]"
                               "".format(results_key))
@@ -859,23 +900,25 @@ class Repo:
                     else:
                         error("WARNING: expected {} in dict"
                               "".format(results_key))
-                if hasattr(results, 'keys'):
-                    debug("  issue not page: converting to list")
-                    results = [result]
-                debug(p+"The cache file has"
-                      " {} issue(s).".format(len(results)))
-                for issue in results:
-                    issue_n = issue.get("number")
-                    # debug("issue_n: {}".format(issue_n))
-                    if issue_n is not None:
-                        if (max_issue is None) or (issue_n > max_issue):
-                            max_issue = issue_n
-                if issue_no is None:
-                    # Only mention this if more than one issue
-                    debug("  The highest cached issue# (this run)"
-                          " is {}.".format(max_issue))
-                debug("  returning {} issue(s)".format(len(results)))
-                return results, None
+                if result is not None:
+                    if hasattr(results, 'keys'):
+                        debug("  issue not page: converting to list")
+                        results = [result]
+                    debug(p+"The cache file has"
+                          " {} issue(s).".format(len(results)))
+                    for issue in results:
+                        issue_n = issue.get("number")
+                        # debug("issue_n: {}".format(issue_n))
+                        if issue_n is not None:
+                            if (max_issue is None) or (issue_n > max_issue):
+                                max_issue = issue_n
+                    if issue_no is None:
+                        # Only mention this if more than one issue
+                        debug("  The highest cached issue# (this run)"
+                              " is {}.".format(max_issue))
+                    debug("  returning {} issue(s)".format(len(results)))
+                    return results, None
+                # else load from URL (See os.remove() above for why)
             else:
                 if refresh is True:
                     if not quiet:
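
The effect of the hunk above is that a cached error payload is treated as a poisoned cache entry: it is deleted and result is left as None, so control falls through to the download path. Condensed into a standalone sketch (load_issue_cache is a hypothetical name):

    import json
    import os

    def load_issue_cache(c_path):
        # Returns the cached issue dict, or None if the cache held a
        # saved website error (deleted here so it can be re-fetched).
        with open(c_path) as json_file:
            result = json.load(json_file)
        if to_error(result) is not None:
            os.remove(c_path)
            return None
        return result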
@@ -891,10 +934,26 @@ class Repo:
                     print(p+"There is no cache for \"{}\"".format(
                         c_path
                     ))
+        self.last_src = query_s
+        # ^ If didn't return yet, the source is a URL.
+        req_is_complex = False
         try:
             debug(p+"Query URL (query_s): {}".format(query_s))
-            response = request.urlopen(query_s)
+            headers = {}
+            token = self.options.get('token')
+            if token is not None:
+                headers['Authorization'] = "token " + token
+            if len(headers) > 0:
+                req_is_complex = True
+                response = requests.get(query_s, headers=headers)
+                # response = request.urlopen(query_s)
+                res_text = response.text
+                # NOTE: In python3, response.content is in bytes
+                # (<https://stackoverflow.com/a/18810889/4541104>).
+            else:
+                response = request.urlopen(query_s)
+                res_text = decode_safe(response.read())
         except HTTPError as ex:
             msg = ex.reason
             if ex.code == 410:
@@ -919,14 +978,22 @@ class Repo:
                     'url': query_s,
                 }
             )
-        response_s = decode_safe(response.read())
         if not os.path.isdir(self.c_repo_path):
             os.makedirs(self.c_repo_path)
-        if not quiet:
-            print(p+"Saving issues cache: {}".format(c_path))
+        # if not quiet:
+        #     print(p+"Saving issues cache: {}".format(c_path))
+        # with open(c_path, "w") as outs:
+        #     outs.write(res_text)
+        result = json.loads(res_text)
+        err = to_error(result)
+        if err is not None:
+            return None, err
         with open(c_path, "w") as outs:
-            outs.write(response_s)
-        result = json.loads(response_s)
+            json.dump(result, outs, indent=2)
+        debug(p+"Wrote {}".format(c_path))
         if results_key is not None:
             result = result[results_key]
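
The write path enforces the same rule in the other direction: parse first, reject a recognizable error payload, and only then write the cache file, so an error page can no longer be saved as an issue. As a sketch (save_issue_cache is a hypothetical name following the hunk's return convention):

    import json

    def save_issue_cache(c_path, res_text):
        result = json.loads(res_text)  # raises ValueError on bad JSON
        err = to_error(result)
        if err is not None:
            return None, err  # never cache an error payload
        with open(c_path, "w") as outs:
            json.dump(result, outs, indent=2)
        return result, None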
@@ -1072,6 +1139,7 @@ class Repo:
                   "".format(url))
         if os.path.isfile(c_path):
+            self.last_src = c_path  # changed later if doesn't return
             # See <https://stackoverflow.com/questions/7430928/
             # comparing-dates-to-check-for-old-files>
             max_cache_delta = timedelta(hours=12)
@@ -1092,7 +1160,6 @@ class Repo:
                               + max_cache_delta))
             with open(c_path) as json_file:
                 try:
-                    self.last_src = c_path
                     result = json.load(json_file)
                 except json.decoder.JSONDecodeError as ex:
                     error("")
@@ -1103,20 +1170,42 @@ class Repo:
                     # Do NOT set err NOR set to a tuple (A result
                     # of None means it will load from the web
                     # below)!
+            err = to_error(result)
+            if err is not None:
+                result = None
+                error("Error: An error was saved as an issue"
+                      " so it will be deleted: {}"
+                      "".format(c_path))
+                os.remove(c_path)
             if result is not None:
                 return result, None
+        self.last_src = url
         try:
-            res = request.urlopen(url)
-            data_s = decode_safe(res.read())
+            headers = {}
+            token = self.options.get('token')
+            if token is not None:
+                headers['Authorization'] = "token " + token
+            if len(headers) > 0:
+                res = requests.get(url, headers=headers)
+                # res = request.urlopen(url)
+                res_text = res.text
+                # NOTE: In python3, res.content is in bytes
+                # (<https://stackoverflow.com/a/18810889/4541104>).
+            else:
+                res = request.urlopen(url)
+                res_text = decode_safe(res.read())
             parent = os.path.split(c_path)[0]
             if not os.path.isdir(parent):
                 os.makedirs(parent)
-            data = json.loads(data_s)
+            data = json.loads(res_text)
+            err = to_error(data)
+            if err is not None:
+                return None, err
             # Only save if loads didn't raise an exception.
             with open(c_path, 'w') as outs:
-                outs.write(data_s)
+                # outs.write(res_text)
+                json.dump(data, outs, indent=2)
+            debug(p+"Wrote {}".format(c_path))
         except HTTPError as ex:
             return (
@@ -1128,7 +1217,9 @@ class Repo:
                     'url': url,
                 }
             )
+        err = to_error(data)
+        if err is not None:
+            return None, err
         return data, None
@@ -1482,8 +1573,8 @@ class Repo:
                                       "dump-issues.json")
             with open(dumpPath, 'w') as outs:
                 json.dump(self.issues, outs, indent=2)
-                print("Error: dumped self.issues as {}"
-                      "".format(dumpPath))
+            print("Error: dumped self.issues as {}"
+                  "".format(dumpPath))
             raise ex
         for label in issue["labels"]:
             self.label_ids.append(label["id"])

utilities/pyissuesyncd (57 lines changed)

@@ -218,12 +218,19 @@ def start_issuesyncd(src_options, dst_options):
             # error("  * headers: {}".format(err.get('headers')))
             break
         elif src_res_code == 404:
-            error("#{}: Error 404: There is no {}"
-                  " so the end of the issues may have been"
-                  " reached.".format(issue_no, url))
-            error("  * reason: {}".format(err.get('reason')))
-            # error("  * headers: {}".format(err.get('headers')))
-            continue
+            reason_msg = err.get('reason')
+            if reason_msg is None:
+                reason_msg = ""
+            if "deleted" in reason_msg:
+                error("#{}: Error 404: \"{}\""
+                      "".format(issue_no, reason_msg))
+                continue
+            else:
+                error("#{}: Error 404: \"{}\" (Are there no more?)"
+                      "".format(issue_no, reason_msg))
+                # error("  * reason: {}".format(err.get('reason')))
+                # error("  * headers: {}".format(err.get('headers')))
+                break
         elif src_res_code == 410:
             error("#{}: The issue seems to have been deleted."
                   "".format(issue_no))
@@ -251,26 +258,40 @@ def start_issuesyncd(src_options, dst_options):
                 continue
             else:
                 error("However, an issue was returned.")
+        elif src_issue is None:
+            raise RuntimeError("The issue was None but the error_dict was None")
         got_fmt = "#{}: got (source not recorded)"
         if src_repo.last_src is not None:
-            if src_repo.last_src.startswith("http:"):
+            if src_repo.last_src.startswith("http"):
                 got_fmt = "#{} downloaded"
             elif os.path.isfile(src_repo.last_src):
-                got_fmt = "#{} loaded from cache"
+                got_fmt = "#{} loaded from cache file"
             else:
                 got_fmt = "#{} got " + src_repo.last_src
         error(got_fmt.format(issue_no))
+        # Example: ~/.cache/pyissuesyncd/source/issues/1.json
         src_dt_parser = src_repo.options['default_dt_parser']
-        src_created_dt_s = src_repo.getKnown(src_issue, 'created_at')
-        src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at')
-        try:
-            src_updated_dt = src_dt_parser(src_updated_dt_s)
-        except ValueError as ex:
-            error("Error in {}".format(src_repo.last_src))
-            error(ex)
-            error("If you changed repos and used the same cache dir,"
-                  " manually delete the cache file or directory above.")
-            sys.exit(1)
-        src_updated_ts = int(src_updated_dt.strftime("%s"))
+        src_created_dt_s = None
+        try:
+            src_created_dt_s = src_repo.getKnown(src_issue, 'created_at')
+        except KeyError:
+            pass
+        src_updated_dt_s = None
+        try:
+            src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at')
+            try:
+                src_updated_dt = src_dt_parser(src_updated_dt_s)
+            except ValueError as ex:
+                error("Error in {}".format(src_repo.last_src))
+                error(ex)
+                error("If you changed repos and used the same cache dir,"
+                      " manually delete the cache file or directory above.")
+                sys.exit(1)
+            src_updated_ts = int(src_updated_dt.strftime("%s"))
+        except KeyError as ex:
+            error("Missing key: {}".format(ex))
+            error("src_issue: {}"
+                  "".format(json.dumps(src_issue, indent=2)))
         # ^ See <https://stackoverflow.com/questions/19801727/convert-
         # datetime-to-unix-timestamp-and-convert-it-back-in-python>
         '''
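
One caveat the hunk inherits from the old code: strftime("%s") is a platform-specific extension (glibc), not standard Python. A portable conversion for GitHub-style UTC timestamps would be (to_unix_ts is an illustrative name):

    import calendar
    from datetime import datetime

    def to_unix_ts(dt_s):
        # Assumes GitHub-style UTC timestamps, e.g. "2021-11-30T01:02:03Z".
        dt = datetime.strptime(dt_s, "%Y-%m-%dT%H:%M:%SZ")
        return calendar.timegm(dt.timetuple())

    # to_unix_ts("1970-01-02T00:00:00Z") == 86400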
