Handle non-standard and standard website errors better.

4 years ago · ae38adb88e
2 changed files with 163 additions and 51 deletions
--- a/utilities/enissue.py
+++ b/utilities/enissue.py
@ -65,6 +65,13 @@ try:
    from urllib.parse import quote
    from urllib.parse import unquote
    from urllib.error import HTTPError
    try:
        import requests
    except ImportError:
        sys.stderr.write("If you try to use a token, you must have the"
                         " requests package for python3 such as via:\n"
                         "    sudo apt-get install python3-requests")
        sys.stderr.flush()
 except ImportError:
    # Python 2
    # See <https://docs.python.org/2/howto/urllib2.html>
@ -75,6 +82,13 @@ except ImportError:
    from urllib import unquote
    from urllib2 import HTTPError
    # ^ urllib.error.HTTPError doesn't exist in Python 2
    try:
        import requests
    except ImportError:
        sys.stderr.write("If you try to use a token, you must have the"
                         " requests package for python2 such as via:\n"
                         "    sudo apt-get install python-requests")
        sys.stderr.flush()
@ -319,6 +333,19 @@ def toSubQueryValue(value):
    return value
 def to_error(results):
    """Convert a decoded website error payload into an error dict.

    A payload is treated as an error only when it is dict-like (has an
    'items' attribute) and its 'documentation_url' value is not None.

    Sequential arguments:
    results -- The decoded JSON response (or cached data) to inspect.

    Returns:
    None if results does not look like an error payload; otherwise a
    dict with 'code' (always 404) and 'reason' (the 'message' value
    from the payload, or a generic fallback if that is None).
    """
    if not hasattr(results, 'items'):
        # Not dict-like (such as a list of issues), so not an error.
        return None
    if results.get('documentation_url') is None:
        return None
    reason = results.get('message')
    if reason is None:
        reason = "There is nothing at the address."
    # The payload carries no status code of its own, so 404 is forced
    # (the original author attributes this payload shape to GitHub).
    return {'code': 404, 'reason': reason}
 def usage():
    print("")
    print("Commands:")
@ -689,6 +716,10 @@ class Repo:
              from the internet and re-save the cached data).
            - 'never_expire': Never download unless 'refresh' is set
              or there is no cache file.
            - 'token': An access token, such as one generated by the
              web GUI of your repo management system. Sending it
              mitigates rate limiting to a level that is probably not
              noticeable (5000/hr according to GitHub API docs
              2021-11-30).
        Keyword arguments:
@ -824,6 +855,7 @@ class Repo:
            debug("  There was no custom query.")
        self.last_query_s = query_s
        # ^ Differs from self.last_src, which can be a file.
        if os.path.isfile(c_path):
@ -836,6 +868,7 @@ class Repo:
            is_fresh = filetime > cache_delta
            max_cache_d_s = "{}".format(max_cache_delta)
            expires_s = "{}".format(filetime + max_cache_delta)
            self.last_src = c_path  # Changed later if doesn't return
            if never_expire:
                max_cache_d_s = "never_expire"
                expires_s = "never_expire"
@ -847,11 +880,19 @@ class Repo:
                if not quiet:
                    print(p+"Cache expires: {}".format(expires_s))
                with open(c_path) as json_file:
                    self.last_src = c_path
                    result = json.load(json_file)
                max_issue = None
                results = result
-                if results_key is not None:
+                err = to_error(result)
                if err is not None:
                    error("WARNING: a website error was saved"
                          " as an issue, so it will be deleted:"
                          " \"{}\""
                          "".format(c_path))
                    result = None
                    os.remove(c_path)
                    err = None
                elif results_key is not None:
                    if hasattr(results, results_key):
                        debug("  loaded result[{}]"
                              "".format(results_key))
@ -859,6 +900,7 @@ class Repo:
                    else:
                        error("WARNING: expected {} in dict"
                              "".format(results_key))
                if result is not None:
                    if hasattr(results, 'keys'):
                        debug("  issue not page: converting to list")
                        results = [result]
@ -876,6 +918,7 @@ class Repo:
                              " is {}.".format(max_issue))
                    debug("  returning {} issue(s)".format(len(results)))
                    return results, None
                # else load from URL (See os.remove() above for why)
            else:
                if refresh is True:
                    if not quiet:
@ -891,10 +934,26 @@ class Repo:
                print(p+"There is no cache for \"{}\"".format(
                    c_path
                ))
        self.last_src = query_s
        # ^ If didn't return yet, the source is a URL.
        req_is_complex = False
        try:
            debug(p+"Query URL (query_s): {}".format(query_s))
            headers = {}
            token = self.options.get('token')
            if token is not None:
                headers['Authorization'] = "token " + token
            if len(headers) > 0:
                req_is_complex = True
                response = requests.get(query_s, headers=headers)
                # response = req.urlopen(query_s)
                res_text = response.text
                # NOTE: In python3, response.content is in bytes
                # (<https://stackoverflow.com/a/18810889/4541104>).
            else:
                response = request.urlopen(query_s)
                res_text = decode_safe(response.read())
        except HTTPError as ex:
            msg = ex.reason
            if ex.code == 410:
@ -919,14 +978,22 @@ class Repo:
                    'url': query_s,
                }
            )
-        response_s = decode_safe(response.read())
+
        if not os.path.isdir(self.c_repo_path):
            os.makedirs(self.c_repo_path)
-        if not quiet:
+        # if not quiet:
-            print(p+"Saving issues cache: {}".format(c_path))
+        #     print(p+"Saving issues cache: {}".format(c_path))
        # with open(c_path, "w") as outs:
        #     outs.write(res_text)
        result = json.loads(res_text)
        err = to_error(result)
        if err is not None:
            return None, err
        with open(c_path, "w") as outs:
-            outs.write(response_s)
+            json.dump(result, outs, indent=2)
-        result = json.loads(response_s)
+            debug(p+"Wrote {}".format(c_path))
        if results_key is not None:
            result = result[results_key]
@ -1072,6 +1139,7 @@ class Repo:
                                      "".format(url))
        if os.path.isfile(c_path):
            self.last_src = c_path  # changed later if doesn't return
            # See <https://stackoverflow.com/questions/7430928/
            # comparing-dates-to-check-for-old-files>
            max_cache_delta = timedelta(hours=12)
@ -1092,7 +1160,6 @@ class Repo:
                                                   + max_cache_delta))
                with open(c_path) as json_file:
                    try:
                        self.last_src = c_path
                        result = json.load(json_file)
                    except json.decoder.JSONDecodeError as ex:
                        error("")
@ -1103,20 +1170,42 @@ class Repo:
                        # Do NOT set err NOR set to a tuple (A result
                        # of None means it will load from the web
                        # below)!
                err = to_error(result)
                if err is not None:
                    result = None
                    error("Error: An error was saved as an issue"
                          " so it will be deleted: {}"
                          "".format(c_path))
                    os.remove(c_path)
        if result is not None:
            return result, None
        self.last_src = url
        try:
            headers = {}
            token = self.options.get('token')
            if token is not None:
                headers['Authorization'] = "token " + token
            if len(headers) > 0:
                res = requests.get(query_s, headers=headers)
                # res = req.urlopen(query_s)
                res_text = response.text
                # NOTE: In python3, response.content is in bytes
                # (<https://stackoverflow.com/a/18810889/4541104>).
            else:
                res = request.urlopen(url)
-            data_s = decode_safe(res.read())
+                res_text = decode_safe(res.read())
            parent = os.path.split(c_path)[0]
            if not os.path.isdir(parent):
                os.makedirs(parent)
-            data = json.loads(data_s)
+            data = json.loads(res_text)
            err = to_error(data)
            if err is not None:
                return None, err
            # Only save if loads didn't raise an exception.
            with open(c_path, 'w') as outs:
-                outs.write(data_s)
+                # outs.write(res_text)
                json.dump(data, outs, indent=2)
                debug(p+"Wrote {}".format(c_path))
        except HTTPError as ex:
            return (
@ -1128,7 +1217,9 @@ class Repo:
                    'url': url,
                }
            )
-
+        err = to_error(data)
        if err is not None:
            return None, err
        return data, None
--- a/utilities/pyissuesyncd
+++ b/utilities/pyissuesyncd
@ -218,12 +218,19 @@ def start_issuesyncd(src_options, dst_options):
                    # error("  * headers: {}".format(err.get('headers')))
                    break
                elif src_res_code == 404:
-                    error("#{}: Error 404: There is no {}"
+                    reason_msg = err.get('reason')
-                          " so the end of the issues may have been"
+                    if reason_msg is None:
-                          " reached.".format(issue_no, url))
+                        reason_msg = ""
-                    error("  * reason: {}".format(err.get('reason')))
+                    if "deleted" in reason_msg:
-                    # error("  * headers: {}".format(err.get('headers')))
+                        error("#{}: Error 404: \"{}\""
                              "".format(issue_no, reason_msg))
                        continue
                    else:
                        error("#{}: Error 404: \"{}\" (Are there no more?)"
                              "".format(issue_no, reason_msg))
                        # error("  * reason: {}".format(err.get('reason')))
                        # error("  * headers: {}".format(err.get('headers')))
                        break
                elif src_res_code == 410:
                    error("#{}: The issue seems to have been deleted."
                          "".format(issue_no))
@ -251,16 +258,26 @@ def start_issuesyncd(src_options, dst_options):
                continue
            else:
                error("However, an issue was returned.")
        elif src_issue is None:
            raise RuntimeError("The issue was None but the error_dict was None")
        got_fmt = "#{}: got (source not recorded)"
        if src_repo.last_src is not None:
-            if src_repo.last_src.startswith("http:"):
+            if src_repo.last_src.startswith("http"):
                got_fmt = "#{} downloaded"
            elif os.path.isfile(src_repo.last_src):
-                got_fmt = "#{} loaded from cache"
+                got_fmt = "#{} loaded from cache file"
            else:
                got_fmt = "#{} got " + src_repo.last_src
        error(got_fmt.format(issue_no))
        # Example: ~/.cache/pyissuesyncd/source/issues/1.json
        src_dt_parser = src_repo.options['default_dt_parser']
        src_created_dt_s = None
        try:
            src_created_dt_s = src_repo.getKnown(src_issue, 'created_at')
        except KeyError:
            pass
        src_updated_dt_s = None
        try:
            src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at')
            try:
                src_updated_dt = src_dt_parser(src_updated_dt_s)
@ -271,6 +288,10 @@ def start_issuesyncd(src_options, dst_options):
                      " manually delete the cache file or directory above.")
                sys.exit(1)
            src_updated_ts = int(src_updated_dt.strftime("%s"))
        except KeyError as ex:
            error("Missing key: {}".format(ex))
            error("src_issue: {}"
                  "".format(json.dumps(src_issue, indent=2)))
        # ^ See <https://stackoverflow.com/questions/19801727/convert-
        #   datetime-to-unix-timestamp-and-convert-it-back-in-python>
        '''