Browse Source

Get issues until you can't. Further improve reporting and documentation.

master
poikilos 3 years ago
parent
commit
4096b5cab1
  1. 63
      utilities/enissue.py
  2. 137
      utilities/pyissuesyncd

63
utilities/enissue.py

@ -400,8 +400,9 @@ class Repo:
structure (except for issues and other subdirectories
which mimic the web API routes). Only use this option
if you set a different single_cache for each repo!
api_id -- a key in the global apis dict which determines the
defaults for accessing the web API.
'''
self.ERROR_410 = "Error 410"
repo_url = options.get('repo_url')
debug("* using URL {}".format(repo_url))
if repo_url is None:
@ -413,6 +414,10 @@ class Repo:
self.api_id = options.get('api_id')
if urlParts[-2] == "repo.or.cz":
self.remote_user = "almikes@aol.com" # Wuzzy2
if self.api_id is not None:
if self.api_id != 'git_instaweb':
error("WARNING: URL has [] but self.api_id was {}"
"".format(urlParts[-2], self.api_id))
self.api_id = "git_instaweb"
# Such as https://repo.or.cz/minetest_treasurer.git
# - locally, git instaweb is controlled via:
@ -448,7 +453,10 @@ class Repo:
debug("* using specified API: {}".format(self.api_id))
if self.api_id is None:
self.api_id = "Gitea"
error(" * assuming API is {}".format(self.api_id))
if "github.com" in repo_url.lower():
error("WARNING: assuming Gitea but URL has github.com.")
error(" * assuming API is {} for {}"
"".format(self.api_id, ))
if self.api_id is None:
raise RuntimeError("api_id is not set")
api_meta = apis.get(self.api_id)
@ -566,11 +574,16 @@ class Repo:
name -- a well-known issue key such as 'body' that will be
translated to an API-specific key.
'''
key = self.options['known_issue_keys'].get(name)
known_issue_keys = self.options.get('known_issue_keys')
if known_issue_keys is None:
raise RuntimeError("known_issue_keys shouldn't be None.")
key = known_issue_keys.get(name)
if key is None:
raise KeyError("{} is not a well-known key in"
" known_issue_keys. Try _getIssueValue to"
" forcefully get a value.")
" forcefully get a value but only if you"
" ran load_issues first--otherwise use"
" getKnown.")
return key
def _getIssueValue(self, index, key):
@ -581,16 +594,37 @@ class Repo:
'''
return self.issues[index][key]
def getKnown(self, index, name):
def _getKnownAt(self, index, name):
    '''
    Get the value of a well-known field from an already-loaded issue
    (an element of self.issues) by translating name to the
    API-specific key via getKnownKey.

    Sequential arguments:
    index -- an index in self.issues
    name -- a well-known issue key such as 'body' that will be
        translated to an API-specific key.

    Returns:
    The result of self._getIssueValue(index, key) where key is the
    API-specific key for name.

    Raises:
    RuntimeError if self.issues is None (no issues are loaded) or if
        getKnownKey returns None.
    '''
    if self.issues is None:
        # Without a loaded list there is nothing to index; the caller
        # should pass a full issue dict to getKnown instead.
        raise RuntimeError("You cannot use _getKnownAt when there"
                           " are no issues loaded (try getKnown).")
    key = self.getKnownKey(name)
    if key is None:
        # Defensive: guards the lookup below against a None key.
        raise RuntimeError("getKnownKey should not be None.")
    return self._getIssueValue(index, key)
def getKnown(self, issue, name):
    '''
    Get the value of a well-known field from a full issue dict by
    translating name to the API-specific key via getKnownKey.

    Sequential arguments:
    issue -- a full issue dict such as obtained via get_issue
    name -- a well-known issue key such as 'body' that will be
        translated to an API-specific key.

    Returns:
    The value stored in issue under the API-specific key.

    Raises:
    ValueError if issue is None or is not a dict.
    '''
    if not isinstance(issue, dict):
        # None gets its own message so a missing issue is
        # distinguishable from an issue of the wrong type.
        if issue is None:
            raise ValueError("issue is None but must be an issue dict"
                             " such as obtained via get_issue.")
        raise ValueError("issue must be an issue dict such as"
                         " obtained via get_issue.")
    api_key = self.getKnownKey(name)
    return issue[api_key]
def setCachesPath(self, path, flat=True):
'''
@ -653,7 +687,10 @@ class Repo:
search_terms -- Search for each of these terms.
Returns:
A 2-long tuple of: (results, error string (None if no error)).
A 2-long tuple of: (results, error_dict) where error_dict is
None if there is no error, otherwise contains a 'reason',
possibly a 'code' (standard website error code), and possibly a
'url'.
Raises:
ValueError if query is not None and issue_no is not None.
@ -847,12 +884,14 @@ class Repo:
msg = ex.reason
if ex.code == 410:
msg = ("The issue was apparently deleted ({})."
"".format(self.ERROR_410))
"".format(ex.reason))
return (
None,
{
'code': ex.code,
'reason': msg,
'headers': ex.headers,
'url': query_s,
}
)
# msg = str(ex) + ": " + self.rateLimitFmt.format(query_s)
@ -861,12 +900,15 @@ class Repo:
{
'code': ex.code,
'reason': msg,
'headers': ex.headers,
'url': query_s,
}
)
response_s = decode_safe(response.read())
if not os.path.isdir(self.c_repo_path):
os.makedirs(self.c_repo_path)
print(p+"Saving issues cache: {}".format(c_path))
if not quiet:
print(p+"Saving issues cache: {}".format(c_path))
with open(c_path, "w") as outs:
outs.write(response_s)
result = json.loads(response_s)
@ -1055,6 +1097,7 @@ class Repo:
'code': ex.code,
'reason': ex.reason,
'headers': ex.headers,
'url': url,
}
)
@ -1372,14 +1415,14 @@ class Repo:
" only one issue because a single"
" issue has its own URL with only"
" one result (not a list).")
results, msg = self._get_issues(
results, err = self._get_issues(
options,
query=query,
issue_no=issue_no,
search_terms=search_terms,
)
self.issues = results
return results, msg
return results, err
def get_match(self, mode, issue_no=None, match_all_labels_lower=[]):
'''

137
utilities/pyissuesyncd

@ -141,25 +141,86 @@ def get_issue(repo, options, issue_no):
def start_issuesyncd(src_options, dst_options):
# src_never_expire = src_options.get('never_expire') is True
non_issue = 1
issue_no = non_issue - 1
# while True:
max_issue = src_options.get('max_issue')
if max_issue is None:
max_issue = 1000
error("WARNING: SRC_MAX_ISSUE set to default: {}"
"".format(max_issue))
else:
max_issue = int(max_issue)
issue_no = 0 # This is incremented to 1 before use.
# issue_no = max_issue - 1 # debug only
src_res_code = 0
# while issue_no < non_issue: # for debug only
while src_res_code != 404:
end_codes = [404, 403]
# while src_res_code not in end_codes:
while True:
# while (issue_no + 1) <= max_issue: # for debug only
issue_no += 1
if max_issue is not None:
if issue_no > max_issue:
error("* ending due to setting: --src-max-issue={}"
" (can also be set by SRC_MAX_ISSUE env var)"
"".format({}))
break
src_repo = Repo(src_options)
src_issue, err = get_issue(src_repo, src_options, issue_no)
deleted = False
if err is not None:
'''
error("Error accessing source issue {}: {}: {}"
"".format(issue_no, err.get('code'),
err.get('reason')))
continue
'''
src_res_code = err.get('code')
url = err.get('url')
if src_res_code in end_codes:
if src_res_code == 403:
error("#{}: stopping due to error {} ({})"
"".format(issue_no, err.get('code'),
err.get('reason')))
# error(" * reason: {}".format())
# error(" * headers: {}".format(err.get('headers')))
break
elif src_res_code == 404:
error("#{}: Error 404: There is no {}"
" so the end of the issues may have been"
" reached.".format(issue_no, url))
error(" * reason: {}".format(err.get('reason')))
# error(" * headers: {}".format(err.get('headers')))
continue
elif src_res_code == 410:
error("#{}: The issue seems to have been deleted."
"".format(issue_no))
error(" * reason: {}".format(err.get('reason')))
# error(" * headers: {}".format(err.get('headers')))
deleted = False
# TODO: delete on dest (carefully!)
continue
else:
error("#{}: stopping due to error code {}"
"".format(issue_no, src_res_code))
break
else:
error("#{}: continuing anyway but got error code {}"
"".format(issue_no, src_res_code))
if src_issue is None:
if src_res_code not in end_codes:
error("#{}: Skipping due to unprocessed error {}"
"".format(issue_no, src_res_code))
else:
error("#{}: Stopping due to unprocessed error {}"
"".format(issue_no, src_res_code))
continue
else:
error("However, an issue was returned.")
error("Got issue {}".format(issue_no))
# Example: ~/.cache/pyissuesyncd/source/issues/1.json
src_dt_parser = src_repo.options['default_dt_parser']
src_created_dt_s = src_repo.getKnown(0, 'created_at')
src_updated_dt_s = src_repo.getKnown(0, 'updated_at')
src_created_dt_s = src_repo.getKnown(src_issue, 'created_at')
src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at')
src_updated_dt = src_dt_parser(src_updated_dt_s)
src_updated_ts = int(src_updated_dt.strftime("%s"))
# ^ See <https://stackoverflow.com/questions/19801727/convert-
@ -170,12 +231,42 @@ def start_issuesyncd(src_options, dst_options):
'''
# print(json.dumps(src_issue, indent=2))
enissue.set_verbose(True)
# enissue.set_verbose(True)
dst_repo = Repo(dst_options)
dst_issue, err = get_issue(dst_repo, dst_options, issue_no)
if err is not None:
dst_res_code = err.get('code')
url = err.get('url')
'''
if dst_res_code in end_codes:
if dst_res_code == 403:
error("* stopping due to: {}"
"".format(err.get('reason')))
break
elif dst_res_code == 404:
error("* 404: There is no issue {} at {} so the end"
" of the issues may have been reached."
"".format(issue_no, url))
error(" * reason: {}".format(err.get('reason')))
# error(" * headers: {}".format(err.get('headers')))
continue
elif dst_res_code == 410:
error(err.get('reason'))
error("* Issue {} seems to have been deleted."
"".format(issue_no))
continue
break
'''
if dst_issue is None:
# TODO: write the issue
continue
if dst_issue is None:
raise RuntimeError("dst_issue shouldn't be None when error"
" is None.")
dst_dt_parser = dst_repo.options['default_dt_parser']
dst_created_dt_s = dst_repo.getKnown(0, 'created_at')
dst_updated_dt_s = dst_repo.getKnown(0, 'updated_at')
dst_created_dt_s = dst_repo.getKnown(dst_issue, 'created_at')
dst_updated_dt_s = dst_repo.getKnown(dst_issue, 'updated_at')
dst_updated_dt = dst_dt_parser(dst_updated_dt_s)
dst_updated_ts = int(dst_updated_dt.strftime("%s"))
# ^ See <https://stackoverflow.com/questions/19801727/convert-
@ -185,7 +276,8 @@ def start_issuesyncd(src_options, dst_options):
"".format(issue_no, dst_updated_ts, dst_updated_dt))
'''
# Example: ~/.cache/pyissuesyncd/destination/issues/1.json
break # for debug only
# break # for debug only
continue # for debug only
# print(" * dst_issue:")
# print(json.dumps(dst_issue, indent=2))
@ -211,31 +303,36 @@ if __name__ == "__main__":
'repo_url': "https://github.com/poikilos/EnlivenMinetest",
'never_expire': True,
'quiet': True,
'api_id': "GitHub",
}
dst_options = {
'never_expire': True,
'quiet': True,
'api_id': "Gitea",
}
DST_REPO = os.environ.get("DST_REPO")
DST_REPO = os.environ.get('DST_REPO')
if DST_REPO is not None:
dst_options['repo_url'] = DST_REPO
del DST_REPO
SRC_REPO = os.environ.get("SRC_REPO")
SRC_REPO = os.environ.get('SRC_REPO')
if DST_REPO is not None:
src_options['repo_url'] = SRC_REPO
del SRC_REPO
SRC_CACHE = os.environ.get("SRC_CACHE")
SRC_CACHE = os.environ.get('SRC_CACHE')
if SRC_CACHE is None:
SRC_CACHE = os.path.join(data_directory, "source")
DST_CACHE = os.environ.get("DST_CACHE")
DST_CACHE = os.environ.get('DST_CACHE')
if DST_CACHE is None:
DST_CACHE = os.path.join(data_directory, "destination")
SRC_MAX_ISSUE = os.environ.get('SRC_MAX_ISSUE')
prev_arg = None
manual_args = ['--dst-repo', '--src-repo', '--src-cache', '--dst-cache']
manual_args = ['--dst-repo', '--src-repo', '--src-cache',
'--dst-cache', '--src-max-issue']
for arg in sys.argv[1:]:
if prev_arg == "--dst-repo":
@ -246,6 +343,8 @@ if __name__ == "__main__":
SRC_CACHE = arg
elif prev_arg == "--dst_cache":
DST_CACHE = arg
elif prev_arg == "--src-max-issue":
SRC_MAX_ISSUE = int(arg)
elif arg in manual_args:
pass
else:
@ -255,6 +354,8 @@ if __name__ == "__main__":
prev_arg = arg
src_options['single_cache'] = SRC_CACHE
src_options['max_issue'] = SRC_MAX_ISSUE
# ^ INFO: start_issuesyncd warns if SRC_MAX_ISSUE is None.
dst_options['single_cache'] = DST_CACHE

Loading…
Cancel
Save