Browse Source

Get issues until you can't. Further improve reporting and documentation.

master
poikilos 3 years ago
parent
commit
4096b5cab1
  1. 63
      utilities/enissue.py
  2. 137
      utilities/pyissuesyncd

63
utilities/enissue.py

@ -400,8 +400,9 @@ class Repo:
structure (except for issues and other subdirectories structure (except for issues and other subdirectories
which mimic the web API routes). Only use this option which mimic the web API routes). Only use this option
if you set a different single_cache for each repo! if you set a different single_cache for each repo!
api_id -- a key in the global apis dict which determines the
defaults for accessing the web API.
''' '''
self.ERROR_410 = "Error 410"
repo_url = options.get('repo_url') repo_url = options.get('repo_url')
debug("* using URL {}".format(repo_url)) debug("* using URL {}".format(repo_url))
if repo_url is None: if repo_url is None:
@ -413,6 +414,10 @@ class Repo:
self.api_id = options.get('api_id') self.api_id = options.get('api_id')
if urlParts[-2] == "repo.or.cz": if urlParts[-2] == "repo.or.cz":
self.remote_user = "almikes@aol.com" # Wuzzy2 self.remote_user = "almikes@aol.com" # Wuzzy2
if self.api_id is not None:
if self.api_id != 'git_instaweb':
error("WARNING: URL has [] but self.api_id was {}"
"".format(urlParts[-2], self.api_id))
self.api_id = "git_instaweb" self.api_id = "git_instaweb"
# Such as https://repo.or.cz/minetest_treasurer.git # Such as https://repo.or.cz/minetest_treasurer.git
# - locally, git instaweb is controlled via: # - locally, git instaweb is controlled via:
@ -448,7 +453,10 @@ class Repo:
debug("* using specified API: {}".format(self.api_id)) debug("* using specified API: {}".format(self.api_id))
if self.api_id is None: if self.api_id is None:
self.api_id = "Gitea" self.api_id = "Gitea"
error(" * assuming API is {}".format(self.api_id)) if "github.com" in repo_url.lower():
error("WARNING: assuming Gitea but URL has github.com.")
error(" * assuming API is {} for {}"
"".format(self.api_id, ))
if self.api_id is None: if self.api_id is None:
raise RuntimeError("api_id is not set") raise RuntimeError("api_id is not set")
api_meta = apis.get(self.api_id) api_meta = apis.get(self.api_id)
@ -566,11 +574,16 @@ class Repo:
name -- a well-known issue key such as 'body' that will be name -- a well-known issue key such as 'body' that will be
translated to an API-specific key. translated to an API-specific key.
''' '''
key = self.options['known_issue_keys'].get(name) known_issue_keys = self.options.get('known_issue_keys')
if known_issue_keys is None:
raise RuntimeError("known_issue_keys shouldn't be None.")
key = known_issue_keys.get(name)
if key is None: if key is None:
raise KeyError("{} is not a well-known key in" raise KeyError("{} is not a well-known key in"
" known_issue_keys. Try _getIssueValue to" " known_issue_keys. Try _getIssueValue to"
" forcefully get a value.") " forcefully get a value but only if you"
" ran load_issues first--otherwise use"
" getKnown.")
return key return key
def _getIssueValue(self, index, key): def _getIssueValue(self, index, key):
@ -581,16 +594,37 @@ class Repo:
''' '''
return self.issues[index][key] return self.issues[index][key]
def getKnown(self, index, name): def _getKnownAt(self, index, name):
''' '''
Sequential arguments: Sequential arguments:
index -- an index in self.issues index -- an index in self.issues
name -- a well-known issue key such as 'body' that will be name -- a well-known issue key such as 'body' that will be
translated to an API-specific key. translated to an API-specific key.
''' '''
if self.issues is None:
raise RuntimeError("You cannot use _getKnownAt when there"
" no issues loaded (try getKnown).")
key = self.getKnownKey(name) key = self.getKnownKey(name)
if key is None:
raise RuntimeError("getKnownKey should not be None.")
return self._getIssueValue(index, key) return self._getIssueValue(index, key)
def getKnown(self, issue, name):
    '''
    Look up a well-known field in an issue dict that is already in
    hand (as opposed to an index into self.issues).

    Sequential arguments:
    issue -- a full issue dict such as obtained via get_issue
    name -- a well-known issue key such as 'body' that will be
        translated to an API-specific key (using getKnownKey).

    Returns:
    The value stored in issue under the translated key.

    Raises:
    ValueError if issue is None or is not a dict.
    '''
    if isinstance(issue, dict):
        # Translate first, then index: a missing key surfaces as the
        # usual lookup error from the dict itself.
        api_key = self.getKnownKey(name)
        return issue[api_key]
    # Not a dict: None gets its own, more specific message.
    problem = ("issue must be an issue dict such as"
               " obtained via get_issue.")
    if issue is None:
        problem = ("issue is None but must be an issue dict"
                   " such as obtained via get_issue.")
    raise ValueError(problem)
def setCachesPath(self, path, flat=True): def setCachesPath(self, path, flat=True):
''' '''
@ -653,7 +687,10 @@ class Repo:
search_terms -- Search for each of these terms. search_terms -- Search for each of these terms.
Returns: Returns:
A 2-long tuple of: (results, error string (None if no error)). A 2-long tuple of: (results, error_dict) where error_dict is
None if there is no error, otherwise contains a 'reason',
possibly a 'code' (standard website error code), and possibly a
'url'.
Raises: Raises:
ValueError if query is not None and issue_no is not None. ValueError if query is not None and issue_no is not None.
@ -847,12 +884,14 @@ class Repo:
msg = ex.reason msg = ex.reason
if ex.code == 410: if ex.code == 410:
msg = ("The issue was apparently deleted ({})." msg = ("The issue was apparently deleted ({})."
"".format(self.ERROR_410)) "".format(ex.reason))
return ( return (
None, None,
{ {
'code': ex.code, 'code': ex.code,
'reason': msg, 'reason': msg,
'headers': ex.headers,
'url': query_s,
} }
) )
# msg = str(ex) + ": " + self.rateLimitFmt.format(query_s) # msg = str(ex) + ": " + self.rateLimitFmt.format(query_s)
@ -861,12 +900,15 @@ class Repo:
{ {
'code': ex.code, 'code': ex.code,
'reason': msg, 'reason': msg,
'headers': ex.headers,
'url': query_s,
} }
) )
response_s = decode_safe(response.read()) response_s = decode_safe(response.read())
if not os.path.isdir(self.c_repo_path): if not os.path.isdir(self.c_repo_path):
os.makedirs(self.c_repo_path) os.makedirs(self.c_repo_path)
print(p+"Saving issues cache: {}".format(c_path)) if not quiet:
print(p+"Saving issues cache: {}".format(c_path))
with open(c_path, "w") as outs: with open(c_path, "w") as outs:
outs.write(response_s) outs.write(response_s)
result = json.loads(response_s) result = json.loads(response_s)
@ -1055,6 +1097,7 @@ class Repo:
'code': ex.code, 'code': ex.code,
'reason': ex.reason, 'reason': ex.reason,
'headers': ex.headers, 'headers': ex.headers,
'url': url,
} }
) )
@ -1372,14 +1415,14 @@ class Repo:
" only one issue because a single" " only one issue because a single"
" issue has its own URL with only" " issue has its own URL with only"
" one result (not a list).") " one result (not a list).")
results, msg = self._get_issues( results, err = self._get_issues(
options, options,
query=query, query=query,
issue_no=issue_no, issue_no=issue_no,
search_terms=search_terms, search_terms=search_terms,
) )
self.issues = results self.issues = results
return results, msg return results, err
def get_match(self, mode, issue_no=None, match_all_labels_lower=[]): def get_match(self, mode, issue_no=None, match_all_labels_lower=[]):
''' '''

137
utilities/pyissuesyncd

@ -141,25 +141,86 @@ def get_issue(repo, options, issue_no):
def start_issuesyncd(src_options, dst_options): def start_issuesyncd(src_options, dst_options):
# src_never_expire = src_options.get('never_expire') is True # src_never_expire = src_options.get('never_expire') is True
non_issue = 1 max_issue = src_options.get('max_issue')
issue_no = non_issue - 1 if max_issue is None:
# while True: max_issue = 1000
error("WARNING: SRC_MAX_ISSUE set to default: {}"
"".format(max_issue))
else:
max_issue = int(max_issue)
issue_no = 0 # This is incremented to 1 before use.
# issue_no = max_issue - 1 # debug only
src_res_code = 0 src_res_code = 0
# while issue_no < non_issue: # for debug only end_codes = [404, 403]
while src_res_code != 404: # while src_res_code not in end_codes:
while True:
# while (issue_no + 1) <= max_issue: # for debug only
issue_no += 1 issue_no += 1
if max_issue is not None:
if issue_no > max_issue:
error("* ending due to setting: --src-max-issue={}"
" (can also be set by SRC_MAX_ISSUE env var)"
"".format({}))
break
src_repo = Repo(src_options) src_repo = Repo(src_options)
src_issue, err = get_issue(src_repo, src_options, issue_no) src_issue, err = get_issue(src_repo, src_options, issue_no)
deleted = False
if err is not None: if err is not None:
'''
error("Error accessing source issue {}: {}: {}" error("Error accessing source issue {}: {}: {}"
"".format(issue_no, err.get('code'), "".format(issue_no, err.get('code'),
err.get('reason'))) err.get('reason')))
continue '''
src_res_code = err.get('code')
url = err.get('url')
if src_res_code in end_codes:
if src_res_code == 403:
error("#{}: stopping due to error {} ({})"
"".format(issue_no, err.get('code'),
err.get('reason')))
# error(" * reason: {}".format())
# error(" * headers: {}".format(err.get('headers')))
break
elif src_res_code == 404:
error("#{}: Error 404: There is no {}"
" so the end of the issues may have been"
" reached.".format(issue_no, url))
error(" * reason: {}".format(err.get('reason')))
# error(" * headers: {}".format(err.get('headers')))
continue
elif src_res_code == 410:
error("#{}: The issue seems to have been deleted."
"".format(issue_no))
error(" * reason: {}".format(err.get('reason')))
# error(" * headers: {}".format(err.get('headers')))
deleted = False
# TODO: delete on dest (carefully!)
continue
else:
error("#{}: stopping due to error code {}"
"".format(issue_no, src_res_code))
break
else:
error("#{}: continuing anyway but got error code {}"
"".format(issue_no, src_res_code))
if src_issue is None:
if src_res_code not in end_codes:
error("#{}: Skipping due to unprocessed error {}"
"".format(issue_no, src_res_code))
else:
error("#{}: Stopping due to unprocessed error {}"
"".format(issue_no, src_res_code))
continue
else:
error("However, an issue was returned.")
error("Got issue {}".format(issue_no))
# Example: ~/.cache/pyissuesyncd/source/issues/1.json # Example: ~/.cache/pyissuesyncd/source/issues/1.json
src_dt_parser = src_repo.options['default_dt_parser'] src_dt_parser = src_repo.options['default_dt_parser']
src_created_dt_s = src_repo.getKnown(0, 'created_at') src_created_dt_s = src_repo.getKnown(src_issue, 'created_at')
src_updated_dt_s = src_repo.getKnown(0, 'updated_at') src_updated_dt_s = src_repo.getKnown(src_issue, 'updated_at')
src_updated_dt = src_dt_parser(src_updated_dt_s) src_updated_dt = src_dt_parser(src_updated_dt_s)
src_updated_ts = int(src_updated_dt.strftime("%s")) src_updated_ts = int(src_updated_dt.strftime("%s"))
# ^ See <https://stackoverflow.com/questions/19801727/convert- # ^ See <https://stackoverflow.com/questions/19801727/convert-
@ -170,12 +231,42 @@ def start_issuesyncd(src_options, dst_options):
''' '''
# print(json.dumps(src_issue, indent=2)) # print(json.dumps(src_issue, indent=2))
enissue.set_verbose(True) # enissue.set_verbose(True)
dst_repo = Repo(dst_options) dst_repo = Repo(dst_options)
dst_issue, err = get_issue(dst_repo, dst_options, issue_no) dst_issue, err = get_issue(dst_repo, dst_options, issue_no)
if err is not None:
dst_res_code = err.get('code')
url = err.get('url')
'''
if dst_res_code in end_codes:
if dst_res_code == 403:
error("* stopping due to: {}"
"".format(err.get('reason')))
break
elif dst_res_code == 404:
error("* 404: There is no issue {} at {} so the end"
" of the issues may have been reached."
"".format(issue_no, url))
error(" * reason: {}".format(err.get('reason')))
# error(" * headers: {}".format(err.get('headers')))
continue
elif dst_res_code == 410:
error(err.get('reason'))
error("* Issue {} seems to have been deleted."
"".format(issue_no))
continue
break
'''
if dst_issue is None:
# TODO: write the issue
continue
if dst_issue is None:
raise RuntimeError("dst_issue shouldn't be None when error"
" is None.")
dst_dt_parser = dst_repo.options['default_dt_parser'] dst_dt_parser = dst_repo.options['default_dt_parser']
dst_created_dt_s = dst_repo.getKnown(0, 'created_at') dst_created_dt_s = dst_repo.getKnown(dst_issue, 'created_at')
dst_updated_dt_s = dst_repo.getKnown(0, 'updated_at') dst_updated_dt_s = dst_repo.getKnown(dst_issue, 'updated_at')
dst_updated_dt = dst_dt_parser(dst_updated_dt_s) dst_updated_dt = dst_dt_parser(dst_updated_dt_s)
dst_updated_ts = int(dst_updated_dt.strftime("%s")) dst_updated_ts = int(dst_updated_dt.strftime("%s"))
# ^ See <https://stackoverflow.com/questions/19801727/convert- # ^ See <https://stackoverflow.com/questions/19801727/convert-
@ -185,7 +276,8 @@ def start_issuesyncd(src_options, dst_options):
"".format(issue_no, dst_updated_ts, dst_updated_dt)) "".format(issue_no, dst_updated_ts, dst_updated_dt))
''' '''
# Example: ~/.cache/pyissuesyncd/destination/issues/1.json # Example: ~/.cache/pyissuesyncd/destination/issues/1.json
break # for debug only # break # for debug only
continue # for debug only continue # for debug only
# print(" * dst_issue:") # print(" * dst_issue:")
# print(json.dumps(dst_issue, indent=2)) # print(json.dumps(dst_issue, indent=2))
@ -211,31 +303,36 @@ if __name__ == "__main__":
'repo_url': "https://github.com/poikilos/EnlivenMinetest", 'repo_url': "https://github.com/poikilos/EnlivenMinetest",
'never_expire': True, 'never_expire': True,
'quiet': True, 'quiet': True,
'api_id': "GitHub",
} }
dst_options = { dst_options = {
'never_expire': True, 'never_expire': True,
'quiet': True, 'quiet': True,
'api_id': "Gitea",
} }
DST_REPO = os.environ.get("DST_REPO") DST_REPO = os.environ.get('DST_REPO')
if DST_REPO is not None: if DST_REPO is not None:
dst_options['repo_url'] = DST_REPO dst_options['repo_url'] = DST_REPO
del DST_REPO del DST_REPO
SRC_REPO = os.environ.get("SRC_REPO") SRC_REPO = os.environ.get('SRC_REPO')
if DST_REPO is not None: if DST_REPO is not None:
src_options['repo_url'] = SRC_REPO src_options['repo_url'] = SRC_REPO
del SRC_REPO del SRC_REPO
SRC_CACHE = os.environ.get("SRC_CACHE") SRC_CACHE = os.environ.get('SRC_CACHE')
if SRC_CACHE is None: if SRC_CACHE is None:
SRC_CACHE = os.path.join(data_directory, "source") SRC_CACHE = os.path.join(data_directory, "source")
DST_CACHE = os.environ.get("DST_CACHE") DST_CACHE = os.environ.get('DST_CACHE')
if DST_CACHE is None: if DST_CACHE is None:
DST_CACHE = os.path.join(data_directory, "destination") DST_CACHE = os.path.join(data_directory, "destination")
SRC_MAX_ISSUE = os.environ.get('SRC_MAX_ISSUE')
prev_arg = None prev_arg = None
manual_args = ['--dst-repo', '--src-repo', '--src-cache', '--dst-cache'] manual_args = ['--dst-repo', '--src-repo', '--src-cache',
'--dst-cache', '--src-max-issue']
for arg in sys.argv[1:]: for arg in sys.argv[1:]:
if prev_arg == "--dst-repo": if prev_arg == "--dst-repo":
@ -246,6 +343,8 @@ if __name__ == "__main__":
SRC_CACHE = arg SRC_CACHE = arg
elif prev_arg == "--dst_cache": elif prev_arg == "--dst_cache":
DST_CACHE = arg DST_CACHE = arg
elif prev_arg == "--src-max-issue":
SRC_MAX_ISSUE = int(arg)
elif arg in manual_args: elif arg in manual_args:
pass pass
else: else:
@ -255,6 +354,8 @@ if __name__ == "__main__":
prev_arg = arg prev_arg = arg
src_options['single_cache'] = SRC_CACHE src_options['single_cache'] = SRC_CACHE
src_options['max_issue'] = SRC_MAX_ISSUE
# ^ INFO: start_issuesyncd warns if SRC_MAX_ISSUE is None.
dst_options['single_cache'] = DST_CACHE dst_options['single_cache'] = DST_CACHE

Loading…
Cancel
Save