fix matching

This commit is contained in:
Jan-Marten Brüggemann 2024-09-14 01:17:30 +02:00
parent 3b368d1a81
commit 5749fb4db1

View file

@ -60,47 +60,18 @@ def retreive_filelist(session: requests.Session, url: str) -> list:
return result
def find_wiki_files(filelist: list, wiki: str) -> list:
    """Return the entries of *filelist* whose file name starts with *wiki*.

    Each entry is accessed positionally; element ``1`` is treated as the
    file name and is the only field inspected here.  The relative order of
    the matching entries is preserved.

    Args:
        filelist: Sequence of indexable entries (element ``1`` is a string).
        wiki: Prefix the file name has to start with.

    Returns:
        A new list holding the matching entries (empty if nothing matches).
    """
    return [entry for entry in filelist if entry[1].startswith(wiki)]
def error_multiple_files(wiki: str, wiki_files: list):
    """Report an ambiguous match for *wiki* on stderr and abort.

    Args:
        wiki: The user-supplied name that matched more than one file.
        wiki_files: The matching entries; element ``0`` is used as the base
            and element ``1`` as the file name when building each URL.

    Raises:
        SystemExit: Always, with the message ``"Aborting."``.
    """
    print(f"{wiki} has multiple matches. Please specify your input more precisely.\n", file=sys.stderr)
    print(f"{wiki} matched to:", file=sys.stderr)
    for entry in wiki_files:
        # Entries store base and file name separately; join them for display.
        url = urljoin(entry[0], entry[1])
        print(f" - {url}", file=sys.stderr)
    raise SystemExit("Aborting.")
def get_download_candidates(wiki_files: list, wiki: str) -> dict:
def get_download_candidates(filelist: list, wiki: str) -> dict:
candidates = list()
# Check if results are unique
path0 = None
file_base0 = None
for wiki_file in wiki_files:
path = wiki_file[0]
file = wiki_file[1]
if path0 is None:
path0 = path
if path0 != path:
raise MultipleFileException(wiki)
for file_item in filelist:
path = file_item[0]
file = file_item[1]
file_name, file_extension = os.path.splitext(file)
file_base, file_date = file_name.rsplit('_', 1)
if file_base0 is None:
file_base0 = file_base
if file_base0 != file_base:
raise MultipleFileException(wiki)
if wiki != file_base:
continue
candidates.append((
path,
@ -113,15 +84,25 @@ def get_download_candidates(wiki_files: list, wiki: str) -> dict:
return candidates
def get_wiki_files(wikis: list, filelist: list) -> list:
def error_no_candidate(filelist: list, wiki: str):
    """Report on stderr that nothing matched *wiki*, list similar URLs, abort.

    Every entry of *filelist* is turned into a URL (element ``0`` joined
    with element ``1``); URLs that contain *wiki* as a substring are printed
    as suggestions.

    Args:
        filelist: All known file entries.
        wiki: The user-supplied name for which no candidate was found.

    Raises:
        SystemExit: Always, with the message ``"Aborting."``.
    """
    print(f"Could not find any match to {wiki}.\n", file=sys.stderr)
    # Plain string: there is nothing to interpolate in this line.
    print("Here is a list of urls similar to your request:", file=sys.stderr)
    for entry in filelist:
        url = urljoin(entry[0], entry[1])
        if wiki in url:
            print(f" - {url}", file=sys.stderr)
    raise SystemExit("Aborting.")
def get_download_files(wikis: list, filelist: list) -> list:
download_files = list()
for wiki in wikis:
wiki_files = find_wiki_files(filelist=filelist, wiki=wiki)
try:
candidates = get_download_candidates(wiki_files=wiki_files, wiki=wiki)
except MultipleFileException:
error_multiple_files(wiki=wiki, wiki_files=wiki_files)
candidates = get_download_candidates(filelist=filelist, wiki=wiki)
if not candidates:
error_no_candidate(wiki=wiki, filelist=filelist)
# Get most current candidate
candidate0 = None
@ -136,9 +117,6 @@ def get_wiki_files(wikis: list, filelist: list) -> list:
if candidate_date > candidate0_date:
candidate0 = candidate
if candidate0 is None:
raise SystemExit(f"Could not find any download candidate for {wiki}. Aborting.")
download_files.append((
wiki,
candidate0[3],
@ -217,12 +195,12 @@ def main():
save_cached_filelist(cache_file=args.cache_file, filelist=filelist)
# Get download files list
wiki_files = get_wiki_files(wikis=args.wiki, filelist=filelist)
download_files = get_download_files(wikis=args.wiki, filelist=filelist)
# Download files
for wiki_file in wiki_files:
file_path = os.path.join(args.destination, wiki_file[0] + wiki_file[1])
download_wiki(session=session, title=wiki_file[3], src=wiki_file[2], dst=file_path)
for download_file in download_files:
file_path = os.path.join(args.destination, download_file[0] + download_file[1])
download_wiki(session=session, title=download_file[3], src=download_file[2], dst=file_path)
if __name__ == "__main__":
main()