fix matching
This commit is contained in:
parent
3b368d1a81
commit
5749fb4db1
1 changed file with 26 additions and 48 deletions
|
@@ -60,47 +60,18 @@ def retreive_filelist(session: requests.Session, url: str) -> list:
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def find_wiki_files(filelist: list, wiki: str) -> list:
|
def get_download_candidates(filelist: list, wiki: str) -> dict:
|
||||||
result = list()
|
|
||||||
for file in filelist:
|
|
||||||
if file[1].startswith(wiki):
|
|
||||||
result.append(file)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def error_multiple_files(wiki: str, wiki_files: str):
|
|
||||||
print(f"{wiki} has multiple matches. Please specify your input more precisely.\n", file=sys.stderr)
|
|
||||||
print(f"{wiki} matched to:", file=sys.stderr)
|
|
||||||
|
|
||||||
for file in wiki_files:
|
|
||||||
url = urljoin(file[0], file[1])
|
|
||||||
print(f" - {url}", file=sys.stderr)
|
|
||||||
|
|
||||||
raise SystemExit("Aborting.")
|
|
||||||
|
|
||||||
def get_download_candidates(wiki_files: list, wiki: str) -> dict:
|
|
||||||
candidates = list()
|
candidates = list()
|
||||||
|
|
||||||
# Check if results are unique
|
for file_item in filelist:
|
||||||
path0 = None
|
path = file_item[0]
|
||||||
file_base0 = None
|
file = file_item[1]
|
||||||
|
|
||||||
for wiki_file in wiki_files:
|
|
||||||
path = wiki_file[0]
|
|
||||||
file = wiki_file[1]
|
|
||||||
|
|
||||||
if path0 is None:
|
|
||||||
path0 = path
|
|
||||||
if path0 != path:
|
|
||||||
raise MultipleFileException(wiki)
|
|
||||||
|
|
||||||
|
|
||||||
file_name, file_extension = os.path.splitext(file)
|
file_name, file_extension = os.path.splitext(file)
|
||||||
file_base, file_date = file_name.rsplit('_', 1)
|
file_base, file_date = file_name.rsplit('_', 1)
|
||||||
|
|
||||||
if file_base0 is None:
|
if wiki != file_base:
|
||||||
file_base0 = file_base
|
continue
|
||||||
if file_base0 != file_base:
|
|
||||||
raise MultipleFileException(wiki)
|
|
||||||
|
|
||||||
candidates.append((
|
candidates.append((
|
||||||
path,
|
path,
|
||||||
|
@@ -113,15 +84,25 @@ def get_download_candidates(wiki_files: list, wiki: str) -> dict:
|
||||||
|
|
||||||
return candidates
|
return candidates
|
||||||
|
|
||||||
def get_wiki_files(wikis: list, filelist: list) -> list:
|
def error_no_candidate(filelist: list, wiki: str):
|
||||||
|
print(f"Could not find any match to {wiki}.\n", file=sys.stderr)
|
||||||
|
print(f"Here is a list of urls similar to your request:", file=sys.stderr)
|
||||||
|
|
||||||
|
for file_item in filelist:
|
||||||
|
url = urljoin(file_item[0], file_item[1])
|
||||||
|
if wiki in url:
|
||||||
|
print(f" - {url}", file=sys.stderr)
|
||||||
|
|
||||||
|
raise SystemExit("Aborting.")
|
||||||
|
|
||||||
|
def get_download_files(wikis: list, filelist: list) -> list:
|
||||||
download_files = list()
|
download_files = list()
|
||||||
|
|
||||||
for wiki in wikis:
|
for wiki in wikis:
|
||||||
wiki_files = find_wiki_files(filelist=filelist, wiki=wiki)
|
candidates = get_download_candidates(filelist=filelist, wiki=wiki)
|
||||||
try:
|
|
||||||
candidates = get_download_candidates(wiki_files=wiki_files, wiki=wiki)
|
if not candidates:
|
||||||
except MultipleFileException:
|
error_no_candidate(wiki=wiki, filelist=filelist)
|
||||||
error_multiple_files(wiki=wiki, wiki_files=wiki_files)
|
|
||||||
|
|
||||||
# Get most current candidate
|
# Get most current candidate
|
||||||
candidate0 = None
|
candidate0 = None
|
||||||
|
@@ -136,9 +117,6 @@ def get_wiki_files(wikis: list, filelist: list) -> list:
|
||||||
if candidate_date > candidate0_date:
|
if candidate_date > candidate0_date:
|
||||||
candidate0 = candidate
|
candidate0 = candidate
|
||||||
|
|
||||||
if candidate0 is None:
|
|
||||||
raise SystemExit(f"Could not find any download candidate for {wiki}. Aborting.")
|
|
||||||
|
|
||||||
download_files.append((
|
download_files.append((
|
||||||
wiki,
|
wiki,
|
||||||
candidate0[3],
|
candidate0[3],
|
||||||
|
@@ -217,12 +195,12 @@ def main():
|
||||||
save_cached_filelist(cache_file=args.cache_file, filelist=filelist)
|
save_cached_filelist(cache_file=args.cache_file, filelist=filelist)
|
||||||
|
|
||||||
# Get download files list
|
# Get download files list
|
||||||
wiki_files = get_wiki_files(wikis=args.wiki, filelist=filelist)
|
download_files = get_download_files(wikis=args.wiki, filelist=filelist)
|
||||||
|
|
||||||
# Download files
|
# Download files
|
||||||
for wiki_file in wiki_files:
|
for download_file in download_files:
|
||||||
file_path = os.path.join(args.destination, wiki_file[0] + wiki_file[1])
|
file_path = os.path.join(args.destination, download_file[0] + download_file[1])
|
||||||
download_wiki(session=session, title=wiki_file[3], src=wiki_file[2], dst=file_path)
|
download_wiki(session=session, title=download_file[3], src=download_file[2], dst=file_path)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
Loading…
Reference in a new issue