fix matching
This commit is contained in:
parent
3b368d1a81
commit
5749fb4db1
1 changed file with 26 additions and 48 deletions
|
@ -60,47 +60,18 @@ def retreive_filelist(session: requests.Session, url: str) -> list:
|
|||
|
||||
return result
|
||||
|
||||
def find_wiki_files(filelist: list, wiki: str) -> list:
    """Return the entries of *filelist* whose file name starts with *wiki*.

    Each entry is indexed like a sequence; the item at index 1 is the value
    tested with ``str.startswith``. Matching entries are returned unchanged
    and in their original order. An empty list is returned when nothing
    matches.
    """
    return [entry for entry in filelist if entry[1].startswith(wiki)]
|
||||
|
||||
def error_multiple_files(wiki: str, wiki_files: list):
    """Report an ambiguous *wiki* request on stderr and abort.

    Every entry of *wiki_files* is turned into a URL by joining its item at
    index 0 with its item at index 1 via ``urljoin``; the URLs are listed so
    the user can make the request more specific.

    Raises:
        SystemExit: always, with the message ``"Aborting."``.
    """
    print(f"{wiki} has multiple matches. Please specify your input more precisely.\n", file=sys.stderr)
    print(f"{wiki} matched to:", file=sys.stderr)

    for entry in wiki_files:
        url = urljoin(entry[0], entry[1])
        print(f" - {url}", file=sys.stderr)

    raise SystemExit("Aborting.")
|
||||
|
||||
def get_download_candidates(wiki_files: list, wiki: str) -> dict:
|
||||
def get_download_candidates(filelist: list, wiki: str) -> dict:
|
||||
candidates = list()
|
||||
|
||||
# Check if results are unique
|
||||
path0 = None
|
||||
file_base0 = None
|
||||
|
||||
for wiki_file in wiki_files:
|
||||
path = wiki_file[0]
|
||||
file = wiki_file[1]
|
||||
|
||||
if path0 is None:
|
||||
path0 = path
|
||||
if path0 != path:
|
||||
raise MultipleFileException(wiki)
|
||||
|
||||
for file_item in filelist:
|
||||
path = file_item[0]
|
||||
file = file_item[1]
|
||||
|
||||
file_name, file_extension = os.path.splitext(file)
|
||||
file_base, file_date = file_name.rsplit('_', 1)
|
||||
|
||||
if file_base0 is None:
|
||||
file_base0 = file_base
|
||||
if file_base0 != file_base:
|
||||
raise MultipleFileException(wiki)
|
||||
if wiki != file_base:
|
||||
continue
|
||||
|
||||
candidates.append((
|
||||
path,
|
||||
|
@ -113,15 +84,25 @@ def get_download_candidates(wiki_files: list, wiki: str) -> dict:
|
|||
|
||||
return candidates
|
||||
|
||||
def get_wiki_files(wikis: list, filelist: list) -> list:
|
||||
def error_no_candidate(filelist: list, wiki: str):
    """Report on stderr that *wiki* matched nothing, then abort.

    As a hint, every URL built from *filelist* (item at index 0 joined with
    item at index 1 via ``urljoin``) that contains *wiki* as a substring is
    listed.

    Raises:
        SystemExit: always, with the message ``"Aborting."``.
    """
    print(f"Could not find any match to {wiki}.\n", file=sys.stderr)
    # Plain string: there is nothing to interpolate in this line.
    print("Here is a list of urls similar to your request:", file=sys.stderr)

    for file_item in filelist:
        url = urljoin(file_item[0], file_item[1])
        if wiki in url:
            print(f" - {url}", file=sys.stderr)

    raise SystemExit("Aborting.")
|
||||
|
||||
def get_download_files(wikis: list, filelist: list) -> list:
|
||||
download_files = list()
|
||||
|
||||
for wiki in wikis:
|
||||
wiki_files = find_wiki_files(filelist=filelist, wiki=wiki)
|
||||
try:
|
||||
candidates = get_download_candidates(wiki_files=wiki_files, wiki=wiki)
|
||||
except MultipleFileException:
|
||||
error_multiple_files(wiki=wiki, wiki_files=wiki_files)
|
||||
candidates = get_download_candidates(filelist=filelist, wiki=wiki)
|
||||
|
||||
if not candidates:
|
||||
error_no_candidate(wiki=wiki, filelist=filelist)
|
||||
|
||||
# Get most current candidate
|
||||
candidate0 = None
|
||||
|
@ -136,9 +117,6 @@ def get_wiki_files(wikis: list, filelist: list) -> list:
|
|||
if candidate_date > candidate0_date:
|
||||
candidate0 = candidate
|
||||
|
||||
if candidate0 is None:
|
||||
raise SystemExit(f"Could not find any download candidate for {wiki}. Aborting.")
|
||||
|
||||
download_files.append((
|
||||
wiki,
|
||||
candidate0[3],
|
||||
|
@ -217,12 +195,12 @@ def main():
|
|||
save_cached_filelist(cache_file=args.cache_file, filelist=filelist)
|
||||
|
||||
# Get download files list
|
||||
wiki_files = get_wiki_files(wikis=args.wiki, filelist=filelist)
|
||||
download_files = get_download_files(wikis=args.wiki, filelist=filelist)
|
||||
|
||||
# Download files
|
||||
for wiki_file in wiki_files:
|
||||
file_path = os.path.join(args.destination, wiki_file[0] + wiki_file[1])
|
||||
download_wiki(session=session, title=wiki_file[3], src=wiki_file[2], dst=file_path)
|
||||
for download_file in download_files:
|
||||
file_path = os.path.join(args.destination, download_file[0] + download_file[1])
|
||||
download_wiki(session=session, title=download_file[3], src=download_file[2], dst=file_path)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in a new issue