From 3b368d1a814911ab3a6731a76409830edd775943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Marten=20Br=C3=BCggemann?= Date: Sat, 14 Sep 2024 00:40:17 +0200 Subject: [PATCH] fix regex for files --- sync_kiwix.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/sync_kiwix.py b/sync_kiwix.py index f75447a..989ef3d 100644 --- a/sync_kiwix.py +++ b/sync_kiwix.py @@ -43,21 +43,12 @@ def save_cached_filelist(cache_file: str, filelist: list): with open(file=cache_file, mode='w') as file: json.dump(filelist, file) -def retreive_filelist(session: requests.Session, url: str, cache_file: str = "", cache_max_age: datetime.timedelta = datetime.timedelta(days=1)) -> list: - - if cache_file != "": - try: - mtime = datetime.datetime.fromtimestamp(os.path.getmtime(cache_file)) - if datetime.datetime.now() - mtime < cache_max_age: - return load_cached_filelist(cache_file=cache_file) - except (FileNotFoundError, CacheException): - pass - +def retreive_filelist(session: requests.Session, url: str) -> list: response = session.get(url) response.raise_for_status() directories = re.findall(r'', response.text) - files = re.findall(r'', response.text) + files = re.findall(r'', response.text) result = list() @@ -67,9 +58,6 @@ def retreive_filelist(session: requests.Session, url: str, cache_file: str = "", for file in files: result.append([url, file]) - if cache_file != "": - save_cached_filelist(cache_file=cache_file, filelist=result) - return result def find_wiki_files(filelist: list, wiki: str) -> list: @@ -104,7 +92,7 @@ def get_download_candidates(wiki_files: list, wiki: str) -> dict: path0 = path if path0 != path: raise MultipleFileException(wiki) - + file_name, file_extension = os.path.splitext(file) file_base, file_date = file_name.rsplit('_', 1) @@ -148,6 +136,9 @@ def get_wiki_files(wikis: list, filelist: list) -> list: if candidate_date > candidate0_date: candidate0 = candidate + if candidate0 is None: + raise SystemExit(f"Could not find any download candidate for {wiki}. Aborting.") + download_files.append(( wiki, candidate0[3], @@ -211,7 +202,19 @@ def main(): session = requests.Session() # Get Filelist - filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL, cache_file=args.cache_file if args.cache else "") + filelist = None + + if args.cache == True: + try: + mtime = datetime.datetime.fromtimestamp(os.path.getmtime(args.cache_file)) + if datetime.datetime.now() - mtime < datetime.timedelta(days=1): + filelist = load_cached_filelist(cache_file=args.cache_file) + except (FileNotFoundError, CacheException): + pass + + if filelist is None: + filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL) + save_cached_filelist(cache_file=args.cache_file, filelist=filelist) # Get downlaod files list wiki_files = get_wiki_files(wikis=args.wiki, filelist=filelist)