diff --git a/sync_kiwix.py b/sync_kiwix.py
index f75447a..989ef3d 100644
--- a/sync_kiwix.py
+++ b/sync_kiwix.py
@@ -43,21 +43,12 @@ def save_cached_filelist(cache_file: str, filelist: list):
with open(file=cache_file, mode='w') as file:
json.dump(filelist, file)
-def retreive_filelist(session: requests.Session, url: str, cache_file: str = "", cache_max_age: datetime.timedelta = datetime.timedelta(days=1)) -> list:
-
- if cache_file != "":
- try:
- mtime = datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))
- if datetime.datetime.now() - mtime < cache_max_age:
- return load_cached_filelist(cache_file=cache_file)
- except (FileNotFoundError, CacheException):
- pass
-
+def retreive_filelist(session: requests.Session, url: str) -> list:
response = session.get(url)
response.raise_for_status()
directories = re.findall(r'', response.text)
- files = re.findall(r'', response.text)
+ files = re.findall(r'', response.text)
result = list()
@@ -67,9 +58,6 @@ def retreive_filelist(session: requests.Session, url: str, cache_file: str = "",
for file in files:
result.append([url, file])
- if cache_file != "":
- save_cached_filelist(cache_file=cache_file, filelist=result)
-
return result
def find_wiki_files(filelist: list, wiki: str) -> list:
@@ -104,7 +92,7 @@ def get_download_candidates(wiki_files: list, wiki: str) -> dict:
path0 = path
if path0 != path:
raise MultipleFileException(wiki)
-
+
file_name, file_extension = os.path.splitext(file)
file_base, file_date = file_name.rsplit('_', 1)
@@ -148,6 +136,9 @@ def get_wiki_files(wikis: list, filelist: list) -> list:
if candidate_date > candidate0_date:
candidate0 = candidate
+ if candidate0 is None:
+ raise SystemExit(f"Could not find any download candidate for {wiki}. Aborting.")
+
download_files.append((
wiki,
candidate0[3],
@@ -211,7 +202,19 @@ def main():
session = requests.Session()
# Get Filelist
- filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL, cache_file=args.cache_file if args.cache else "")
+    filelist = None
+    if args.cache:  # only consult the on-disk cache when caching was requested
+        try:
+            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(args.cache_file))
+            if datetime.datetime.now() - mtime < datetime.timedelta(days=1):  # cache file modified less than a day ago
+                filelist = load_cached_filelist(cache_file=args.cache_file)
+        except (FileNotFoundError, CacheException):
+            pass  # no usable cache: fall through to a fresh download below
+
+    if filelist is None:
+        filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL)
+        if args.cache:  # do not write the cache file when caching is disabled (matches the pre-refactor behaviour)
+            save_cached_filelist(cache_file=args.cache_file, filelist=filelist)
# Get downlaod files list
wiki_files = get_wiki_files(wikis=args.wiki, filelist=filelist)