fix regex for files
This commit is contained in:
parent
d153e41715
commit
3b368d1a81
1 changed file with 19 additions and 16 deletions
|
@ -43,21 +43,12 @@ def save_cached_filelist(cache_file: str, filelist: list):
|
|||
with open(file=cache_file, mode='w') as file:
|
||||
json.dump(filelist, file)
|
||||
|
||||
def retreive_filelist(session: requests.Session, url: str, cache_file: str = "", cache_max_age: datetime.timedelta = datetime.timedelta(days=1)) -> list:
|
||||
|
||||
if cache_file != "":
|
||||
try:
|
||||
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))
|
||||
if datetime.datetime.now() - mtime < cache_max_age:
|
||||
return load_cached_filelist(cache_file=cache_file)
|
||||
except (FileNotFoundError, CacheException):
|
||||
pass
|
||||
|
||||
def retreive_filelist(session: requests.Session, url: str) -> list:
|
||||
response = session.get(url)
|
||||
response.raise_for_status()
|
||||
|
||||
directories = re.findall(r'<a href="(\w+\/)">', response.text)
|
||||
files = re.findall(r'<a href="([\w-]+\.zim)">', response.text)
|
||||
files = re.findall(r'<a href="([\w\-\.]+\.zim)">', response.text)
|
||||
|
||||
result = list()
|
||||
|
||||
|
@ -67,9 +58,6 @@ def retreive_filelist(session: requests.Session, url: str, cache_file: str = "",
|
|||
for file in files:
|
||||
result.append([url, file])
|
||||
|
||||
if cache_file != "":
|
||||
save_cached_filelist(cache_file=cache_file, filelist=result)
|
||||
|
||||
return result
|
||||
|
||||
def find_wiki_files(filelist: list, wiki: str) -> list:
|
||||
|
@ -148,6 +136,9 @@ def get_wiki_files(wikis: list, filelist: list) -> list:
|
|||
if candidate_date > candidate0_date:
|
||||
candidate0 = candidate
|
||||
|
||||
if candidate0 is None:
|
||||
raise SystemExit(f"Could not find any download candidate for {wiki}. Aborting.")
|
||||
|
||||
download_files.append((
|
||||
wiki,
|
||||
candidate0[3],
|
||||
|
@ -211,7 +202,19 @@ def main():
|
|||
session = requests.Session()
|
||||
|
||||
# Get Filelist
|
||||
filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL, cache_file=args.cache_file if args.cache else "")
|
||||
filelist = None
|
||||
|
||||
if args.cache == True:
|
||||
try:
|
||||
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(args.cache_file))
|
||||
if datetime.datetime.now() - mtime < datetime.timedelta(days=1):
|
||||
filelist = load_cached_filelist(cache_file=args.cache_file)
|
||||
except (FileNotFoundError, CacheException):
|
||||
pass
|
||||
|
||||
if filelist is None:
|
||||
filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL)
|
||||
save_cached_filelist(cache_file=args.cache_file, filelist=filelist)
|
||||
|
||||
# Get download files list
|
||||
wiki_files = get_wiki_files(wikis=args.wiki, filelist=filelist)
|
||||
|
|
Loading…
Reference in a new issue