fix regex for files
commit 3b368d1a81
parent d153e41715
1 changed file with 19 additions and 16 deletions
@@ -43,21 +43,12 @@ def save_cached_filelist(cache_file: str, filelist: list):
     with open(file=cache_file, mode='w') as file:
         json.dump(filelist, file)
 
-def retreive_filelist(session: requests.Session, url: str, cache_file: str = "", cache_max_age: datetime.timedelta = datetime.timedelta(days=1)) -> list:
-
-    if cache_file != "":
-        try:
-            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(cache_file))
-            if datetime.datetime.now() - mtime < cache_max_age:
-                return load_cached_filelist(cache_file=cache_file)
-        except (FileNotFoundError, CacheException):
-            pass
-
+def retreive_filelist(session: requests.Session, url: str) -> list:
     response = session.get(url)
     response.raise_for_status()
 
     directories = re.findall(r'<a href="(\w+\/)">', response.text)
-    files = re.findall(r'<a href="([\w-]+\.zim)">', response.text)
+    files = re.findall(r'<a href="([\w\-\.]+\.zim)">', response.text)
 
     result = list()
 
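The regex change that gives the commit its title is the key fix: the old character class [\w-] cannot match a filename with a dot anywhere in its base name, so such .zim files silently dropped out of the listing. A quick sanity check with made-up anchor tags (the real Kiwix index HTML differs, and the stackoverflow.com name here is only illustrative of dotted base names):

```python
import re

# Hypothetical listing snippet; the real Kiwix index page differs.
html = ('<a href="wikipedia_en_all_nopic_2023-10.zim">'
        '<a href="stackoverflow.com_en_all_2023-05.zim">')

old_pattern = r'<a href="([\w-]+\.zim)">'     # dots in the base name are rejected
new_pattern = r'<a href="([\w\-\.]+\.zim)">'  # dots (and dashes) are accepted

print(re.findall(old_pattern, html))  # ['wikipedia_en_all_nopic_2023-10.zim']
print(re.findall(new_pattern, html))  # both filenames
```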
@@ -67,9 +58,6 @@ def retreive_filelist(session: requests.Session, url: str, cache_file: str = "",
     for file in files:
         result.append([url, file])
 
-    if cache_file != "":
-        save_cached_filelist(cache_file=cache_file, filelist=result)
-
     return result
 
 def find_wiki_files(filelist: list, wiki: str) -> list:
@ -104,7 +92,7 @@ def get_download_candidates(wiki_files: list, wiki: str) -> dict:
|
||||||
path0 = path
|
path0 = path
|
||||||
if path0 != path:
|
if path0 != path:
|
||||||
raise MultipleFileException(wiki)
|
raise MultipleFileException(wiki)
|
||||||
|
|
||||||
|
|
||||||
file_name, file_extension = os.path.splitext(file)
|
file_name, file_extension = os.path.splitext(file)
|
||||||
file_base, file_date = file_name.rsplit('_', 1)
|
file_base, file_date = file_name.rsplit('_', 1)
|
||||||
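For orientation, the splitext/rsplit pair in this hunk decomposes a .zim filename into extension, base name, and date stamp. A minimal illustration with a hypothetical filename following the base_YYYY-MM.zim convention the code appears to assume:

```python
import os

# Hypothetical filename; the base_YYYY-MM.zim convention is inferred
# from the surrounding code, not documented by the script itself.
file = "wikipedia_en_all_nopic_2023-10.zim"

file_name, file_extension = os.path.splitext(file)
file_base, file_date = file_name.rsplit('_', 1)

print(file_name)       # wikipedia_en_all_nopic_2023-10
print(file_extension)  # .zim
print(file_base)       # wikipedia_en_all_nopic
print(file_date)       # 2023-10
```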
@@ -148,6 +136,9 @@ def get_wiki_files(wikis: list, filelist: list) -> list:
                 if candidate_date > candidate0_date:
                     candidate0 = candidate
 
+        if candidate0 is None:
+            raise SystemExit(f"Could not find any download candidate for {wiki}. Aborting.")
+
         download_files.append((
             wiki,
             candidate0[3],
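A side note on the candidate_date > candidate0_date comparison above: assuming the dates are the YYYY-MM strings produced by the rsplit shown earlier, plain string comparison is sufficient, because zero-padded ISO-style date stamps sort chronologically when compared lexicographically:

```python
# Zero-padded YYYY-MM strings order chronologically as plain strings.
assert "2023-10" > "2023-09"
assert "2024-01" > "2023-12"
assert max(["2023-05", "2023-10", "2022-12"]) == "2023-10"
```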
@@ -211,7 +202,19 @@ def main():
     session = requests.Session()
 
     # Get Filelist
-    filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL, cache_file=args.cache_file if args.cache else "")
+    filelist = None
+
+    if args.cache == True:
+        try:
+            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(args.cache_file))
+            if datetime.datetime.now() - mtime < datetime.timedelta(days=1):
+                filelist = load_cached_filelist(cache_file=args.cache_file)
+        except (FileNotFoundError, CacheException):
+            pass
+
+    if filelist is None:
+        filelist = retreive_filelist(session=session, url=KIWIX_BASE_URL)
+        save_cached_filelist(cache_file=args.cache_file, filelist=filelist)
 
     # Get download files list
     wiki_files = get_wiki_files(wikis=args.wiki, filelist=filelist)