1
0
Fork 0

Compare commits

...

5 Commits

  1. 9
      CHANGELOG.md
  2. 2
      Makefile
  3. 4
      scrapthechan/__init__.py
  4. 2
      scrapthechan/cli/scraper.py
  5. 11
      scrapthechan/scraper.py

9
CHANGELOG.md

@ -1,5 +1,14 @@
# Changelog
## 0.5.1 - 2021-05-04
## Added
- Message when a file cannot be retrieved.
## Fixed
- Removed excessive hash comparison when files have the same name;
- A string that was missing its f-string prefix, so the reason why a thread
wasn't found is now displayed.
## 0.5.0 - 2021-05-03
## Added
- The program now makes use of the skip_posts argument. Use CLI option `-S <number>`

2
Makefile

@ -1,7 +1,7 @@
build: scrapthechan README.md setup.cfg
python setup.py sdist bdist_wheel
install:
python -m pip install --upgrade dist/scrapthechan-0.5.0-py3-none-any.whl --user
python -m pip install --upgrade dist/scrapthechan-0.5.1-py3-none-any.whl --user
uninstall:
# We change directory so pip uninstall will run, it'll fail otherwise.
@cd ~/

4
scrapthechan/__init__.py

@ -1,5 +1,5 @@
__date__ = "3 May 2021"
__version__ = "0.5.0"
__date__ = "4 May 2021"
__version__ = "0.5.1"
__author__ = "Alexander \"Arav\" Andreev"
__email__ = "me@arav.top"
__copyright__ = f"Copyright (c) 2020,2021 {__author__} <{__email__}>"

2
scrapthechan/cli/scraper.py

@ -97,7 +97,7 @@ def main() -> None:
exit()
except ThreadNotFoundError as e:
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
"not found. Reason: {e.reason}")
f"not found. Reason: {e.reason}")
exit()
files_count = len(parser.files)

11
scrapthechan/scraper.py

@ -110,11 +110,16 @@ class Scraper:
retries -= 1
else:
break
if retries == 0:
print(f"Cannot retrieve {f.download_url}, {filepath}.")
return
if is_same_filename:
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
_, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
_, f2_dig = self._hash_file(filepath, f.hash_algorithm)
if f1_dig == f2_dig:
remove(filepath)
except FileNotFoundError as e:
print("File Not Found", filepath)
except HTTPError as e:
print("HTTP Error", e.code, e.reason, f.download_url)
if exists(filepath):

Loading…
Cancel
Save