Snippets

grimhacker Download Hashes.org

Created by grimhacker

File download_hashes.org.py Added

  • Ignore whitespace
  • Hide word diff
+#!/usr/bin/env python3
+'''
+.       .1111...          | Title: download_hashes.org.py
+    .10000000000011.   .. | Author: Oliver Morton
+ .00              000...  | Email: grimhacker@grimhacker.com
+1                  01..   | Description:
+                    ..    |  Download left and found hashes from hashes.org
+                   ..     |  public leaks
+GrimHacker        ..      |
+                 ..       | Requires Python 3, requests, beautifulsoup4
+grimhacker.com  ..        |
+@grimhacker    ..         |
+----------------------------------------------------------------------------
+download_hashes.org.py
+    Copyright (C) 2017  Oliver Morton
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+'''
+
+__version__ = "$Revision: 1.0$"
+
+# $Source$
+
+import requests
+import json
+import sys
+import re
+import os
+
+from bs4 import BeautifulSoup
+from pathlib import Path
+
+
def print_version():
    """Print the command line banner, including the script version.

    Fix: the original banner had no ``{}`` placeholder, so the trailing
    ``.format(__version__)`` was a no-op and the version never appeared.
    ``globals().get`` keeps this callable even if ``__version__`` is missing.
    """
    print("""
.       .1111...          | Title: download_hashes.org.py
    .10000000000011.   .. | Author: Oliver Morton
 .00              000...  | Email: grimhacker@grimhacker.com
1                  01..   | Description:
                    ..    |  Download left and found hashes from hashes.org
                   ..     |  public leaks
GrimHacker        ..      |
                 ..       | Requires Python 3, requests, beautifulsoup4
grimhacker.com  ..        |
@grimhacker    ..         |
----------------------------------------------------------------------------
    Version: {}
    This program comes with ABSOLUTELY NO WARRANTY.
    This is free software, and you are welcome to redistribute it
    under certain conditions. See GPLv2 License.
----------------------------------------------------------------------------
""".format(globals().get("__version__", "unknown")))
+
def parse_public():
    """Scrape the hashes.org public leaks page into a dict keyed by leak id.

    Each value is a dict with the columns of the leaks table
    (id, name, last-update, num-hashes, progress) plus absolute
    left-link / found-link download URLs.
    """
    base_url = "https://hashes.org/"
    response = requests.get("{}public.php".format(base_url))
    soup = BeautifulSoup(response.text, "html5lib")
    # The first two <tr> rows of the table are headers; skip them.
    data_rows = soup.find("table").find_all("tr")[2:]
    columns = ["id", "name", "last-update", "num-hashes", "progress",
               "left-link", "found-link"]
    leaks = {}
    for data_row in data_rows:
        cells = data_row.find_all("td")
        entry = {}
        for index, column in enumerate(columns):
            if "link" in column:
                # Link cells hold relative hrefs; make them absolute URLs.
                entry[column] = "{}{}".format(
                    base_url, cells[index].find("a")["href"])
            else:
                entry[column] = cells[index].string
        leaks[entry["id"]] = entry
    print("Found {} leaks".format(len(leaks)))
    return leaks
+
def download_file(url):
    """Stream *url* to a local file named by its Content-Disposition header.

    An existing file of the same name is first backed up as ``<name>.old``.
    Returns the local filename that was written.

    Raises KeyError if the response has no Content-Disposition header and
    IndexError if that header carries no quoted filename.
    """
    print("Downloading '{}'".format(url))
    # stream=True so large leak files are fetched in chunks, not into memory.
    # The `with` block guarantees the connection is released even if the
    # header parsing or the file write below raises (the original leaked the
    # response on those paths).
    with requests.get(url, stream=True) as r:
        disposition = r.headers['content-disposition']
        local_filename = re.findall('filename="(.+)"', disposition)[0]
        if Path(local_filename).exists():
            print("Backing up previous version of '{}'.".format(local_filename))
            # os.replace overwrites a stale .old backup atomically;
            # os.rename would fail on Windows if the target already exists.
            os.replace(local_filename, "{}.old".format(local_filename))
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    print("Downloaded '{}' to '{}'".format(url, local_filename))
    return local_filename
+
def download_leaks(oldleaks, newleaks, link_keys):
    """Download new or updated leaks and record their local filenames.

    *oldleaks* and *newleaks* map leak id -> leak dict (see parse_public).
    A leak is downloaded when it is absent from *oldleaks* or its
    'last-update' value differs. Every URL named in *link_keys* is fetched
    and the resulting filename stored under the matching '*-filename' key.

    Side effects: mutates *newleaks* in place — filename keys are added to
    downloaded leaks, and leaks present only in *oldleaks* (no longer listed
    on the site) are carried over so they are not forgotten.

    Returns a dict containing only the freshly downloaded leaks.
    """
    # (A stray debug print of link_keys was removed here.)
    fresh_downloads = {}
    for leak_id in newleaks:
        newleak = newleaks[leak_id]
        oldleak = oldleaks.get(leak_id)
        # Skip leaks we already hold, unless the site shows a newer update.
        if oldleak is not None and oldleak.get("last-update") == newleak.get("last-update"):
            print("Skipping '{}' as no update.".format(newleak.get("name")))
            continue
        for key in link_keys:
            filename = download_file(newleak[key])
            # e.g. 'found-link' -> 'found-filename'
            newleak[key.replace("link", "filename")] = filename
        fresh_downloads[leak_id] = newleak
    # Keep any old leaks that are no longer listed on the site.
    for leak_id in oldleaks:
        if leak_id not in newleaks:
            newleaks[leak_id] = oldleaks[leak_id]
    return fresh_downloads
+
def load_json(name):
    """Read file *name* and return the parsed JSON document."""
    with open(name, "r") as json_file:
        return json.load(json_file)
+
def output_json(leaks, name):
    """Write *leaks* to file *name* as pretty-printed, key-sorted JSON."""
    formatted = json.dumps(leaks, sort_keys=True, indent=4,
                           separators=(',', ': '))
    with open(name, "w") as out_file:
        out_file.write(formatted)
+
+if __name__ == "__main__":
+    print_version()
+    get_left = bool(int(sys.argv[1])) # 1 = download left hashes as well as found. 0 = just found.
+    link_keys = ["found-link"]
+    if get_left is True:
+        print("getting left")
+        link_keys.append("left-link")
+    leakfile = "hashes.org_leaks.json"
+    freshfile = "hashes.org_fresh_leaks.json"
+    newleaks = parse_public()
+    try:
+        oldleaks = load_json(leakfile)
+    except FileNotFoundError as e:
+        print("Assuming first run")
+        oldleaks = {}
+    fresh = download_leaks(oldleaks, newleaks, link_keys)
+    output_json(newleaks, leakfile)
+    output_json(fresh, freshfile)