#!/usr/bin/env python import sys import re import os fileName = "output.txt" outFile = "broken_links.html" whitelistFile = "link-whitelist.txt" if len(sys.argv) < 2: print ("Enter path of input directory") sys.exit() scriptPath = sys.argv[0] outputPath = sys.argv[1] fileNamePath = outputPath + "/" + fileName outFilePath = outputPath + "/" + outFile whitelistFilePath = os.path.dirname(scriptPath) + "/" + whitelistFile with open (whitelistFilePath) as w: whLines = w.readlines() whitelist = [] for line in whLines: link = line.rstrip() whitelist.append(link) with open (fileNamePath) as f: lines = f.readlines() numBrokenLinks = 0 numWhiteListMatches = 0 newLines = ["
"] whiteListLines = [] for line in lines: if "[broken]" in line: strings = line.split(" ") link = strings[2][:-1] link = link.strip() if link in whitelist: whiteListLines.append("" + strings[0] + "\n[whitelist] " + link + "\n") numWhiteListMatches += 1 else: newLines.append("" + strings[0] + "\n
[broken] " + link + "\n") numBrokenLinks += 1 newLines.insert(0,"