#!/usr/bin/env python #*********************************************************** # # parse-link-check.py # # Arguments: # 1. path to input file # # External file dependencies: # 1. output.txt - the output of sphinx-build # 2. link-whitelist.txt - broken links that should be ignored # # Output: # 1. broken_links.html - provides count of broken and whitelist # matches. Also provides links to all flagged links. Will # appear in the same directory as output.txt # 2. Error code 255 if unexpected broken links are found # #*********************************************************** import sys import re import os fileName = "output.txt" outFile = "broken_links.html" whitelistFile = "link-whitelist.txt" if len(sys.argv) < 2: print ("Enter path of input directory") sys.exit() scriptPath = sys.argv[0] outputPath = sys.argv[1] fileNamePath = outputPath + "/" + fileName outFilePath = outputPath + "/" + outFile whitelistFilePath = os.path.dirname(scriptPath) + "/" + whitelistFile with open (whitelistFilePath) as w: whLines = w.readlines() whitelist = [] for line in whLines: link = line.rstrip() whitelist.append(link) with open (fileNamePath) as f: lines = f.readlines() numBrokenLinks = 0 numWhiteListMatches = 0 newLines = ["
"] whiteListLines = [] for line in lines: if "[broken]" in line: strings = line.split(" ") link = strings[2][:-1] link = link.strip() if link in whitelist: whiteListLines.append("" + strings[0] + "\n[whitelist] " + link + "\n") numWhiteListMatches += 1 else: newLines.append("" + strings[0] + "\n
[broken] " + link + "\n") numBrokenLinks += 1 newLines.insert(0,"