From 92c499067dc6d3d47bd99456f924241db542512f Mon Sep 17 00:00:00 2001 From: Michael Vincerra Date: Sat, 15 Dec 2018 18:29:00 -0800 Subject: [PATCH] Adds new scripts dir and Python bundle_lister.py. Includes: - Local Makefile to be called from parent Makefile at source/ - README.md - cloned_repo dir necessary for executing bundle_lister.py - requirements.txt per developer standard - template.html also necessary for executing bundle_lister.py Signed-off-by: Michael Vincerra --- source/scripts/_python/Makefile | 9 ++ source/scripts/_python/README.md | 19 ++++ source/scripts/_python/bundle_lister.py | 98 +++++++++++++++++++++ source/scripts/_python/cloned_repo/.gitkeep | 0 source/scripts/_python/requirements.txt | 2 + source/scripts/_python/styles.css | 48 ++++++++++ source/scripts/_python/template.html | 55 ++++++++++++ 7 files changed, 231 insertions(+) create mode 100644 source/scripts/_python/Makefile create mode 100644 source/scripts/_python/README.md create mode 100644 source/scripts/_python/bundle_lister.py create mode 100644 source/scripts/_python/cloned_repo/.gitkeep create mode 100644 source/scripts/_python/requirements.txt create mode 100644 source/scripts/_python/styles.css create mode 100644 source/scripts/_python/template.html diff --git a/source/scripts/_python/Makefile b/source/scripts/_python/Makefile new file mode 100644 index 00000000..251254ac --- /dev/null +++ b/source/scripts/_python/Makefile @@ -0,0 +1,9 @@ + +py: + python bundle_lister.py + cp bundles.html.txt ../../introduction + rm -rf cloned_repo/* + rm bundles.html.txt + @echo "Python script finished succesfully!" + @echo "Next run make html. Then run make publish." + diff --git a/source/scripts/_python/README.md b/source/scripts/_python/README.md new file mode 100644 index 00000000..6fa5005c --- /dev/null +++ b/source/scripts/_python/README.md @@ -0,0 +1,19 @@ +`bundle_lister.py` is a Python (3.6.0) web scraper and file generator. 
First, it clones the clr-bundles repository, https://github.com/clearlinux/clr-bundles. Second, it parses the content of all bundles in the clr-bundles/ directory and the `packages-descriptions` file. Third, it uses Jinja2 to output the result of the analysis to: bundles.html.txt. This ``.txt`` file is then referenced in `bundles.rst`, whose title is `Available bundles`, which is currently: https://clearlinux.org/documentation/clear-linux/reference/bundles. + +`bundle_lister.py` automates documentation so it shows current bundles and packages per daily updates to the clr-bundles GitHub repository. + +`bundle_lister.py` will be invoked in a bash script in the `source/Makefile` of clear-linux documentation. Therefore, `bundle_lister.py` will automatically create newly scraped and parsed data upon each build of the clearlinux.org website, and output an accurate, up-to-date table that shows all bundles and packages for interested Linux developers and admins. + +See `requirements.txt` for dependencies necessary to run this application. + +Python==3.6.0 + +To run `bundle_lister.py` in the terminal, enter: `python bundle_lister.py`. + +Note: The `cloned_repo` directory must remain in the same directory as `bundle_lister.py` (the script's parent directory) in order for this code to work. + +Note: A successful build will produce a file named `bundles.html.txt` showing a table of current bundles and pundles (packages) alphabetized, with a (UTC) time and date stamp in the right corner. +An unsuccessful build will result in traceback errors, which should be analyzed before running a new build. 
"""Generate the Clear Linux* OS bundle/package table (``bundles.html.txt``).

The script clones https://github.com/clearlinux/clr-bundles into
``./cloned_repo/``, parses every bundle definition under ``bundles/`` plus the
``packages`` list, and renders ``template.html`` with Jinja2.

Third-party requirements (see requirements.txt): Jinja2, GitPython.
Run it with ``python bundle_lister.py`` from the directory that contains
``template.html`` and ``cloned_repo/``.
"""
import io
import os
import re
from datetime import datetime, timezone

GITHUB_BASE = "https://github.com/clearlinux/clr-bundles/tree/master/bundles/"
PUNDLES = "https://github.com/clearlinux/clr-bundles/blob/master/packages"

# Locations are relative to the current working directory.
CLONE_DIR = "./cloned_repo/"
CLONE_URL = "https://github.com/clearlinux/clr-bundles.git"
BUNDLES_DIR = "./cloned_repo/clr-bundles/bundles"
PACKAGES_FILE = "./cloned_repo/clr-bundles/packages"
TEMPLATE_NAME = "template.html"
OUTPUT_FILE = "bundles.html.txt"

# "# [TITLE]: name" header of a bundle file.  The gap after the colon is
# whitespace (the earlier ``\w?`` swallowed the first letter of the title
# whenever no space followed the colon).
PATTERN1 = re.compile(r"#\s?\[TITLE\]:\s*(.*)")
# "# [DESCRIPTION]: text" header of a bundle file.
PATTERN2 = re.compile(r"#\s?\[DESCRIPTION\]:\s*(.*)")
# Text inside one pair of parentheses, e.g. the "x" of "include(x)".
PATTERN3 = re.compile(r"\(([^()]*)\)")
# Package name: the whole first token of a line that starts with a word
# character, so names such as "foo-bar-baz" or "libstdc++" are kept intact.
PATTERN4 = re.compile(r"^(\w\S*)")
# Package description: everything after the first " # " separator.
PATTERN5 = re.compile(r"^\w.*?\s#\s(.*)")


def extractor(lines):
    """Parse the lines of one bundle definition into a template-ready dict.

    Args:
        lines: iterable of text lines read from a bundle file.

    Returns:
        A dict with the keys ``title``, ``data_desc``, ``include_list`` and
        ``url``.  Fields that are not found keep their placeholder values
        (``"title"``, ``"description"``); for empty input ``url`` stays
        ``"url"``, otherwise it is ``GITHUB_BASE`` followed by the title.
    """
    lines = list(lines)
    bundle_title = "title"
    data_desc = "description"
    url = "url"
    include_list = []

    for line in lines:
        title = PATTERN1.match(line)
        if title:
            bundle_title = title.group(1).strip()

        desc = PATTERN2.match(line)
        if desc:
            data_desc = desc.group(1).strip()

        # Header/comment lines may contain parentheses of their own (for
        # example inside a description); those are not bundle includes.
        if line.lstrip().startswith("#"):
            continue

        includes = PATTERN3.findall(line)
        if includes:
            # Only the first parenthesised item on a line is recorded.
            include_list.append(includes[0].strip())

    if lines:
        # Plain concatenation: this is a URL, not a filesystem path, so
        # os.path.join (backslashes on Windows) is the wrong tool.
        url = GITHUB_BASE + bundle_title

    return {
        "title": bundle_title,
        "data_desc": data_desc,
        "include_list": include_list,
        "url": url,
    }


def _parse_pundles(lines):
    """Turn lines of the form ``name # description`` into pundle dicts."""
    entries = []
    for line in lines:
        name = PATTERN4.match(line)
        if not name:
            continue  # blank line, comment, or anything not starting a name
        desc = PATTERN5.match(line)
        # Name and description come from the same line, so a package without
        # a description can no longer shift the pairing of later packages.
        text = desc.group(1).strip().strip("[]") if desc else ""
        entries.append({"title": name.group(1), "pun_desc": text, "purl": PUNDLES})
    return entries


def pundler(path=PACKAGES_FILE):
    """Read the packages list and return one dict per package ("pundle").

    Args:
        path: file to read; defaults to the ``packages`` file inside the
            cloned clr-bundles checkout.

    Returns:
        A list of dicts with the keys ``title``, ``pun_desc`` and ``purl``.
        ``pun_desc`` is an empty string when the line has no description.

    Raises:
        OSError: if the file cannot be opened.
    """
    with io.open(path, encoding="utf-8") as file_obj:
        return _parse_pundles(file_obj)


def bundler():
    """Clone clr-bundles, parse it and write ``bundles.html.txt``.

    Raises:
        git.exc.GitCommandError: if cloning fails and no earlier checkout
            is available to work from.
    """
    # Imported here so the pure parsing helpers above remain importable
    # without the third-party packages installed.
    import git
    import jinja2

    try:
        git.Git(CLONE_DIR).clone(CLONE_URL)
    except git.exc.GitCommandError:
        # A leftover checkout makes "git clone" fail; reuse it.  Any other
        # failure leaves nothing to parse, so surface the git error instead
        # of crashing later on a missing file.
        if not os.path.isdir(BUNDLES_DIR):
            raise

    data = []
    for root, _dirs, files in os.walk(BUNDLES_DIR, topdown=False):
        for name in files:
            with io.open(os.path.join(root, name), encoding="utf-8") as file_obj:
                data.append(extractor(file_obj.readlines()))
    data.extend(pundler())

    # Drop entries with an empty title, then sort case-insensitively.
    entries = sorted(
        (entry for entry in data if entry.get("title")),
        key=lambda entry: entry["title"].lower(),
    )

    env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath="./"))
    template = env.get_template(TEMPLATE_NAME)
    # The template prints "now" as a UTC timestamp.
    output = template.render(data=entries, now=datetime.now(timezone.utc))

    with io.open(OUTPUT_FILE, "w", encoding="utf-8") as out_file:
        out_file.write(output)


if __name__ == "__main__":
    bundler()
font-family: IntelClear-Regular,Helvetica,Arial,sans-serif; + align-content: center; + padding: 5px; + border: #ccc solid 1px; + background-color: #555; + color: #fff; + text-transform: uppercase; + font-size: 18px; +} +tr { + padding-top: 20px ; + padding-bottom: 10px; +} + tbody tr:nth-child(odd) { + background-color: #e0e0e0; +} + .bundlename { + font-family: IntelClear-Regular,Helvetica,Arial,sans-serif; + font-size: 16px; + font-weight: bolder; + padding-left: 6px; + line-height: 18px; + padding-top:7px ; + padding-bottom: 5px; +} +.bundledesc { + font-family: IntelClear-Regular,Helvetica,Arial,sans-serif; + font: italic; + font-size: 16px; + padding-left: 6px; + line-height: 18px; + padding-top: 7px ; + padding-bottom: 5px; +} + ul, li { + margin-left: 8px; + /* padding: 0; */ + padding-left: 5px; + padding-top: 2px; + line-height: 16px; +} diff --git a/source/scripts/_python/template.html b/source/scripts/_python/template.html new file mode 100644 index 00000000..416d4e73 --- /dev/null +++ b/source/scripts/_python/template.html @@ -0,0 +1,55 @@ + + + + + + + Bundles in Clear Linux* OS + + + + + + + + + + + + + + + + + + + + + {% for d in data %} + {% if d.url %} + + + + + {% else %} + + + + + {% endif %} + {% endfor %} + +
+ Updated: {{ now.strftime('%x %H:%M') }} UTC +
Name Description
{{d.title}}{{d.data_desc}}
+ {% if d.include_list %} +

Includes bundle(s): + {% for include in d.include_list %} +

  • {{include}}
  • + {% endfor %} +

    + {% endif %} +
    {{d.title}} {{d.pun_desc}}
    + + + \ No newline at end of file