~cypheon/ocaml-docset

d712ce48aa45780169638ad2c7705dd63b2b1743 — Johann Rudloff 4 years ago
Initial commit.
5 files changed, 192 insertions(+), 0 deletions(-)

A Info.plist
A Makefile
A Pipfile
A Pipfile.lock
A mkindex.py
A  => Info.plist +14 -0
@@ 1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>CFBundleIdentifier</key>
	<string>ocaml-unofficial</string>
	<key>CFBundleName</key>
	<string>OCaml (Unofficial)</string>
	<key>DocSetPlatformFamily</key>
	<string>ocaml-unofficial</string>
	<key>isDashDocset</key>
	<true/>
</dict>
</plist>

A  => Makefile +18 -0
@@ 1,18 @@
ROOT=ocaml-unofficial.docset
CONTENTS=$(ROOT)/Contents/Resources/Documents
ORIGINAL_DOC=files/ocaml-4.09-refman-html.tar.gz
BUILD=_build

all: extract copy

$(CONTENTS):
	mkdir -p $@

extract:
	mkdir -p $(BUILD)/source
	tar xf $(ORIGINAL_DOC) -C $(BUILD)/source

copy: extract $(CONTENTS)
	cp -av $(BUILD)/source/htmlman/. $(CONTENTS)

.PHONY: extract

A  => Pipfile +12 -0
@@ 1,12 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
beautifulsoup4 = "==4.4.0"

[requires]
python_version = "3.7"

A  => Pipfile.lock +30 -0
@@ 1,30 @@
{
    "_meta": {
        "hash": {
            "sha256": "cdbce09c5c844c3ea54b79ebecc701da7b6c93518f7dda706bd261fced4dcb69"
        },
        "pipfile-spec": 6,
        "requires": {
            "python_version": "3.7"
        },
        "sources": [
            {
                "name": "pypi",
                "url": "https://pypi.org/simple",
                "verify_ssl": true
            }
        ]
    },
    "default": {
        "beautifulsoup4": {
            "hashes": [
                "sha256:a43204aaee0ef590a6073470f094564ddef6aca5f68c143c99262f8b4923af8c",
                "sha256:b702721f8722c94fbd2fbdfb42183c4b5b3d8871186b92a8898e2dfe4869aadb",
                "sha256:fad91da88f69438b9ba939ab1b2cabaa31b1d914f1cccb4bb157a993ed2917f6"
            ],
            "index": "pypi",
            "version": "==4.4.0"
        }
    },
    "develop": {}
}

A  => mkindex.py +118 -0
@@ 1,118 @@
#!/usr/bin/env python3

import os
import re

from bs4 import BeautifulSoup

TYPE_CONSTRUCTOR = 'Constructor'
TYPE_EXCEPTION   = 'Exception'
TYPE_FIELD       = 'Field'
TYPE_FUNCTION    = 'Function'
TYPE_LIBRARY     = 'Library'
TYPE_MODULE      = 'Module'
TYPE_TYPE        = 'Type'
TYPE_VALUE       = 'Value'

RE_LIBRARY_CHAPTER = re.compile(r'.+The ([^ ]+) library')

def add_index(name, typ, path):
    print(f'{name:32s}  {typ:12s}  {path}')

def contains(node, string):
    for s in node.strings:
        if string in s:
            return True
    return False

def run(filename):
    with open(filename) as fp:
        soup = BeautifulSoup(fp, 'html.parser')
    soup.made_changes = False
    h1 = soup.find('h1')
    if h1 is None:
        print('WARN: no h1: ' + filename)
        return
    h1_content = list(h1.stripped_strings)
    libmatch = RE_LIBRARY_CHAPTER.fullmatch(' '.join(h1_content))
    def anchor(id):
        return filename + '#' + id
    if h1_content[0].startswith('Module'):
        module_name = h1_content[1]
        add_index(module_name, TYPE_MODULE, filename)
        handle_module(filename, module_name, soup)
    elif libmatch is not None:
        libname = libmatch.group(1)
        add_index(libname, TYPE_LIBRARY, anchor(h1['id']))
        handle_library(filename, libname, soup)
    else:
        print('WARN: no module: ' + filename)
        return

RE_LIB_TYPE = re.compile(r'type (?:.+ |)([a-zA-Z_][a-zA-Z0-9_]*)')
RE_LIB_EXN = re.compile(r'exception ([a-zA-Z_][a-zA-Z0-9_]*)(?: of .+|)')

def handle_library(filename, library_name, soup):
    def anchor(id):
        return filename + '#' + id

    next_id = {'id': 0}
    def autoid():
        id, next_id['id'] = next_id['id'], next_id['id'] + 1
        return f'autoid_{id:04x}'
    def getid(element):
        if 'id' not in element.attrs:
            element['id'] = autoid()
        return element['id']

    for pre in soup.find_all('pre'):
        pretext = ' '.join(pre.stripped_strings)
        m_type = RE_LIB_TYPE.fullmatch(pretext)
        if m_type is not None:
            typname = m_type.group(1)
            add_index(typname, TYPE_TYPE, anchor(getid(pre)))
            continue

        m_exn = RE_LIB_EXN.fullmatch(pretext)
        if m_exn is not None:
            exnname = m_exn.group(1)
            add_index(exnname, TYPE_EXCEPTION, anchor(getid(pre)))
            continue

def handle_module(filename, module_name, soup):
    def anchor(id):
        return filename + '#' + id

    for span in soup.find_all('span', id=True):
        spanid = span['id']
        if spanid.startswith('TYPEELT'):
            name = spanid[7:]
            # this can either be a constructor or a record field
            full_code = ' '.join(span.parent.stripped_strings)
            if ':' in full_code:
                add_index(f'{module_name}.{name}', TYPE_FIELD, anchor(spanid))
            else:
                add_index(f'{module_name}.{name}', TYPE_CONSTRUCTOR, anchor(spanid))
        elif spanid.startswith('TYPE'):
            name = spanid[4:]
            add_index(f'{module_name}.{name}', TYPE_TYPE, anchor(spanid))
        elif spanid.startswith('EXCEPTION'):
            name = spanid[9:]
            add_index(f'{module_name}.{name}', TYPE_EXCEPTION, anchor(spanid))
        elif spanid.startswith('VAL'):
            name = spanid[3:]
            if contains(span.parent, '->'):
                valtype = TYPE_FUNCTION
            else:
                valtype = TYPE_VALUE
            add_index(f'{module_name}.{name}', valtype, anchor(spanid))
            # print(list(span.parent.strings))

if __name__ == '__main__':
    import sys
    import traceback
    for filename in sys.argv[1:]:
        try:
            run(filename)
        except:
            traceback.print_exc()