gregtaole/pyParseDep.go

## pyParseDep.go
package main

import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
)

// wg tracks the worker goroutines started by main; findImports and
// ParseImports each call wg.Done when they finish.
var wg sync.WaitGroup
// pyStdlib lists Python standard library module names. removeStdlibAndUser
// compares imported package names against it so that standard library modules
// are not reported as dependencies.
// NOTE(review): the entries look like the output of the pyStdlib.go generator
// run against the Python 3 module index — confirm before regenerating.
var pyStdlib = []string{
	"__future__",
	"__main__",
	"_dummy_thread",
	"_thread",
	"abc",
	"aifc",
	"argparse",
	"array",
	"ast",
	"asynchat",
	"asyncio",
	"asyncore",
	"atexit",
	"audioop",
	"base64",
	"bdb",
	"binascii",
	"binhex",
	"bisect",
	"builtins",
	"bz2",
	"calendar",
	"cgi",
	"cgitb",
	"chunk",
	"cmath",
	"cmd",
	"code",
	"codecs",
	"codeop",
	"collections",
	"collections.abc",
	"colorsys",
	"compileall",
	"concurrent",
	"concurrent.futures",
	"configparser",
	"contextlib",
	"copy",
	"copyreg",
	"cProfile",
	"crypt",
	"csv",
	"ctypes",
	"curses",
	"curses.ascii",
	"curses.panel",
	"curses.textpad",
	"datetime",
	"dbm",
	"dbm.dumb",
	"dbm.gnu",
	"dbm.ndbm",
	"decimal",
	"difflib",
	"dis",
	"distutils",
	"distutils.archive_util",
	"distutils.bcppcompiler",
	"distutils.ccompiler",
	"distutils.cmd",
	"distutils.command",
	"distutils.command.bdist",
	"distutils.command.bdist_dumb",
	"distutils.command.bdist_msi",
	"distutils.command.bdist_packager",
	"distutils.command.bdist_rpm",
	"distutils.command.bdist_wininst",
	"distutils.command.build",
	"distutils.command.build_clib",
	"distutils.command.build_ext",
	"distutils.command.build_py",
	"distutils.command.build_scripts",
	"distutils.command.check",
	"distutils.command.clean",
	"distutils.command.config",
	"distutils.command.install",
	"distutils.command.install_data",
	"distutils.command.install_headers",
	"distutils.command.install_lib",
	"distutils.command.install_scripts",
	"distutils.command.register",
	"distutils.command.sdist",
	"distutils.core",
	"distutils.cygwinccompiler",
	"distutils.debug",
	"distutils.dep_util",
	"distutils.dir_util",
	"distutils.dist",
	"distutils.errors",
	"distutils.extension",
	"distutils.fancy_getopt",
	"distutils.file_util",
	"distutils.filelist",
	"distutils.log",
	"distutils.msvccompiler",
	"distutils.spawn",
	"distutils.sysconfig",
	"distutils.text_file",
	"distutils.unixccompiler",
	"distutils.util",
	"distutils.version",
	"doctest",
	"dummy_threading",
	"email",
	"email.charset",
	"email.contentmanager",
	"email.encoders",
	"email.errors",
	"email.generator",
	"email.header",
	"email.headerregistry",
	"email.iterators",
	"email.message",
	"email.mime",
	"email.parser",
	"email.policy",
	"email.utils",
	"encodings",
	"encodings.idna",
	"encodings.mbcs",
	"encodings.utf_8_sig",
	"ensurepip",
	"enum",
	"errno",
	"faulthandler",
	"fcntl",
	"filecmp",
	"fileinput",
	"fnmatch",
	"formatter",
	"fpectl",
	"fractions",
	"ftplib",
	"functools",
	"gc",
	"getopt",
	"getpass",
	"gettext",
	"glob",
	"grp",
	"gzip",
	"hashlib",
	"heapq",
	"hmac",
	"html",
	"html.entities",
	"html.parser",
	"http",
	"http.client",
	"http.cookiejar",
	"http.cookies",
	"http.server",
	"imaplib",
	"imghdr",
	"imp",
	"importlib",
	"importlib.abc",
	"importlib.machinery",
	"importlib.util",
	"inspect",
	"io",
	"ipaddress",
	"itertools",
	"json",
	"json.tool",
	"keyword",
	"lib2to3",
	"linecache",
	"locale",
	"logging",
	"logging.config",
	"logging.handlers",
	"lzma",
	"macpath",
	"mailbox",
	"mailcap",
	"marshal",
	"math",
	"mimetypes",
	"mmap",
	"modulefinder",
	"msilib",
	"msvcrt",
	"multiprocessing",
	"multiprocessing.connection",
	"multiprocessing.dummy",
	"multiprocessing.managers",
	"multiprocessing.pool",
	"multiprocessing.sharedctypes",
	"netrc",
	"nis",
	"nntplib",
	"numbers",
	"operator",
	"optparse",
	"os",
	"os.path",
	"ossaudiodev",
	"parser",
	"pathlib",
	"pdb",
	"pickle",
	"pickletools",
	"pipes",
	"pkgutil",
	"platform",
	"plistlib",
	"poplib",
	"posix",
	"pprint",
	"profile",
	"pstats",
	"pty",
	"pwd",
	"py_compile",
	"pyclbr",
	"pydoc",
	"queue",
	"quopri",
	"random",
	"re",
	"readline",
	"reprlib",
	"resource",
	"rlcompleter",
	"runpy",
	"sched",
	"secrets",
	"select",
	"selectors",
	"shelve",
	"shlex",
	"shutil",
	"signal",
	"site",
	"smtpd",
	"smtplib",
	"sndhdr",
	"socket",
	"socketserver",
	"spwd",
	"sqlite3",
	"ssl",
	"stat",
	"statistics",
	"string",
	"stringprep",
	"struct",
	"subprocess",
	"sunau",
	"symbol",
	"symtable",
	"sys",
	"sysconfig",
	"syslog",
	"tabnanny",
	"tarfile",
	"telnetlib",
	"tempfile",
	"termios",
	"test",
	"test.support",
	"textwrap",
	"threading",
	"time",
	"timeit",
	"tkinter",
	"tkinter.scrolledtext",
	"tkinter.tix",
	"tkinter.ttk",
	"token",
	"tokenize",
	"trace",
	"traceback",
	"tracemalloc",
	"tty",
	"turtle",
	"turtledemo",
	"types",
	"typing",
	"unicodedata",
	"unittest",
	"unittest.mock",
	"urllib",
	"urllib.error",
	"urllib.parse",
	"urllib.request",
	"urllib.response",
	"urllib.robotparser",
	"uu",
	"uuid",
	"venv",
	"warnings",
	"wave",
	"weakref",
	"webbrowser",
	"winreg",
	"winsound",
	"wsgiref",
	"wsgiref.handlers",
	"wsgiref.headers",
	"wsgiref.simple_server",
	"wsgiref.util",
	"wsgiref.validate",
	"xdrlib",
	"xml",
	"xml.dom",
	"xml.dom.minidom",
	"xml.dom.pulldom",
	"xml.etree.ElementTree",
	"xml.parsers.expat",
	"xml.parsers.expat.errors",
	"xml.parsers.expat.model",
	"xml.sax",
	"xml.sax.handler",
	"xml.sax.saxutils",
	"xml.sax.xmlreader",
	"xmlrpc",
	"xmlrpc.client",
	"xmlrpc.server",
	"zipapp",
	"zipfile",
	"zipimport",
	"zlib",
}

func main() {
	pathFlag := flag.String("d", ".", "Path to the directory containing the python source files")
	excludeFlag := flag.String("e", "__pycache__", "Comma-separated list of directories to exclude")
	flag.Parse()

	excludeDirs := strings.Split(*excludeFlag, ",")
	pyFiles := make([]string, 0)

	err := filepath.Walk(*pathFlag, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return fmt.Errorf("could not read filepath %q : %v", *pathFlag, err)
		}
		for _, dir := range excludeDirs {
			if info.IsDir() && info.Name() == dir {
				return filepath.SkipDir
			}
		}
		matched, err := regexp.MatchString(".py", path)
		if err != nil {
			return fmt.Errorf("error applying regular expression to %q : %v", path, err)
		}
		if matched {
			pyFiles = append(pyFiles, path)
		}
		return nil
	})
	if err != nil {
		fmt.Printf("error while walking the directory tree at %q : %v", *pathFlag, err)
	}

	importsChan := make(chan string)
	errChan := make(chan error)

	wg.Add(len(pyFiles))
	for _, file := range pyFiles {
		go findImports(file, importsChan, errChan)
	}

	imports := make([]string, 0)
	go func() {
		for val := range importsChan {
			imports = append(imports, val)
		}
	}()

	go func() {
		for err := range errChan {
			fmt.Fprintf(os.Stderr, "%v", err)
		}
	}()
	wg.Wait()

	packagesChan := make(chan string)
	packages := make([]string, 0)
	for _, importString := range imports {
		wg.Add(1)
		go ParseImports(importString, packagesChan, errChan)
	}
	go func() {
		for pack := range packagesChan {
			packages = append(packages, pack)
		}
	}()
	wg.Wait()
	uniq := unique(packages)
	clean := removeStdlibAndUser(uniq, pyFiles)

	for _, mod := range clean {
		fmt.Println(mod)
	}
}

// importLineRe matches a line that, after optional indentation, is either an
// "import <name>" statement or a "from <module> import" statement. It is
// compiled once instead of once per scanned line.
var importLineRe = regexp.MustCompile(`^\s*(?:import\s+\S|from\s+\S+\s+import\b)`)

// findImports scans the file at filePath line by line and sends every line
// that looks like a Python import statement on importsChan. Lines are sent
// with surrounding whitespace removed and inner whitespace collapsed to
// single spaces. Failures to open or scan the file are sent on errChan.
//
// findImports calls wg.Done when it returns, so the caller must have added it
// to wg beforehand.
func findImports(filePath string, importsChan chan<- string, errChan chan<- error) {
	defer wg.Done()

	file, err := os.Open(filePath)
	if err != nil {
		errChan <- fmt.Errorf("could not open file %v for reading : %v", filePath, err)
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		text := scanner.Text()
		if importLineRe.MatchString(text) {
			// Normalising the whitespace guarantees that the keyword and
			// the module name are separated by exactly one space.
			importsChan <- strings.Join(strings.Fields(text), " ")
		}
	}
	// Report the scanner's own error; the err from os.Open is nil here.
	if err := scanner.Err(); err != nil {
		errChan <- fmt.Errorf("error while scanning %v : %v", filePath, err)
	}
}

// ParseImports extracts the top-level Python package names referenced by the
// import statement in importString and sends each of them on packagesChan.
//
// Both statement forms are understood:
//
//	import a.b, c as d   -> "a", "c"
//	from a.b import c    -> "a"
//
// Trailing "#" comments are ignored. Relative imports ("from . import x",
// "from .mod import y") refer to the importing project itself and produce
// nothing, as do lines that are not import statements. errChan is accepted
// for interface compatibility; nothing is currently sent on it.
//
// ParseImports calls wg.Done when it returns, so the caller must have added
// it to wg beforehand.
func ParseImports(importString string, packagesChan chan<- string, errChan chan<- error) {
	defer wg.Done()

	if i := strings.Index(importString, "#"); i >= 0 {
		importString = importString[:i]
	}
	// Fields tolerates indentation, tabs and repeated spaces, and lets us
	// check the length instead of indexing blindly.
	fields := strings.Fields(importString)
	if len(fields) < 2 {
		return
	}

	switch fields[0] {
	case "from":
		if pkg := topLevelPackage(fields[1]); pkg != "" {
			packagesChan <- pkg
		}
	case "import":
		// "import" accepts a comma-separated list, each item optionally
		// followed by "as <alias>"; only the first word of an item matters.
		for _, clause := range strings.Split(strings.Join(fields[1:], " "), ",") {
			words := strings.Fields(clause)
			if len(words) == 0 {
				continue
			}
			if pkg := topLevelPackage(words[0]); pkg != "" {
				packagesChan <- pkg
			}
		}
	}
}

// topLevelPackage returns the part of a dotted module name before the first
// dot, or "" when name is a relative import (it starts with a dot).
func topLevelPackage(name string) string {
	if strings.HasPrefix(name, ".") {
		return ""
	}
	if i := strings.Index(name, "."); i >= 0 {
		return name[:i]
	}
	return name
}

// unique returns the distinct strings of imports in order of first
// occurrence. The result is always non-nil, even for empty input.
//
// Keeping the input order (instead of ranging over a map) makes the result
// deterministic for a given input.
func unique(imports []string) []string {
	seen := make(map[string]struct{}, len(imports))
	uniqueImportsList := make([]string, 0, len(imports))
	for _, imp := range imports {
		if _, dup := seen[imp]; dup {
			continue
		}
		seen[imp] = struct{}{}
		uniqueImportsList = append(uniqueImportsList, imp)
	}
	return uniqueImportsList
}

// removeStdlibAndUser returns the entries of uniq that are neither listed in
// pyStdlib nor provided by the scanned project itself.
//
// A module counts as provided by the project when its name equals a whole
// component of one of the pyFiles paths: a directory name, or a file name
// without its ".py" suffix. Whole components are compared because a substring
// test would discard unrelated packages (for example "six" because of a file
// named "sixteen.py"). Empty names are dropped. The result is always non-nil.
func removeStdlibAndUser(uniq, pyFiles []string) []string {
	stdlib := make(map[string]bool, len(pyStdlib))
	for _, name := range pyStdlib {
		stdlib[name] = true
	}

	local := make(map[string]bool)
	for _, file := range pyFiles {
		// ToSlash lets a single separator cover every platform.
		parts := strings.Split(filepath.ToSlash(file), "/")
		last := len(parts) - 1
		parts[last] = strings.TrimSuffix(parts[last], ".py")
		for _, part := range parts {
			local[part] = true
		}
	}

	clean := make([]string, 0, len(uniq))
	for _, mod := range uniq {
		if mod == "" || stdlib[mod] || local[mod] {
			continue
		}
		clean = append(clean, mod)
	}
	return clean
}

## pyStdlib.go
package main

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"regexp"

	"golang.org/x/net/html"
)

const url = "https://docs.python.org/3/py-modindex.html"

// main downloads the page at url, collects the nodes returned by getContent
// and prints their text as the body of a Go []string literal, one quoted
// entry per line. Any failure is fatal.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		log.Fatalf("could not get url %v : %v", url, err)
	}
	defer resp.Body.Close()

	// Without this check an error page would be parsed as if it were the
	// module index.
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("could not get url %v : unexpected status %v", url, resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		log.Fatalf("could not parse response body : %v", err)
	}

	content, err := getContent(doc)
	if err != nil {
		log.Fatalf("could not get content from parsed document : %v", err)
	}

	// Each node is rendered back to markup; the surrounding tags are then
	// stripped so that only the element's content remains.
	re := regexp.MustCompile(`(<code class="xref">)|(</code>)`)
	fmt.Println("[]string{")
	for _, mod := range content {
		fmt.Printf("\t\"%v\",\n", re.ReplaceAllString(string(renderNode(mod)), ""))
	}
	fmt.Println("}")
}

// getContent walks the tree rooted at doc depth-first and returns, in
// document order, every <code> element whose class attribute is exactly
// "xref". It returns an error when doc is nil or no such element exists.
func getContent(doc *html.Node) ([]*html.Node, error) {
	if doc == nil {
		return nil, errors.New("nil document")
	}

	var modules []*html.Node
	var visit func(*html.Node)
	visit = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "code" {
			for _, attr := range n.Attr {
				// Check the attribute key as well as its value, and stop
				// at the first hit so a node is never recorded twice.
				if attr.Key == "class" && attr.Val == "xref" {
					modules = append(modules, n)
					break
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			visit(c)
		}
	}
	visit(doc)

	if len(modules) == 0 {
		return nil, errors.New("\"code\" tag not found")
	}
	return modules, nil
}

// renderNode renders n back to HTML markup and returns the bytes. A render
// failure is logged; whatever was written before the failure is still
// returned, so callers keep their best-effort output.
func renderNode(n *html.Node) []byte {
	var buf bytes.Buffer
	if err := html.Render(io.Writer(&buf), n); err != nil {
		log.Printf("could not render node : %v", err)
	}
	return buf.Bytes()
}
	package main

	import (
	"bufio"
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
	)

	// wg tracks the worker goroutines started by main; findImports and
	// ParseImports each call wg.Done when they finish.
	var wg sync.WaitGroup
	// pyStdlib lists Python standard library module names. removeStdlibAndUser
	// compares imported package names against it so that standard library
	// modules are not reported as dependencies.
	// NOTE(review): the entries look like generator output for the Python 3
	// module index — confirm before regenerating.
	var pyStdlib = []string{
	"__future__",
	"__main__",
	"_dummy_thread",
	"_thread",
	"abc",
	"aifc",
	"argparse",
	"array",
	"ast",
	"asynchat",
	"asyncio",
	"asyncore",
	"atexit",
	"audioop",
	"base64",
	"bdb",
	"binascii",
	"binhex",
	"bisect",
	"builtins",
	"bz2",
	"calendar",
	"cgi",
	"cgitb",
	"chunk",
	"cmath",
	"cmd",
	"code",
	"codecs",
	"codeop",
	"collections",
	"collections.abc",
	"colorsys",
	"compileall",
	"concurrent",
	"concurrent.futures",
	"configparser",
	"contextlib",
	"copy",
	"copyreg",
	"cProfile",
	"crypt",
	"csv",
	"ctypes",
	"curses",
	"curses.ascii",
	"curses.panel",
	"curses.textpad",
	"datetime",
	"dbm",
	"dbm.dumb",
	"dbm.gnu",
	"dbm.ndbm",
	"decimal",
	"difflib",
	"dis",
	"distutils",
	"distutils.archive_util",
	"distutils.bcppcompiler",
	"distutils.ccompiler",
	"distutils.cmd",
	"distutils.command",
	"distutils.command.bdist",
	"distutils.command.bdist_dumb",
	"distutils.command.bdist_msi",
	"distutils.command.bdist_packager",
	"distutils.command.bdist_rpm",
	"distutils.command.bdist_wininst",
	"distutils.command.build",
	"distutils.command.build_clib",
	"distutils.command.build_ext",
	"distutils.command.build_py",
	"distutils.command.build_scripts",
	"distutils.command.check",
	"distutils.command.clean",
	"distutils.command.config",
	"distutils.command.install",
	"distutils.command.install_data",
	"distutils.command.install_headers",
	"distutils.command.install_lib",
	"distutils.command.install_scripts",
	"distutils.command.register",
	"distutils.command.sdist",
	"distutils.core",
	"distutils.cygwinccompiler",
	"distutils.debug",
	"distutils.dep_util",
	"distutils.dir_util",
	"distutils.dist",
	"distutils.errors",
	"distutils.extension",
	"distutils.fancy_getopt",
	"distutils.file_util",
	"distutils.filelist",
	"distutils.log",
	"distutils.msvccompiler",
	"distutils.spawn",
	"distutils.sysconfig",
	"distutils.text_file",
	"distutils.unixccompiler",
	"distutils.util",
	"distutils.version",
	"doctest",
	"dummy_threading",
	"email",
	"email.charset",
	"email.contentmanager",
	"email.encoders",
	"email.errors",
	"email.generator",
	"email.header",
	"email.headerregistry",
	"email.iterators",
	"email.message",
	"email.mime",
	"email.parser",
	"email.policy",
	"email.utils",
	"encodings",
	"encodings.idna",
	"encodings.mbcs",
	"encodings.utf_8_sig",
	"ensurepip",
	"enum",
	"errno",
	"faulthandler",
	"fcntl",
	"filecmp",
	"fileinput",
	"fnmatch",
	"formatter",
	"fpectl",
	"fractions",
	"ftplib",
	"functools",
	"gc",
	"getopt",
	"getpass",
	"gettext",
	"glob",
	"grp",
	"gzip",
	"hashlib",
	"heapq",
	"hmac",
	"html",
	"html.entities",
	"html.parser",
	"http",
	"http.client",
	"http.cookiejar",
	"http.cookies",
	"http.server",
	"imaplib",
	"imghdr",
	"imp",
	"importlib",
	"importlib.abc",
	"importlib.machinery",
	"importlib.util",
	"inspect",
	"io",
	"ipaddress",
	"itertools",
	"json",
	"json.tool",
	"keyword",
	"lib2to3",
	"linecache",
	"locale",
	"logging",
	"logging.config",
	"logging.handlers",
	"lzma",
	"macpath",
	"mailbox",
	"mailcap",
	"marshal",
	"math",
	"mimetypes",
	"mmap",
	"modulefinder",
	"msilib",
	"msvcrt",
	"multiprocessing",
	"multiprocessing.connection",
	"multiprocessing.dummy",
	"multiprocessing.managers",
	"multiprocessing.pool",
	"multiprocessing.sharedctypes",
	"netrc",
	"nis",
	"nntplib",
	"numbers",
	"operator",
	"optparse",
	"os",
	"os.path",
	"ossaudiodev",
	"parser",
	"pathlib",
	"pdb",
	"pickle",
	"pickletools",
	"pipes",
	"pkgutil",
	"platform",
	"plistlib",
	"poplib",
	"posix",
	"pprint",
	"profile",
	"pstats",
	"pty",
	"pwd",
	"py_compile",
	"pyclbr",
	"pydoc",
	"queue",
	"quopri",
	"random",
	"re",
	"readline",
	"reprlib",
	"resource",
	"rlcompleter",
	"runpy",
	"sched",
	"secrets",
	"select",
	"selectors",
	"shelve",
	"shlex",
	"shutil",
	"signal",
	"site",
	"smtpd",
	"smtplib",
	"sndhdr",
	"socket",
	"socketserver",
	"spwd",
	"sqlite3",
	"ssl",
	"stat",
	"statistics",
	"string",
	"stringprep",
	"struct",
	"subprocess",
	"sunau",
	"symbol",
	"symtable",
	"sys",
	"sysconfig",
	"syslog",
	"tabnanny",
	"tarfile",
	"telnetlib",
	"tempfile",
	"termios",
	"test",
	"test.support",
	"textwrap",
	"threading",
	"time",
	"timeit",
	"tkinter",
	"tkinter.scrolledtext",
	"tkinter.tix",
	"tkinter.ttk",
	"token",
	"tokenize",
	"trace",
	"traceback",
	"tracemalloc",
	"tty",
	"turtle",
	"turtledemo",
	"types",
	"typing",
	"unicodedata",
	"unittest",
	"unittest.mock",
	"urllib",
	"urllib.error",
	"urllib.parse",
	"urllib.request",
	"urllib.response",
	"urllib.robotparser",
	"uu",
	"uuid",
	"venv",
	"warnings",
	"wave",
	"weakref",
	"webbrowser",
	"winreg",
	"winsound",
	"wsgiref",
	"wsgiref.handlers",
	"wsgiref.headers",
	"wsgiref.simple_server",
	"wsgiref.util",
	"wsgiref.validate",
	"xdrlib",
	"xml",
	"xml.dom",
	"xml.dom.minidom",
	"xml.dom.pulldom",
	"xml.etree.ElementTree",
	"xml.parsers.expat",
	"xml.parsers.expat.errors",
	"xml.parsers.expat.model",
	"xml.sax",
	"xml.sax.handler",
	"xml.sax.saxutils",
	"xml.sax.xmlreader",
	"xmlrpc",
	"xmlrpc.client",
	"xmlrpc.server",
	"zipapp",
	"zipfile",
	"zipimport",
	"zlib",
	}

	func main() {
	pathFlag := flag.String("d", ".", "Path to the directory containing the python source files")
	excludeFlag := flag.String("e", "__pycache__", "Comma-separated list of directories to exclude")
	flag.Parse()

	excludeDirs := strings.Split(*excludeFlag, ",")
	pyFiles := make([]string, 0)

	err := filepath.Walk(*pathFlag, func(path string, info os.FileInfo, err error) error {
	if err != nil {
	return fmt.Errorf("could not read filepath %q : %v", *pathFlag, err)
	}
	for _, dir := range excludeDirs {
	if info.IsDir() && info.Name() == dir {
	return filepath.SkipDir
	}
	}
	matched, err := regexp.MatchString(".py", path)
	if err != nil {
	return fmt.Errorf("error applying regular expression to %q : %v", path, err)
	}
	if matched {
	pyFiles = append(pyFiles, path)
	}
	return nil
	})
	if err != nil {
	fmt.Printf("error while walking the directory tree at %q : %v", *pathFlag, err)
	}

	importsChan := make(chan string)
	errChan := make(chan error)

	wg.Add(len(pyFiles))
	for _, file := range pyFiles {
	go findImports(file, importsChan, errChan)
	}

	imports := make([]string, 0)
	go func() {
	for val := range importsChan {
	imports = append(imports, val)
	}
	}()

	go func() {
	for err := range errChan {
	fmt.Fprintf(os.Stderr, "%v", err)
	}
	}()
	wg.Wait()

	packagesChan := make(chan string)
	packages := make([]string, 0)
	for _, importString := range imports {
	wg.Add(1)
	go ParseImports(importString, packagesChan, errChan)
	}
	go func() {
	for pack := range packagesChan {
	packages = append(packages, pack)
	}
	}()
	wg.Wait()
	uniq := unique(packages)
	clean := removeStdlibAndUser(uniq, pyFiles)

	for _, mod := range clean {
	fmt.Println(mod)
	}
	}

	func findImports(filePath string, importsChan chan<- string, errChan chan<- error) {
	defer wg.Done()
	file, err := os.Open(filePath)
	if err != nil {
	errChan <- fmt.Errorf("could open file %v for reading : %v", filePath, err)
	return
	}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
	text := scanner.Text()
	matches, err := regexp.MatchString("^import\|from.*import", text)
	if err != nil {
	errChan <- fmt.Errorf("error while parsing regular expression ^import\|from.*import : %v", err)
	return
	}
	if matches {
	importsChan <- text
	}
	}
	if scanner.Err() != nil {
	errChan <- fmt.Errorf("error while scanning %v : %v", filePath, err)
	}
	}

	// ParseImports extracts the top-level Python package names referenced by
	// the import statement in importString and sends each of them on
	// packagesChan.
	//
	// Both statement forms are understood:
	//
	//	import a.b, c as d   -> "a", "c"
	//	from a.b import c    -> "a"
	//
	// Trailing "#" comments are ignored. Relative imports ("from . import x",
	// "from .mod import y") refer to the importing project itself and produce
	// nothing, as do lines that are not import statements. errChan is accepted
	// for interface compatibility; nothing is currently sent on it.
	//
	// ParseImports calls wg.Done when it returns, so the caller must have
	// added it to wg beforehand.
	func ParseImports(importString string, packagesChan chan<- string, errChan chan<- error) {
		defer wg.Done()

		if i := strings.Index(importString, "#"); i >= 0 {
			importString = importString[:i]
		}
		// Fields tolerates indentation, tabs and repeated spaces, and lets
		// us check the length instead of indexing blindly.
		fields := strings.Fields(importString)
		if len(fields) < 2 {
			return
		}

		switch fields[0] {
		case "from":
			if pkg := topLevelPackage(fields[1]); pkg != "" {
				packagesChan <- pkg
			}
		case "import":
			// "import" accepts a comma-separated list, each item optionally
			// followed by "as <alias>"; only the first word of an item
			// matters.
			for _, clause := range strings.Split(strings.Join(fields[1:], " "), ",") {
				words := strings.Fields(clause)
				if len(words) == 0 {
					continue
				}
				if pkg := topLevelPackage(words[0]); pkg != "" {
					packagesChan <- pkg
				}
			}
		}
	}

	// topLevelPackage returns the part of a dotted module name before the
	// first dot, or "" when name is a relative import (it starts with a dot).
	func topLevelPackage(name string) string {
		if strings.HasPrefix(name, ".") {
			return ""
		}
		if i := strings.Index(name, "."); i >= 0 {
			return name[:i]
		}
		return name
	}

	// unique returns the distinct strings of imports in order of first
	// occurrence. The result is always non-nil, even for empty input.
	//
	// Keeping the input order (instead of ranging over a map) makes the result
	// deterministic for a given input.
	func unique(imports []string) []string {
		seen := make(map[string]struct{}, len(imports))
		uniqueImportsList := make([]string, 0, len(imports))
		for _, imp := range imports {
			if _, dup := seen[imp]; dup {
				continue
			}
			seen[imp] = struct{}{}
			uniqueImportsList = append(uniqueImportsList, imp)
		}
		return uniqueImportsList
	}

	// removeStdlibAndUser returns the entries of uniq that are neither listed
	// in pyStdlib nor provided by the scanned project itself.
	//
	// A module counts as provided by the project when its name equals a whole
	// component of one of the pyFiles paths: a directory name, or a file name
	// without its ".py" suffix. Whole components are compared because a
	// substring test would discard unrelated packages (for example "six"
	// because of a file named "sixteen.py"). Empty names are dropped. The
	// result is always non-nil.
	func removeStdlibAndUser(uniq, pyFiles []string) []string {
		stdlib := make(map[string]bool, len(pyStdlib))
		for _, name := range pyStdlib {
			stdlib[name] = true
		}

		local := make(map[string]bool)
		for _, file := range pyFiles {
			// ToSlash lets a single separator cover every platform.
			parts := strings.Split(filepath.ToSlash(file), "/")
			last := len(parts) - 1
			parts[last] = strings.TrimSuffix(parts[last], ".py")
			for _, part := range parts {
				local[part] = true
			}
		}

		clean := make([]string, 0, len(uniq))
		for _, mod := range uniq {
			if mod == "" || stdlib[mod] || local[mod] {
				continue
			}
			clean = append(clean, mod)
		}
		return clean
	}
	package main

	import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"regexp"

	"golang.org/x/net/html"
	)

	const url = "https://docs.python.org/3/py-modindex.html"

	func main() {
	resp, err := http.Get(url)
	if err != nil {
	log.Fatalf("could not get url %v : %v", url, err)
	}
	defer resp.Body.Close()

	doc, err := html.Parse(resp.Body)
	if err != nil {
	log.Fatalf("could not parse response body : %v", err)
	}

	content, err := getContent(doc)
	if err != nil {
	log.Fatalf("could not get content from parsed document : %v", err)
	}
	fmt.Println("[]string{")
	re := regexp.MustCompile("(<code class=\"xref\">)\|(</code>)")
	for _, mod := range content {
	fmt.Printf("\t\"%v\",\n", re.ReplaceAllString(string(renderNode(mod)), ""))
	}
	fmt.Println("}")
	}

	// getContent walks the tree rooted at doc depth-first and returns, in
	// document order, every <code> element whose class attribute is exactly
	// "xref". It returns an error when doc is nil or no such element exists.
	//
	// The parameter and result use *html.Node, matching the node pointers the
	// body collects and the pointer renderNode expects.
	func getContent(doc *html.Node) ([]*html.Node, error) {
		if doc == nil {
			return nil, errors.New("nil document")
		}

		var modules []*html.Node
		var visit func(*html.Node)
		visit = func(n *html.Node) {
			if n.Type == html.ElementNode && n.Data == "code" {
				for _, attr := range n.Attr {
					// Check the attribute key as well as its value, and
					// stop at the first hit so a node is never recorded
					// twice.
					if attr.Key == "class" && attr.Val == "xref" {
						modules = append(modules, n)
						break
					}
				}
			}
			for c := n.FirstChild; c != nil; c = c.NextSibling {
				visit(c)
			}
		}
		visit(doc)

		if len(modules) == 0 {
			return nil, errors.New("\"code\" tag not found")
		}
		return modules, nil
	}

	// renderNode renders n back to HTML markup and returns the bytes. A render
	// failure is logged; whatever was written before the failure is still
	// returned, so callers keep their best-effort output.
	func renderNode(n *html.Node) []byte {
		var buf bytes.Buffer
		if err := html.Render(io.Writer(&buf), n); err != nil {
			log.Printf("could not render node : %v", err)
		}
		return buf.Bytes()
	}