#!/usr/bin/env python3

# Copyright (C) 2020  Matthew "strager" Glazar
# See end of file for extended copyright information.

"""Generate quick-lint-js copyright documentation.

Run this program to generate a text file listing copyrights and licenses for
software used by quick-lint-js (and quick-lint-js' own copyright and license).
"""

import argparse
import collections
import json
import logging
import os
import pathlib
import re
import subprocess
import sys
import tarfile
import typing
import unittest
import urllib.parse

# Module-level logger. main() raises the root logger to DEBUG for --verbose.
logger = logging.getLogger(__name__)

# Initialized in main.
# Holds the entries parsed from the vendor README; VendorProject looks itself
# up in this list by project name.
global_vendor_projects: typing.List["VendorProject"] = []


if typing.TYPE_CHECKING:

    class Project(typing.Protocol):
        """Structural type for anything that can print its own copyright
        section. Only defined for type checkers; it does not exist at run time.
        """

        def print_copyright(self) -> None:
            ...


class VendorProject(typing.NamedTuple):
    """A third-party project described by an entry in vendor/README.txt.

    Only the project name is stored. The matching README entry is looked up
    in global_vendor_projects each time it is needed.
    """

    name: str

    def print_copyright(self) -> None:
        """Print a Markdown section with this project's version, download URL,
        and license text.
        """
        entry = self._readme_project
        # "License file" may name several files separated by "; ". Only the
        # first one is read and printed.
        first_license_file = entry.properties["License file"].split("; ")[0]
        license_text = (entry.vendor_directory / first_license_file).read_text()

        print(
            f"""\
## {self.name}

Version {entry.properties['Version']} of {self.name} is statically linked into quick-lint-js.

Source code for {self.name} is available at:
* {entry.properties['Download URL']}

Copyright information for {self.name}:
{markdown_block_quote(license_text)}
"""
        )

    @property
    def _readme_project(self) -> "VendorReadmeProject":
        """Return the single README entry whose project_name equals self.name.

        Raises Exception if there is no such entry or more than one.
        """
        candidates = [
            entry
            for entry in global_vendor_projects
            if entry.project_name == self.name
        ]
        if len(candidates) == 1:
            return candidates[0]
        raise Exception(
            f"could not find vendor project for project name: {self.name}"
        ) from None


class QLJSSubproject(typing.NamedTuple):
    """An archive built from quick-lint-js' own sources.

    It is covered by the main quick-lint-js license, so it has no additional
    copyright information.
    """

    def print_copyright(self) -> None:
        """Print nothing."""
        return None


class IgnoredLibrary(typing.NamedTuple):
    """HACK(strager): A linked library whose copyright is deliberately not
    reported.
    """

    def print_copyright(self) -> None:
        """Print nothing."""
        return None


class WindowsSoftwareImportLibrary(typing.NamedTuple):
    """A Windows import library (implib) that only contains imports to DLLs
    which are not shipped with Windows.
    """

    def print_copyright(self) -> None:
        """Print nothing."""
        # TODO(strager): Should we mention dynamic linking?
        return None


class WindowsSystemImportLibrary(typing.NamedTuple):
    """A Windows import library (implib) that only contains imports to system
    DLLs.
    """

    def print_copyright(self) -> None:
        """Print nothing."""
        # TODO(strager): Should we mention dynamic linking
        # to the OS?
        return None


# Maps the file name of a linked archive (as it appears in the link map) to
# the project whose copyright must be reported for it.
archive_to_project = {
    # Vendored third-party libraries. The project name must match an entry in
    # the vendor README.
    # NOTE(review): the Mongoose archives previously pointed at "simdjson",
    # which would omit Mongoose's license; confirm "mongoose" matches the
    # vendor README entry name.
    "libmongoose.a": VendorProject("mongoose"),
    "libsimdjson.a": VendorProject("simdjson"),
    "mongoose.lib": VendorProject("mongoose"),
    "simdjson.lib": VendorProject("simdjson"),
    # quick-lint-js' own libraries.
    "libnode-hook.a": QLJSSubproject(),
    "libquick-lint-js-diagnostic-metadata.a": QLJSSubproject(),
    "libquick-lint-js-i18n.a": QLJSSubproject(),
    "libquick-lint-js-lib-debug.a": QLJSSubproject(),
    "libquick-lint-js-lib.a": QLJSSubproject(),
    "libquick-lint-js-tool-lib.a": QLJSSubproject(),
    "libquick-lint-js-trace.a": QLJSSubproject(),
    "node-hook.lib": QLJSSubproject(),
    "quick-lint-js-diagnostic-metadata.lib": QLJSSubproject(),
    "quick-lint-js-i18n.lib": QLJSSubproject(),
    "quick-lint-js-lib-debug.lib": QLJSSubproject(),
    "quick-lint-js-lib.lib": QLJSSubproject(),
    "quick-lint-js-tool-lib.lib": QLJSSubproject(),
    "quick-lint-js-trace.lib": QLJSSubproject(),
    # Import libraries for DLLs not shipped with Windows.
    "node-napi.lib": WindowsSoftwareImportLibrary(),
    # Import libraries for Windows system DLLs.
    "pathcch.lib": WindowsSystemImportLibrary(),
    "wsock32.lib": WindowsSystemImportLibrary(),
    # HACK(strager): Ignore these libraries for now.
    "LIBCMT.lib": IgnoredLibrary(),
    "LIBCMTD.lib": IgnoredLibrary(),
    "MSVCRT.lib": IgnoredLibrary(),
    "MSVCRTD.lib": IgnoredLibrary(),
    "delayimp.lib": IgnoredLibrary(),
    "kernel32.lib": IgnoredLibrary(),
    "libcpmt.lib": IgnoredLibrary(),
    "libcpmtd.lib": IgnoredLibrary(),
    "libucrt.lib": IgnoredLibrary(),
    "libucrtd.lib": IgnoredLibrary(),
    "libvcruntime.lib": IgnoredLibrary(),
    "libvcruntimed.lib": IgnoredLibrary(),
    "msvcprt.lib": IgnoredLibrary(),
    "msvcprtd.lib": IgnoredLibrary(),
    "ucrt.lib": IgnoredLibrary(),
    "ucrtd.lib": IgnoredLibrary(),
    "vcruntime.lib": IgnoredLibrary(),
    "vcruntimed.lib": IgnoredLibrary(),
}


def main() -> None:
    """Command-line entry point.

    Parses arguments, prints the quick-lint-js license and the licenses of
    code it is derived from, loads the vendor README into
    global_vendor_projects, then prints copyright information for every
    archive listed in the link map.
    """
    logging.basicConfig()

    # The module docstring is the program description. Passing it
    # positionally would set `prog` (the program name shown in usage) instead.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--build-directory",
        help="Required for PE/COFF LLD traces",
        required=False,
        type=pathlib.Path,
    )
    parser.add_argument(
        "--linkmap",
        help="File generated by an Mach-O linker's -map option or an ELF linker's -Map option or a PE linker's /MAP option",
        required=True,
    )
    parser.add_argument(
        "--cross-compiling-emscripten",
        action="store_true",
        default=False,
        help="Don't assume system package manager manages the standard library",
    )
    parser.add_argument(
        "--extra-vendor-projects",
        default=[],
        help="Include license information for these projects from --vendor-readme",
        nargs="+",
    )
    parser.add_argument(
        "--vendor-readme",
        required=False,
        default=str(pathlib.Path(__file__).parent / ".." / "vendor" / "README.txt"),
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Print debugging messages",
    )
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # The COPYING file lives one directory above this script.
    qljs_license_text = (pathlib.Path(__file__).parent / ".." / "COPYING").read_text()
    print(
        f"""\
# quick-lint-js licenses

The quick-lint-js program is Copyright (C) 2020 Matthew "strager" Glazar. It is
distributed under the following license:
{markdown_block_quote(qljs_license_text)}

Parts of quick-lint-js might be distributed under different licenses:

# Arm Optimized Routines

src/quick-lint-js/simd-neon-arm.h in quick-lint-js is derived from
string/aarch64/memchr.S in Arm Optimized Routines.

Source code of Arm Optimized Routines is available at:
* https://github.com/ARM-software/optimized-routines/tree/7a9fd1603e1179b044406fb9b6cc5770d736cde7

Copyright information for Arm Optimized Routines:
> MIT License
>
> Copyright (c) 1999-2019, Arm Limited.
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.

# sse2neon

src/quick-lint-js/simd-neon-arm.h in quick-lint-js is derived from sse2neon.

Source code of sse2neon is available at:
* https://github.com/DLTcollab/sse2neon/blob/814935c9ba06f68e9549272dbf5df0db8dab2a00/sse2neon.h

Copyright information for sse2neon:
> sse2neon is freely redistributable under the MIT License.
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in
> all copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
"""
    )

    # VendorProject.print_copyright reads this module-level list, so it must
    # be populated before any project is dumped.
    vendor_readme_file = pathlib.Path(args.vendor_readme)
    global global_vendor_projects
    global_vendor_projects = parse_vendor_readme_txt(
        vendor_readme_file.read_text(), vendor_directory=vendor_readme_file.parent
    )

    dump_from_linkmap(
        link_map_file=pathlib.Path(args.linkmap),
        build_directory=args.build_directory,
        is_cross_compiling_emscripten=args.cross_compiling_emscripten,
        extra_vendor_projects=args.extra_vendor_projects,
    )


def dump_from_linkmap(
    link_map_file: pathlib.Path,
    build_directory: typing.Optional[pathlib.Path],
    is_cross_compiling_emscripten: bool,
    extra_vendor_projects: typing.List[str],
) -> None:
    """Print copyright information for every archive listed in a link map.

    A link map that is a tar file is parsed as a PE/COFF LLD trace, which
    requires build_directory; anything else is parsed as a text link map.
    Archives with relative paths are reported as vendored projects (together
    with extra_vendor_projects); archives with absolute paths are reported
    through the system package manager.

    Raises Exception if the link map is a tar file and build_directory is
    None.
    """
    if not tarfile.is_tarfile(link_map_file):
        link_map = parse_text_link_map(link_map_file.read_text())
    elif build_directory is None:
        raise Exception("--build-directory is required")
    else:
        link_map = parse_coff_lld_link_map(
            link_map_file, build_directory=build_directory
        )

    if not link_map.linked_archives:
        warn("link map lists no archives; this is probably a bug in collect-copyright")

    # Absolute paths come from the system; relative paths come from our build.
    system_archives = []
    vendored_archives = []
    for archive in link_map.linked_archives:
        if pathlib.Path(archive).is_absolute():
            system_archives.append(archive)
        else:
            vendored_archives.append(archive)

    extra_projects = list(map(VendorProject, extra_vendor_projects))
    dump_from_projects(vendored_archives, extra_projects=extra_projects)
    dump_from_system_archives(
        system_archives,
        is_cross_compiling_emscripten=is_cross_compiling_emscripten,
    )


def dump_from_projects(
    archives: typing.List[str],
    extra_projects: typing.List["Project"],
) -> None:
    """Print copyright information for the projects behind the given archives.

    Each archive's file name is looked up in archive_to_project. The resulting
    projects, plus extra_projects, are de-duplicated, sorted, and asked to
    print their copyright.

    Raises Exception if an archive's file name is not in archive_to_project.
    """
    # Copy so that the appends below do not mutate the caller's list.
    all_projects = list(extra_projects)
    for archive in sorted(archives):
        archive_name = pathlib.Path(archive).name
        try:
            project = archive_to_project[archive_name]
        except KeyError:
            raise Exception(f"could not find source for archive: {archive}") from None
        all_projects.append(project)

    for project in sorted(set(all_projects)):
        project.print_copyright()


def detect_system_package_manager(
    files: typing.Iterable[str], is_cross_compiling_emscripten: bool
):
    """Guess which package manager installed the given system files.

    Probes are tried in a fixed order: Emscripten (when cross-compiling), Nix
    (NIX_STORE set), LLVM MinGW under /opt/llvm-mingw, MSYS2's Pacman,
    Homebrew, then rpm, dpkg, and FreeBSD's pkg. The first match wins.

    Returns a package manager object, or None if nothing was detected.
    """
    if is_cross_compiling_emscripten:
        return Emscripten()

    if os.environ.get("NIX_STORE", ""):
        warn("Nix not well supported")
        return Nix()

    # Several probes below each scan every file. Materialize the iterable so
    # that a one-shot iterator is not exhausted by the first probe.
    files = list(files)

    # HACK(strager): Detect LLVM MinGW as used by our GitHub Actions workers.
    if os.name != "nt":
        llvm_mingw_root = pathlib.Path("/opt/llvm-mingw")
        if any(is_path_relative(root_path=llvm_mingw_root, sub_path=f) for f in files):
            return LLVMMinGW(root=llvm_mingw_root)

    msys_path = find_msys_install_path()
    if msys_path and any(
        is_path_relative(root_path=msys_path, sub_path=f) for f in files
    ):
        return Pacman(
            pacman_exe=msys_path / "usr" / "bin" / "pacman.exe",
            root=msys_path,
            source_uri="https://packages.msys2.org/search?t=binpkg&q=__PACKAGE__",
        )

    # For the remaining probes, a missing, non-executable, or failing command
    # just means "not this package manager".
    try:
        brew_prefix = subprocess.check_output(
            ["brew", "--prefix"], encoding="utf-8"
        ).rstrip("\n")
        if any(is_path_relative(root_path=brew_prefix, sub_path=f) for f in files):
            return Homebrew(brew_prefix=brew_prefix)
    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
        pass

    try:
        subprocess.check_output(["rpm", "--query", "glibc"])
        return RedHat()
    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
        pass

    try:
        subprocess.check_output(["dpkg-query", "--list", "libc6"])
        return Debian()
    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
        pass

    try:
        subprocess.check_output(["pkg", "info", "pkg"])
        return FreeBSD()
    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
        pass

    return None


def find_msys_install_path() -> typing.Optional[pathlib.Path]:
    """Locate the MSYS2 installation directory on Windows.

    The uninstaller's registry entry is consulted first. If the registry is
    unavailable or has no such key, a fixed list of likely directories is
    checked for an msys2.ini file.

    Returns None if MSYS2 could not be found.
    """
    uninstall_key_name = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\{f77d714f-1d9f-4795-858c-9633aeec04b8}"
    try:
        # winreg only exists on Windows; ImportError is handled below.
        import winreg

        with winreg.OpenKey(winreg.HKEY_CURRENT_USER, uninstall_key_name) as key:
            install_location, _value_type = winreg.QueryValueEx(
                key, "InstallLocation"
            )
            return pathlib.Path(install_location)
    except (FileNotFoundError, ImportError):
        pass

    candidates = (pathlib.Path(guess) for guess in ["c:/msys64"])
    return next(
        (
            candidate
            for candidate in candidates
            if candidate.is_absolute() and (candidate / "msys2.ini").is_file()
        ),
        None,
    )


def dump_from_system_archives(
    linked_system_archives: typing.List[str], is_cross_compiling_emscripten: bool
) -> None:
    """Print copyright information for archives installed on the system.

    Does nothing for an empty list. If no package manager can be detected, the
    archives are listed as having unknown licenses. Otherwise one Markdown
    section is printed per source package, giving the installed binary
    packages, the source locations, and each package's copyright text.
    """
    if not linked_system_archives:
        return

    package_manager = detect_system_package_manager(
        linked_system_archives,
        is_cross_compiling_emscripten=is_cross_compiling_emscripten,
    )
    if package_manager is None:
        warn("Could not detect system package manager")
        print(
            """## Other packages

The following files are statically linked into quick-lint-js. Their copyright and licenses are unknown:
"""
        )
        for archive_path in sorted(linked_system_archives):
            print(f"* {archive_path}")
        return

    packages = package_manager.packages_for_installed_files(linked_system_archives)
    source_names = {package.source for package in packages}

    for source_name in sorted(source_names):
        print(f"## {source_name}\n")

        # Binary packages built from this source package, in sorted order.
        members = [
            package for package in sorted(packages) if package.source == source_name
        ]

        for package in members:
            print(
                f"Version {package.version} of package {package.name} is statically linked into quick-lint-js.\n"
            )

        print(f"Source code for {source_name} is available at:")
        unique_uris = sorted(
            {uri for package in members for uri in package.source_uris}
        )
        if not unique_uris:
            print("* (location unknown!)")
        for uri in unique_uris:
            print(f"* {uri}")

        for package in members:
            print(f"\nCopyright information for {package.name}:")
            print(markdown_block_quote(package_manager.get_copyright(package)))

        print("")


def markdown_block_quote(text: str) -> str:
    """Return text as a Markdown block quote.

    Leading and trailing newlines are dropped, then every remaining line
    (including empty ones) is prefixed with "> ".
    """
    lines = text.strip("\n").split("\n")
    return "\n".join("> " + line for line in lines)


class Homebrew:
    """The Homebrew package manager.

    Linked files are mapped to formulae from their resolved path inside the
    brew prefix (Cellar/<formula>/<version>/...).
    """

    class InstalledPackage(typing.NamedTuple):
        name: str
        source: str
        version: str
        source_uris: typing.Tuple[str, ...]
        path: str

    class _NameAndVersion(typing.NamedTuple):
        name: str
        version: str

    def __init__(self, brew_prefix: str) -> None:
        self.__brew_prefix = brew_prefix

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        """Return one InstalledPackage per distinct formula owning the files.

        Files whose formula cannot be determined are reported with warn() and
        otherwise skipped.
        """
        files_from_unknown_formula = set()
        formulae_names_and_versions = set()
        for file in files:
            formula_name_and_version = self._get_formula_name_and_version_from_path(
                file
            )
            if formula_name_and_version is not None:
                formulae_names_and_versions.add(formula_name_and_version)
            else:
                files_from_unknown_formula.add(file)
        if files_from_unknown_formula:
            warn(
                f"could not determine formula for files: {' '.join(sorted(files_from_unknown_formula))}"
            )
        return [
            self._make_installed_package(
                formula_name=f.name, formula_version=f.version
            )
            for f in formulae_names_and_versions
        ]

    def _get_formula_name_and_version_from_path(
        self, path: str
    ) -> typing.Optional["_NameAndVersion"]:
        """Extract the formula name and version from a file path.

        Returns None if the resolved path is outside the brew prefix or is not
        of the form Cellar/<name>/<version>/...
        """
        try:
            # path_in_brew will look like:
            # Cellar/llvm@13/13.0.1/lib/libc++.a
            path_in_brew = pathlib.Path(os.path.realpath(path)).relative_to(
                self.__brew_prefix
            )
        except ValueError:
            # Path isn't in the brew prefix.
            return None
        parts = path_in_brew.parts
        # "Cellar", the name, and the version are all needed, i.e. at least
        # three components.
        if len(parts) >= 3 and parts[0] == "Cellar":
            return Homebrew._NameAndVersion(name=parts[1], version=parts[2])
        return None

    def get_copyright(self, package: InstalledPackage) -> str:
        """Return the copyright notice for a package.

        Only LLVM formulae (llvm, llvm@N) are known; anything else raises
        Exception.
        """
        if not re.search(r"^llvm(@\d+)?$", package.name):
            raise Exception(
                f"could not determine copyright for formula: {package.name}"
            )
        return """\
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""

    def _make_installed_package(
        self, formula_name: str, formula_version: str
    ) -> InstalledPackage:
        """Build an InstalledPackage from 'brew info' data for one formula.

        Raises Exception if the formula is not in the output of 'brew info',
        or if it comes from a tap other than homebrew/core.
        """
        # TODO(strager): Cache output of 'brew info'.
        brew_info_json = subprocess.check_output(
            [
                pathlib.Path(self.__brew_prefix) / "bin" / "brew",
                "info",
                "--json",
                "--installed",
            ],
            encoding="utf-8",
        ).rstrip("\n")
        brew_info = json.loads(brew_info_json)

        formula_brew_info = next(
            (f for f in brew_info if f["name"] == formula_name), None
        )
        if formula_brew_info is None:
            raise Exception(
                f"Could not find formula {formula_name} in output of 'brew info --installed --json'"
            )

        installed_path = pathlib.Path(self.__brew_prefix) / "opt" / formula_name
        tap = formula_brew_info["tap"]
        if tap != "homebrew/core":
            raise Exception(f"Unsupported tap for formula {formula_name}: {tap}")
        tap_git_commit = formula_brew_info["tap_git_head"]
        formula_rb_repo_path = formula_brew_info["ruby_source_path"]

        return self.InstalledPackage(
            name=formula_name,
            source=formula_name,
            version=formula_version,
            # A tuple, as the field declares; this also keeps the package
            # hashable.
            source_uris=(
                f"https://github.com/Homebrew/homebrew-core/blob/{tap_git_commit}/{formula_rb_repo_path}",
            ),
            path=str(installed_path),
        )

    def _is_llvm_file(self, file_path: str) -> bool:
        """Return whether file_path is lexically inside <prefix>/opt/llvm."""
        return is_path_relative(
            sub_path=file_path,
            root_path=pathlib.Path(self.__brew_prefix) / "opt" / "llvm",
        )


class LLVMMinGW:
    """An LLVM MinGW toolchain installed under a single root directory."""

    class ToolchainComponent(typing.NamedTuple):
        name: str
        version: str
        source_uris: typing.Tuple[str, ...]
        license_paths: typing.Tuple[pathlib.Path, ...]

        @property
        def source(self) -> str:
            # A component is its own source package.
            return self.name

    def __init__(self, root: pathlib.Path) -> None:
        self.__root = root

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[ToolchainComponent]:
        """Return the toolchain components (always LLVM and MinGW).

        The files only affect diagnostics: each one outside the toolchain root
        produces a warning.
        """
        for file in list(files):
            if is_path_relative(root_path=self.__root, sub_path=file):
                continue
            warn(f"found {file} outside LLVM MinGW root ({self.__root})")

        mingw_share = self.__root / "aarch64-w64-mingw32" / "share" / "mingw32"
        # TODO(strager): Map .a/.lib files to projects, similar to Emscripten.
        # TODO(strager): Return version and source information.
        llvm = self.ToolchainComponent(
            name="LLVM",
            version="(unknown version)",
            source_uris=(),
            license_paths=(self.__root / "LICENSE.TXT",),
        )
        mingw = self.ToolchainComponent(
            name="MinGW",
            version="(unknown version)",
            source_uris=(),
            license_paths=(
                mingw_share / "COPYING",
                mingw_share / "COPYING.MinGW-w64-runtime.txt",
            ),
        )
        return [llvm, mingw]

    def get_copyright(self, package: ToolchainComponent) -> str:
        """Return the contents of the component's license files, each preceded
        by a "Contents of <file name>:" heading.
        """
        text_by_path = {path: path.read_text() for path in package.license_paths}
        pieces = []
        for path, text in text_by_path.items():
            pieces += [f"Contents of {path.name}:", text, ""]
        return "\n".join(pieces)


class Pacman:
    """The Pacman package manager, used on Arch Linux and in MSYS2."""

    class InstalledPackage(typing.NamedTuple):
        name: str
        version: str
        source_uris: typing.Tuple[str, ...]
        licenses: str
        # Absolute paths of the files the package installed, as produced by
        # Pacman._absolute_path.
        files: typing.Tuple[pathlib.Path, ...]

        @property
        def source(self) -> str:
            # A Pacman package is reported as its own source package.
            return self.name

    def __init__(
        self, pacman_exe: pathlib.Path, root: pathlib.Path, source_uri: str
    ) -> None:
        """Remember how to invoke Pacman.

        root is prepended to the paths Pacman prints. source_uri is a template
        in which "__PACKAGE__" is replaced by the URL-quoted package name.
        """
        self.__pacman_exe = pacman_exe
        self.__root = root
        self.__source_uri = source_uri

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        """Return the installed packages that own the given files."""
        files = list(files)
        if not files:
            # Nothing to look up; do not invoke Pacman at all.
            return []
        package_names = self._packages_for_installed_files(files)
        return self._get_installed_packages(list(set(package_names)))

    def _packages_for_installed_files(
        self,
        files: typing.List[str],
    ) -> typing.List[str]:
        """Ask Pacman which package owns each file; return the package names.

        Output lines starting with "error:" are reported with warn() and
        skipped. Raises subprocess.CalledProcessError if Pacman exits with a
        non-zero status.
        """
        # HACK(strager): On Windows, Pacman doesn't work with \ as the path
        # separator.
        pacman_friendly_files = [f.replace("\\", "/") for f in files]
        process = subprocess.run(
            [self.__pacman_exe, "--query", "--quiet", "--owns", "--"]
            + pacman_friendly_files,
            stdout=subprocess.PIPE,
            # Error lines are interleaved with results so they can be matched
            # up with files by position.
            stderr=subprocess.STDOUT,
            encoding="utf-8",
        )
        process.check_returncode()

        packages = []
        for (i, line) in enumerate(process.stdout.splitlines()):
            # Assumes Pacman prints exactly one line per queried file, in
            # order.
            file = files[i]
            if line.startswith("error:"):
                warn("could not determine Pacman package for file %s: %s", file, line)
            else:
                package_name = line
                logger.debug("Pacman package %s contains file %s", package_name, file)
                packages.append(package_name)
        return packages

    def _get_installed_packages(
        self, package_names: typing.List[str]
    ) -> typing.List[InstalledPackage]:
        """Query Pacman for the details and file lists of the named packages.

        Raises subprocess.CalledProcessError if Pacman exits with a non-zero
        status.
        """
        if not package_names:
            # With no targets, a Pacman query covers every installed package,
            # which would attribute unrelated software to quick-lint-js.
            return []

        package_to_paths = self._get_files_for_packages(package_names)

        process = subprocess.run(
            [self.__pacman_exe, "--query", "--quiet", "--info", "--"] + package_names,
            stdout=subprocess.PIPE,
            encoding="utf-8",
        )
        process.check_returncode()

        packages = []
        current_package_properties = {}

        def finish_package() -> None:
            """Turn the accumulated properties into an InstalledPackage."""
            nonlocal current_package_properties
            name = current_package_properties["Name"]
            packages.append(
                self.InstalledPackage(
                    name=name,
                    version=current_package_properties["Version"],
                    source_uris=(
                        self.__source_uri.replace(
                            "__PACKAGE__", urllib.parse.quote(name)
                        ),
                    ),
                    licenses=current_package_properties["Licenses"],
                    files=tuple(sorted(package_to_paths.get(name, []))),
                )
            )
            current_package_properties = {}

        # The output is a sequence of "Key : Value" lines. A "Name" line
        # starts the next package's record.
        for line in process.stdout.splitlines():
            match = re.match(r"^(?P<key>[^:]+?)\s*:\s*(?P<value>.*)$", line)
            if match is not None:
                key = match.group("key")
                value = match.group("value")
                if key == "Name":
                    if current_package_properties:
                        finish_package()
                current_package_properties[key] = value
        # Flush the last record, if any properties were parsed at all.
        if current_package_properties:
            finish_package()
        return packages

    def _get_files_for_packages(
        self, package_names: typing.List[str]
    ) -> typing.Mapping[str, typing.Set[pathlib.Path]]:
        """Return, for each named package, the absolute paths of its files.

        Raises subprocess.CalledProcessError if Pacman exits with a non-zero
        status.
        """
        process = subprocess.run(
            [self.__pacman_exe, "--query", "--list", "--"] + package_names,
            stdout=subprocess.PIPE,
            encoding="utf-8",
        )
        process.check_returncode()

        package_to_paths = collections.defaultdict(set)
        for line in process.stdout.splitlines():
            # Each line is "<package> <path>".
            (package_name, path) = line.split(" ", 1)
            package_to_paths[package_name].add(self._absolute_path(path))
        return package_to_paths

    def _absolute_path(self, pacman_path: str) -> pathlib.Path:
        """On MSYS2, Pacman returns paths like "/ucrt64/include/agtctl_i.c".
        Convert this path to "C:/msys64/ucrt64/include/agtctl_i.c".
        """
        assert pacman_path[0] == "/"
        return self.__root / pacman_path[1:]

    def get_copyright(self, package: InstalledPackage) -> str:
        """Return the package's license text.

        Uses the contents of the package's COPYING files when it has any,
        otherwise falls back to the license names Pacman reports.
        """
        license_files = self._read_license_files(package.files)
        if not license_files:
            # We don't have the proper copyright information. Report the SPDX
            # license names from the PKGBUILD files.
            return package.licenses
        result_lines = []
        for (license_path, license) in license_files.items():
            result_lines.append(f"Contents of {license_path.name}:")
            result_lines.append(license)
            result_lines.append("")
        return "\n".join(result_lines)

    def _read_license_files(
        self, package_files: typing.Iterable[pathlib.Path]
    ) -> typing.Mapping[pathlib.Path, str]:
        """Read every file whose name contains "COPYING"; map path to text."""
        license_files = {}
        for path in package_files:
            if "COPYING" in path.name:
                license_files[path] = path.read_text()
        return license_files


def is_path_relative(root_path: str, sub_path: typing.Union[str, pathlib.Path]) -> bool:
    """Return whether sub_path is root_path itself or located beneath it.

    The comparison is done on the path components only, via
    pathlib's relative_to.
    """
    candidate = pathlib.Path(sub_path)
    try:
        candidate.relative_to(root_path)
    except ValueError:
        return False
    else:
        return True


class Debian:
    """The dpkg/apt package manager, used on Debian and derived systems."""

    class InstalledPackage(typing.NamedTuple):
        name: str
        version: str
        source: str
        source_version: str
        source_uris: typing.Tuple[str, ...]

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        """Return the installed packages owning the files, each annotated with
        the URIs apt reports for its source package.
        """
        binary_names = set(self._dpkg_packages_for_installed_files(files))
        installed = self._get_installed_debian_packages(binary_names)
        logger.debug("Debian packages: %s", ", ".join(map(repr, installed)))

        annotated = []
        for package in installed:
            uris = self._get_package_apt_source_uri(
                name=package.source, version=package.source_version
            )
            annotated.append(package._replace(source_uris=tuple(uris)))
        return annotated

    def get_copyright(self, package: InstalledPackage) -> str:
        """Return the package's /usr/share/doc/<package>/copyright file."""
        # HACK: Strip architecture.
        bare_name = package.name.partition(":")[0]
        return pathlib.Path("/usr/share/doc", bare_name, "copyright").read_text()

    @staticmethod
    def _dpkg_packages_for_installed_files(
        files: typing.Iterable[str],
    ) -> typing.List[str]:
        """Return the package name from each "<package>: <file>" line printed
        by 'dpkg-query --search'. Other lines are ignored.

        Raises subprocess.CalledProcessError if dpkg-query fails.
        """
        command = ["dpkg-query", "--search", "--", *files]
        process = subprocess.run(
            command, stdout=subprocess.PIPE, encoding="utf-8", check=True
        )

        line_pattern = re.compile(r"^(?P<package>\S+): (?P<file>.*)$")
        owners = []
        for line in process.stdout.splitlines():
            match = line_pattern.match(line)
            if match is None:
                continue
            owner = match.group("package")
            owners.append(owner)
            logger.debug("dpkg %s contains file %s", owner, match.group("file"))
        return owners

    @classmethod
    def _get_installed_debian_packages(
        cls,
        package_names: typing.Iterable[str],
    ) -> typing.List[InstalledPackage]:
        """Look up name, source, and versions of the named packages.

        Only packages whose status is "installed" are returned; source_uris is
        left empty. Raises subprocess.CalledProcessError if dpkg-query fails.
        """
        # One tab-separated row per package; the fifth column is the status.
        show_format = "--showformat=${binary:Package}\\t${source:Package}\\t${Version}\\t${source:Version}\\t${db:Status-Status}\\n"
        command = ["dpkg-query", "--show", show_format, "--", *package_names]
        process = subprocess.run(
            command, stdout=subprocess.PIPE, encoding="utf-8", check=True
        )

        installed = []
        for row in process.stdout.splitlines():
            columns = row.split("\t")
            if columns[4] != "installed":
                continue
            binary, source, version, source_version = columns[:4]
            installed.append(
                cls.InstalledPackage(
                    name=binary,
                    source=source,
                    version=version,
                    source_version=source_version,
                    source_uris=(),
                )
            )
        return installed

    @classmethod
    def _get_package_apt_source_uri(cls, name: str, version: str) -> typing.List[str]:
        """Return the URIs 'apt-get source --print-uris' lists for a source
        package at an exact version.

        Exit status 100 produces a warning and an empty list; any other
        non-zero status raises subprocess.CalledProcessError.
        """
        logger.debug("getting source for package %s version %s", name, version)
        command = [
            "apt-get",
            "source",
            "--print-uris",
            "-qq",
            "--",
            f"{name}={version}",
        ]
        process = subprocess.run(command, stdout=subprocess.PIPE, encoding="utf-8")
        if process.returncode == 100:
            warn(f"failed to get source URIs for package {name}")
            return []
        process.check_returncode()
        return cls._parse_apt_get_source_uris(process.stdout)

    @staticmethod
    def _parse_apt_get_source_uris(raw: str) -> typing.List[str]:
        """Extract the single-quoted URI at the start of each line of
        'apt-get source --print-uris' output. Other lines are skipped.
        """
        matches = (re.match(r"^'(?P<uri>.*)' .*$", line) for line in raw.splitlines())
        return [match.group("uri") for match in matches if match is not None]


class FreeBSD:
    """Attribute statically linked archives to FreeBSD or its bundled LLVM."""

    class InstalledPackage(typing.NamedTuple):
        name: str
        source: str
        source_uris: typing.Tuple[str, ...]
        version: str

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        """Return one package per recognized archive path in `files`.

        libc/libm/libpthread archives are attributed to source "FreeBSD";
        libc++ and libgcc_eh archives to source "LLVM (FreeBSD)". External
        tools (freebsd-version, clang) are only run when a matching archive is
        present. Raises Exception if any path is not recognized.
        """
        remaining = set(files)
        found = []

        # Holds a single (version, source URI) pair once freebsd-version has
        # been run, so the tool is invoked at most once per call.
        cached_info: typing.List[typing.Tuple[str, str]] = []

        def freebsd_info() -> typing.Tuple[str, str]:
            if not cached_info:
                release = subprocess.check_output(
                    ["/bin/freebsd-version", "-u"], encoding="utf-8"
                ).strip()
                branch = release.replace("-RELEASE", "")
                cached_info.append(
                    (release, f"https://cgit.freebsd.org/src/?h=releng/{branch}")
                )
            return cached_info[0]

        base_archives = remaining & {
            "/usr/lib/libc.a",
            "/usr/lib/libm.a",
            "/usr/lib/libpthread.a",
        }
        if base_archives:
            release, source_uri = freebsd_info()
            for archive in base_archives:
                found.append(
                    FreeBSD.InstalledPackage(
                        name=pathlib.Path(archive).stem,
                        source="FreeBSD",
                        version=release,
                        source_uris=(source_uri,),
                    )
                )
            remaining.difference_update(base_archives)

        llvm_package_names = {
            "/usr/lib/libc++.a": "libc++",
            "/usr/lib/libgcc_eh.a": "libunwind",
        }
        llvm_archives = {
            archive for archive in remaining if archive in llvm_package_names
        }
        if llvm_archives:
            _, source_uri = freebsd_info()
            clang = parse_clang_version(
                subprocess.check_output(
                    ["/usr/bin/clang", "--version"], encoding="utf-8"
                )
            )
            for archive in llvm_archives:
                found.append(
                    FreeBSD.InstalledPackage(
                        name=llvm_package_names[archive],
                        source="LLVM (FreeBSD)",
                        # Prefer the VCS tag; fall back to the plain version.
                        version=clang.vcs_tag or clang.version,
                        source_uris=(source_uri, clang.vcs_uri),
                    )
                )
            remaining.difference_update(llvm_archives)

        if remaining:
            raise Exception(
                f"failed to determine source for linked files: {remaining}"
            )

        return found

    def get_copyright(self, package: InstalledPackage) -> str:
        """Read the license text installed on the system for `package`.

        Raises Exception when package.source is neither "FreeBSD" nor
        "LLVM (FreeBSD)".
        """
        license_paths = {
            "FreeBSD": "/COPYRIGHT",
            "LLVM (FreeBSD)": "/usr/share/doc/llvm/LICENSE.TXT",
        }
        license_path = license_paths.get(package.source)
        if license_path is None:
            raise Exception(
                f"could not determine copyright for package: {package.source}"
            )
        return pathlib.Path(license_path).read_text()


class Nix:
    """Placeholder for Nix package lookup; nothing is implemented yet."""

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[None]:
        """Return an empty list; `files` is currently ignored."""
        # TODO(strager)
        return []


class Emscripten:
    """Attribute archives from an emscripten installation to their upstream projects."""

    class InstalledPackage(typing.NamedTuple):
        name: str
        source: str
        version: str
        source_uris: typing.Tuple[str, ...]
        # Directory containing emscripten-version.txt; license paths are
        # resolved relative to it.
        root: pathlib.Path

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        """Return the emscripten source packages behind the given archives.

        Archives are recognized by file name. Unrecognized paths are reported
        through warn(). If no path is recognized, an empty list is returned
        without touching the filesystem. Otherwise the emscripten root is
        located by searching the first recognized file's ancestors for
        emscripten-version.txt, and one package per distinct source project
        (always including "emscripten" itself) is returned, sorted by name.

        Raises Exception if emscripten-version.txt cannot be found.
        """
        archive_to_source_package = {
            "libc++-except.a": "emscripten libc++",
            "libc++-mt-except.a": "emscripten libc++",
            "libc++-mt-noexcept.a": "emscripten libc++",
            "libc++-mt.a": "emscripten libc++",
            "libc++-noexcept.a": "emscripten libc++",
            "libc++.a": "emscripten libc++",
            "libc++abi-except.a": "emscripten libc++abi",
            "libc++abi-mt-except.a": "emscripten libc++abi",
            "libc++abi-mt-noexcept.a": "emscripten libc++abi",
            "libc++abi-mt.a": "emscripten libc++abi",
            "libc++abi-noexcept.a": "emscripten libc++abi",
            "libc++abi.a": "emscripten libc++abi",
            "libc-asan.a": "emscripten libc (musl)",
            "libc-mt-asan.a": "emscripten libc (musl)",
            "libc-mt.a": "emscripten libc (musl)",
            "libc.a": "emscripten libc (musl)",
            "libc_rt_wasm-asan.a": "emscripten libc (musl)",
            "libc_rt_wasm.a": "emscripten libc (musl)",
            "libcompiler_rt-mt.a": "emscripten compiler-rt",
            "libcompiler_rt.a": "emscripten compiler-rt",
            "libdlmalloc-debug-noerrno-tracing.a": "emscripten",
            "libdlmalloc-debug-noerrno.a": "emscripten",
            "libdlmalloc-debug-tracing.a": "emscripten",
            "libdlmalloc-debug.a": "emscripten",
            "libdlmalloc-mt-debug-noerrno-tracing.a": "emscripten",
            "libdlmalloc-mt-debug-noerrno.a": "emscripten",
            "libdlmalloc-mt-debug-tracing.a": "emscripten",
            "libdlmalloc-mt-debug.a": "emscripten",
            "libdlmalloc-mt-noerrno-tracing.a": "emscripten",
            "libdlmalloc-mt-noerrno.a": "emscripten",
            "libdlmalloc-mt-tracing.a": "emscripten",
            "libdlmalloc-mt.a": "emscripten",
            "libdlmalloc-noerrno-tracing.a": "emscripten",
            "libdlmalloc-noerrno.a": "emscripten",
            "libdlmalloc-tracing.a": "emscripten",
            "libdlmalloc.a": "emscripten",
            "libpthread_stub.a": "emscripten",
            "libstandalonewasm-memgrow.a": "emscripten",
            "libstandalonewasm.a": "emscripten",
        }

        # `files` is only promised to be an Iterable and is needed twice
        # below; materialize it so a one-shot iterator is not exhausted after
        # the first pass (which would silently drop the unknown-file warning).
        files = list(files)

        known_files = [
            file
            for file in files
            if pathlib.Path(file).name in archive_to_source_package
        ]
        unknown_files = sorted(set(files) - set(known_files))
        if unknown_files:
            warn(
                f"files not recognized to be part of emscripten: {' '.join(unknown_files)}"
            )
        if not known_files:
            return []

        version_file = find_file_in_ancestors(
            path=pathlib.Path(known_files[0]).parent, name="emscripten-version.txt"
        )
        if version_file is None:
            raise Exception(
                f"Could not find emscripten-version.txt in {known_files[0]}"
            )
        emscripten_root = version_file.parent
        # The version file is parsed as JSON rather than read as plain text.
        version = json.loads(version_file.read_text())
        git_revision = (
            (emscripten_root / "emscripten-revision.txt").read_text().rstrip("\n")
        )
        full_version = f"emscripten-{version} ({git_revision})"

        source_package_names = {"emscripten"} | {
            archive_to_source_package[pathlib.Path(file).name] for file in known_files
        }
        # Sorted so the result does not depend on set iteration order.
        return [
            self.InstalledPackage(
                name=name,
                source=name,
                version=full_version,
                # A tuple, matching the declared field type.
                source_uris=(
                    f"https://github.com/emscripten-core/emscripten/tree/{git_revision}",
                ),
                root=emscripten_root,
            )
            for name in sorted(source_package_names)
        ]

    def get_copyright(self, package: InstalledPackage) -> str:
        """Read the license file for `package` from inside the emscripten root.

        Raises KeyError if package.source is not a known emscripten project.
        """
        source_package_to_license_path = {
            "emscripten compiler-rt": "system/lib/compiler-rt/LICENSE.TXT",
            "emscripten libc (musl)": "system/lib/libc/musl/COPYRIGHT",
            "emscripten libc++": "system/lib/libcxx/LICENSE.TXT",
            "emscripten libc++abi": "system/lib/libcxxabi/LICENSE.TXT",
            "emscripten": "LICENSE",
        }
        return (
            package.root / source_package_to_license_path[package.source]
        ).read_text()


def find_file_in_ancestors(
    path: pathlib.Path, name: str
) -> typing.Optional[pathlib.Path]:
    """Find `name` in `path` or the nearest of its parent directories.

    Returns the path of the first existing match, checking `path` itself
    first and then each parent in turn, or None if there is no match.
    """
    candidates = (directory / name for directory in [path, *path.parents])
    return next((candidate for candidate in candidates if candidate.exists()), None)


class RedHat:
    """Resolve installed files to RPM packages via rpm and dnf."""

    class InstalledPackage(typing.NamedTuple):
        name: str
        source: str
        version: str
        source_uris: typing.Tuple[str, ...]

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        """Return one package per distinct RPM that provides any of `files`.

        Files whose owning RPM cannot be determined are skipped (a warning is
        emitted by the lookup helper).
        """
        owners = {self._rpm_package_for_installed_file(path) for path in files}
        owners.discard(None)
        return [self._get_rpm_package(owner) for owner in owners]

    def get_copyright(self, package: InstalledPackage) -> str:
        """Read the package's license file under /usr/share/licenses.

        "LICENSES" is tried first, then "LICENSE.TXT". Form feed characters in
        the text are replaced by a "---" separator line. If neither file
        exists, the FileNotFoundError from the last attempt is raised.
        """
        license_directory = pathlib.Path("/usr/share/licenses") / package.source
        last_error = None
        for candidate in ("LICENSES", "LICENSE.TXT"):
            try:
                text = (license_directory / candidate).read_text()
            except FileNotFoundError as not_found:
                # Remember the failure and try the next candidate name.
                last_error = not_found
                continue
            return text.replace("\x0C", "\n---\n")
        assert last_error is not None
        raise last_error

    @staticmethod
    def _rpm_package_for_installed_file(file_path: str) -> typing.Optional[str]:
        """Return rpm's answer for which package provides `file_path`.

        Returns None (after a warning) when rpm exits with a non-zero status.
        """
        logger.debug("checking which RPM provides file %s", file_path)
        query = subprocess.run(
            ["rpm", "--query", "--whatprovides", "--", file_path],
            stdout=subprocess.PIPE,
            encoding="utf-8",
        )
        if query.returncode == 0:
            provider = query.stdout.rstrip("\n")
            logger.debug("RPM %s provides file %s", provider, file_path)
            return provider
        warn(f"could not determine RPM package for file: {file_path}")
        return None

    @classmethod
    def _get_rpm_package(cls, package_name: str) -> InstalledPackage:
        """Query dnf for the package's name, version-release, source name and source RPM location.

        Raises subprocess.CalledProcessError if dnf fails, and ValueError if
        the first query's output is not exactly three tab-separated fields.
        """

        def repoquery(*options: str) -> str:
            # Shared dnf invocation; trailing newlines are stripped.
            return subprocess.check_output(
                ["dnf", "--quiet", "repoquery", *options, "--", package_name],
                encoding="utf-8",
            ).rstrip("\n")

        name, version, source_name = repoquery(
            "--qf", "%{name}\\t%{version}-%{release}\\t%{source_name}"
        ).split("\t")
        source_rpm_url = repoquery("--srpm", "--location")
        return cls.InstalledPackage(
            name=name,
            version=version,
            source=source_name,
            source_uris=(source_rpm_url,),
        )


class LinkMap(typing.NamedTuple):
    """Result of parsing a linker map: the set of archives that were linked."""

    # Archive names or paths as produced by the link-map parsers in this file,
    # e.g. "libboost.a", "/usr/lib/libm.a" or "simdjson.lib".
    linked_archives: typing.Set[str]


def parse_text_link_map(raw_link_map: str) -> LinkMap:
    """Collect the archives mentioned in a textual linker map or trace.

    The input is scanned line by line with a small state machine:

    * After an "Archive member included ..." header and one blank line, each
      line names an archive member; the file part is recorded.
    * After "# Object files:", lines of the form "[n] archive(object)" are
      recorded by their archive path.
    * Outside those sections, several header-less line formats are tried on
      every line (Microsoft map rows, bare "archive.a(member.o)" lines,
      three-column and four-column section tables).

    A blank line ends the current section.
    """
    elf_section_headers = (
        "Archive member included to satisfy reference by file (symbol)",
        "Archive member included because of file (symbol)",
    )
    # Whitespace inside this verbose pattern is insignificant.
    elf_member_pattern = re.compile(
        r"""
                        ^
                        (?P<file>\S[^()]*?)     # File
                        (?:\([^()]*\))?         # Optional symbol after file
                        (?:\s+\S+\s\([^()]*\))? # Optional reason (file-symbol pair)
                        $
                    """,
        re.VERBOSE,
    )
    macho_object_pattern = re.compile(
        r"^\[[0-9 ]+\] (?P<file>.*?)(?:\[\d+\])?\((?P<object>.*)\)$"
    )
    # Formats without a section header. Every pattern is tried on every line,
    # each paired with how the archive name is derived from its match.
    headerless_formats = (
        (
            re.compile(
                r"^\s*(?P<address>[0-9a-f]+:[0-9a-f]+)\s+(?P<symbol>.*)\s+(?P<value>[0-9a-f]+)\s+[f ]\s+[i ]\s+(?P<archive>.*):(?P<file>.*)$"
            ),
            lambda found: found.group("archive") + ".lib",
        ),
        (
            re.compile(r"^(?P<archive>\S+\.a)\((?P<member>[^/]+\.o)\)$"),
            lambda found: found.group("archive"),
        ),
        (
            re.compile(
                r"^\s*\d+\s+\d+\s+\d+\s+(?P<archive>\S+\.a)\((?P<member>[^/]+\.o)\):\((?P<symbol>.*)\)$"
            ),
            lambda found: pathlib.Path(found.group("archive")).name,
        ),
        (
            # ELF LLD style:
            re.compile(
                r"^\s*[0-9a-f]+\s+[0-9a-f]+\s+[0-9a-f]+\s+[0-9a-f]+\s+(?P<archive>\S+\.a)\((?P<member>[^/]+\.o)\):\((?P<symbol>.*)\)$"
            ),
            lambda found: found.group("archive"),
        ),
    )

    archives = set()
    mode = "NONE"
    for text in raw_link_map.splitlines():
        if text == "":
            # The blank line after an ELF header opens the member list; any
            # other blank line closes the current section.
            if mode == "BEFORE_ELF_ARCHIVE_MEMBERS":
                mode = "ELF_ARCHIVE_MEMBERS"
            else:
                mode = "NONE"
            continue
        if text in elf_section_headers:
            mode = "BEFORE_ELF_ARCHIVE_MEMBERS"
            continue
        if text == "# Object files:":
            mode = "MACHO_OBJECT_FILES"
            continue

        if mode == "ELF_ARCHIVE_MEMBERS":
            found = elf_member_pattern.match(text)
            if found is not None:
                archives.add(found.group("file"))
        elif mode == "MACHO_OBJECT_FILES":
            found = macho_object_pattern.match(text)
            if found is not None:
                archives.add(found.group("file"))
        elif mode == "NONE":
            for pattern, archive_name in headerless_formats:
                found = pattern.match(text)
                if found is not None:
                    archives.add(archive_name(found))
    return LinkMap(linked_archives=archives)


def parse_coff_lld_link_map(
    tar_path: pathlib.Path, build_directory: pathlib.Path
) -> LinkMap:
    """Read a .tar file generated by Windows lld's -reproduce option.

    Every tar entry whose suffix is ".a" or ".lib" (case-insensitive) is
    recorded: by file name only when is_path_relative(build_directory, path)
    is true, otherwise by its full reconstructed path.
    """
    archive_suffixes = (".a", ".lib")

    def to_absolute_path(tar_member_name: str) -> pathlib.Path:
        """
        POSIX: quick-lint-js/tmp/crt2u.o -> /tmp/crt2u.o
        Windows: quick-lint-js/C/tmp/crt2u.o -> C:/tmp/crt2u.o
        """
        components = list(pathlib.PurePosixPath(tar_member_name).parts)
        if os.name == "nt":
            # Drop the leading directory, then turn the drive letter
            # component into a drive root.
            components = components[1:]
            components[0] = components[0] + ":\\"
        else:
            # The leading directory stands in for the filesystem root.
            components[0] = "/"
        return pathlib.Path(*components)

    def archive_key(path: pathlib.Path) -> str:
        return path.name if is_path_relative(build_directory, path) else str(path)

    with tarfile.open(tar_path) as tar:
        absolute_paths = map(to_absolute_path, tar.getnames())
        archives = {
            archive_key(path)
            for path in absolute_paths
            if path.suffix.lower() in archive_suffixes
        }
    return LinkMap(linked_archives=archives)


class VendorReadmeProject(typing.NamedTuple):
    """One "## <name>" section parsed from the vendor README."""

    # Text following "## " on the section's heading line.
    project_name: str
    # Directory handed to parse_vendor_readme_txt; paths from the README
    # (such as license files) are resolved against it.
    vendor_directory: pathlib.Path
    # "Key: value" lines found in the section, e.g. "Version" or
    # "Download URL".
    properties: typing.Dict[str, str]


def parse_vendor_readme_txt(
    txt: str, vendor_directory: pathlib.Path
) -> typing.List[VendorReadmeProject]:
    """Parse the README.txt file in quick-lint-js' vendor/ directory.

    A "## name" line starts a new project. Any line of the form
    "key: value" is stored as a property of the most recently started
    project; such lines before the first project are ignored.
    """
    heading_pattern = re.compile(r"^## (?P<project_name>.+)$")
    property_pattern = re.compile(r"^(?P<key>[^:]+): (?P<value>.*)$")

    parsed = []
    for text in txt.splitlines():
        heading = heading_pattern.match(text)
        if heading is not None:
            parsed.append(
                VendorReadmeProject(
                    project_name=heading.group("project_name"),
                    vendor_directory=vendor_directory,
                    properties={},
                )
            )
        # Checked for every line, heading lines included.
        entry = property_pattern.match(text)
        if entry is None or not parsed:
            continue
        key, value = entry.group("key", "value")
        parsed[-1].properties[key] = value
    return parsed


def warn(message: str) -> None:
    """Print a warning to stderr, or abort if warnings are configured as fatal.

    When the QLJS_COLLECT_COPYRIGHT_NO_WARNINGS environment variable is set
    to a non-empty value, the message is printed with a "fatal" label and the
    process exits with status 1 (SystemExit is raised). Otherwise the message
    is printed with a "warning" label and execution continues.
    """
    if os.environ.get("QLJS_COLLECT_COPYRIGHT_NO_WARNINGS", ""):
        print(f"{__file__}: fatal: {message}", file=sys.stderr)
        # sys.exit rather than the builtin exit(): the builtin is injected by
        # the site module and is absent under `python -S` or in frozen builds.
        sys.exit(1)
    print(f"{__file__}: warning: {message}", file=sys.stderr)


class TestGNULinkMap(unittest.TestCase):
    """parse_text_link_map on maps with an "Archive member included ..." section (BFD ld and gold samples)."""

    def test_small_link_map_from_bfd_ld(self) -> None:
        raw_link_map = """\
Archive member included to satisfy reference by file (symbol)

/usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
                              /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o (__libc_csu_init)

Merging program properties


As-needed library included to satisfy reference by file (symbol)

libc.so.6                     /tmp/cchQcqEp.o (puts@@GLIBC_2.2.5)
"""
        link_map = parse_text_link_map(raw_link_map)
        # Only the archive is expected: not its member, not the referencing
        # object file, and not the shared library from the as-needed section.
        self.assertEqual(
            link_map.linked_archives, {"/usr/lib/x86_64-linux-gnu/libc_nonshared.a"}
        )

    def test_small_link_map_with_archive_dependencies_from_bfd_ld(self) -> None:
        raw_link_map = """\
Archive member included to satisfy reference by file (symbol)

libboost.a(global_resource.cpp.o)
                              src/CMakeFiles/quick-lint-js.dir/main.cpp.o (new_delete_resource())
libboost.a(dlmalloc.cpp.o)    libboost.a(global_resource.cpp.o) (boost::container::dlmalloc_global_sync_lock())
libboost.a(alloc_lib.c.o)     libboost.a(dlmalloc.cpp.o) (boost_cont_all_deallocated)
"""
        link_map = parse_text_link_map(raw_link_map)
        # Members pulled in by other members of the same archive still yield
        # a single entry for that archive.
        self.assertEqual(link_map.linked_archives, {"libboost.a"})

    def test_small_link_map_from_gold(self) -> None:
        raw_link_map = """\
Archive member included because of file (symbol)

/usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
                              /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o (__libc_csu_init)
libquick-lint-js-lib.a        libquick-lint-js-lib.a (_ZN13quick_lint_js16web_demo_locatorC1ENS_18padded_string_viewE)
../libboost.a                 CMakeFiles/quick-lint-js.dir/main.cpp.o (_ZN5boost9container3pmr19new_delete_resourceEv)
../libboost.a                 ../libboost.a (_ZN5boost9container25dlmalloc_global_sync_lockEv)
../libboost.a                 ../libboost.a (boost_cont_multialloc_arrays)

Discarded input sections

 .note.GNU-stack
                0x0000000000000000        0x0 /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o
 .note.GNU-stack
"""
        link_map = parse_text_link_map(raw_link_map)
        # Some rows in this sample name the archive without a "(member)"
        # suffix; paths are expected exactly as written in the map.
        self.assertEqual(
            link_map.linked_archives,
            {
                "/usr/lib/x86_64-linux-gnu/libc_nonshared.a",
                "libquick-lint-js-lib.a",
                "../libboost.a",
            },
        )


class TestLD64LinkMap(unittest.TestCase):
    """parse_text_link_map on a map with a "# Object files:" section (ld64 sample)."""

    def test_small_link_map(self) -> None:
        raw_link_map = """\
# Path: quick-lint-js
# Arch: x86_64
# Object files:
[  0] linker synthesized
[  1] /usr/local/opt/llvm/lib/libc++.a(charconv.cpp.o)
[  2] /usr/local/opt/llvm/lib/libc++abi.a(cxa_default_handlers.cpp.o)
[  3] src/CMakeFiles/quick-lint-js.dir/main.cpp.o
[  4] /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/lib/libSystem.tbd
# Sections:
# Address	Size    	Segment	Section
0x100001AB0	0x000D62E3	__TEXT	__text
0x1000D7D94	0x0000061E	__TEXT	__stubs
# Symbols:
# Address	Size    	File  Name
0x100001AB0	0x00000280	[  1] __ZNSt3__16__itoa8__u32toaEjPc
0x100001D30	0x00000660	[  1] __ZNSt3__16__itoa8__u64toaEyPc
0x100002390	0x00000020	[  2] __ZNSt3__118condition_variable10notify_oneEv
0x1000023B0	0x00000020	[  2] __ZNSt3__118condition_variable10notify_allEv
"""
        link_map = parse_text_link_map(raw_link_map)
        # Only entries written as "archive(object)" count; plain object files
        # and .tbd stubs are not expected in the result.
        self.assertEqual(
            link_map.linked_archives,
            {"/usr/local/opt/llvm/lib/libc++.a", "/usr/local/opt/llvm/lib/libc++abi.a"},
        )


class TestDyldLinkMap(unittest.TestCase):
    """Test Apple's macOS linker introduced in Xcode 15."""

    def test_small_link_map(self) -> None:
        raw_link_map = """\
# Path: quick-lint-js
# Arch: arm64
# Object files:
[  0] linker synthesized
[  1] tlv-file
[  2] inits-file
[  3] /Users/strager/Projects/quick-lint-js-sl/build/src/CMakeFiles/quick-lint-js.dir/quick-lint-js/cli/main.cpp.o
[  4] src/libquick-lint-js-lib.a[4](emacs-lisp-diag-reporter.cpp.o)
[ 89] vendor/simdjson/libsimdjson.a[2](simdjson.cpp.o)
[ 90] /opt/homebrew/Cellar/llvm/17.0.6/lib/c++/libc++.1.0.dylib
[ 91] /Library/Developer/CommandLineTools/SDKs/MacOSX13.3.sdk/usr/lib/system/libsystem_c.tbd
# Sections:
# Address	Size    	Segment	Section
0x100002C10	0x001FB1E4	__TEXT	__text
0x1001FDDF4	0x00005304	__TEXT	__stubs
# Symbols:
# Address	Size    	File  Name
0x100002C10	0x00000028	[  3] _main
0x100002C38	0x00000014	[  3] __ZN13quick_lint_js12_GLOBAL__N_14initEv
0x100002C4C	0x0000005C	[  3] __ZN13quick_lint_js12_GLOBAL__N_13runEiPPc
"""
        link_map = parse_text_link_map(raw_link_map)
        # The "[n]" index that this format places between the archive path and
        # "(object)" must not end up in the archive name; dylibs and .tbd
        # files are not expected in the result.
        self.assertEqual(
            link_map.linked_archives,
            {"src/libquick-lint-js-lib.a", "vendor/simdjson/libsimdjson.a"},
        )


class TestMoldLinkMap(unittest.TestCase):
    """parse_text_link_map on a mold-style table (three numeric columns before the input file)."""

    def test_small_link_map(self) -> None:
        raw_link_map = """\
             VMA       Size Align Out     In      Symbol
         2097152         64     8 
         2097216        560     8 
         2097776         28     1 .interp
         2097804         32     4 .note.ABI-tag
         2097804         32     4         /usr/lib/x86_64-linux-gnu/crt1.o:(.note.ABI-tag)
         2491008     105120    16 .rodata
         2491008        140     4         src/CMakeFiles/quick-lint-js.dir/main.cpp.o:(.rodata._ZN13quick_lint_js12lsp_endpointINS_26linting_lsp_server_handlerINS_21lsp_javascript_linterEEENS_15lsp_pipe_writerEE14message_parsedESt17basic_string_viewIDuSt11char_traitsIDuEE)
         2511120          0     0                 simdjson::fallback::(anonymous namespace)::stringparsing::escape_map
         2511376        192    16         src/libquick-lint-js-lib.a(buffering-error-reporter.cpp.o):(.rodata._ZZN13quick_lint_js24buffering_error_reporter11report_implENS_10error_typeEPvE11error_sizes)
         2651424          8     4         /usr/lib/x86_64-linux-gnu/crti.o:(.fini)
         2651424          0     0                 _fini
               0    3085635     1 .debug_str
               0       2432     8 
"""
        link_map = parse_text_link_map(raw_link_map)
        # For this format the archive is reported by file name only: the
        # "src/" directory from the map is not part of the expected value.
        self.assertEqual(
            link_map.linked_archives,
            {"libquick-lint-js-lib.a"},
        )


class TestELFLLDLinkMap(unittest.TestCase):
    """parse_text_link_map on an ELF LLD-style table (four hexadecimal columns before the input file)."""

    def test_small_link_map(self) -> None:
        raw_link_map = """\
             VMA              LMA     Size Align Out     In      Symbol
          2002e0           2002e0       15     1 .interp
          2002e0           2002e0       15     1         <internal>:(.interp)
          2002f8           2002f8       30     4 .note.tag
          2002f8           2002f8       30     4         /usr/lib/crt1.o:(.note.tag)
          202350           202350     c2a6     1         <internal>:(.rodata)
          20e5f8           20e5f8       20     8         CMakeFiles/quick-lint-js.dir/quick-lint-js/cli/main.cpp.o:(.rodata._ZN13quick_lint_js12_GLOBAL__N_132get_linter_options_from_languageENS_19input_file_languageE)
          20e618           20e618       10     4         <internal>:(.rodata)
          20e628           20e628       30     8         CMakeFiles/quick-lint-js.dir/quick-lint-js/cli/main.cpp.o:(.rodata._ZTVN13quick_lint_js24configuration_filesystemE)
          20e628           20e628       30     1                 vtable for quick_lint_js::configuration_filesystem
          20e778           20e778       30     8         libquick-lint-js-lib.a(basic-configuration-filesystem.cpp.o):(.rodata._ZTVN13quick_lint_js30basic_configuration_filesystemE)
          20e778           20e778       30     1                 vtable for quick_lint_js::basic_configuration_filesystem
          5783a8           5783a8        1     1         ../vendor/simdjson/libsimdjson.a(simdjson.cpp.o):(.bss._ZN8simdjson25available_implementationsE)
          5783a8           5783a8        1     1                 simdjson::available_implementations
          5783b0           5783b0        8     8         ../vendor/simdjson/libsimdjson.a(simdjson.cpp.o):(.bss._ZN8simdjson21active_implementationE)
          5783b0           5783b0        8     1                 simdjson::active_implementation
          2c9e00           2c9e00       30     1         /usr/lib/libm.a(s_ceilf.o):(.eh_frame+0x18)
"""
        link_map = parse_text_link_map(raw_link_map)
        # For this format archive paths are expected exactly as written in
        # the map, including relative and absolute directories.
        self.assertEqual(
            link_map.linked_archives,
            {
                "libquick-lint-js-lib.a",
                "../vendor/simdjson/libsimdjson.a",
                "/usr/lib/libm.a",
            },
        )


class TestMicrosoftLinkMap(unittest.TestCase):
    """parse_text_link_map on a Microsoft-style map whose rows end in "Lib:Object"."""

    def test_small_link_map(self) -> None:
        raw_link_map = """\
 quick-lint-js

 Timestamp is 5fffbf67 (Wed Jan 13 19:49:59 2021)

 Preferred load address is 0000000140000000

 Start         Length     Name                   Class
 0001:00000000 00000630H .text$di                CODE

  Address         Publics by Value              Rva+Base               Lib:Object

 0000:00000000       __x64_code_ranges_to_entry_points_count 0000000000000000     <absolute>
 0001:00000cf0       ??$?RVarrow_function_with_statements@expression@quick_lint_js@<lambda_1>@?1???$with_derived@V<lambda_1>@?1??object_entry@expression@quick_lint_js@@QEBA?AUobject_property_value_pair@4@H@Z@@expression@quick_lint_js@@AEBA@$$QEAV0?1??object_entry@12@QEBA?AUobject_property_value_pair@2@H@Z@@Z@QEBA?AU42@AEBVarrow_function_with_statements@12@@Z 0000000140001cf0 f i main.obj
 0001:0003d760       ?create_dom_parser_implementation@implementation@haswell@simdjson@@UEBA?AW4error_code@3@_K0AEAV?$unique_ptr@Vdom_parser_implementation@internal@simdjson@@U?$default_delete@Vdom_parser_implementation@internal@simdjson@std@std@Z 000000014003e760 f   simdjson:simdjson.obj

 entry point at        0001:00059824

 Static symbols

 0001:00000000       ??__Eallocator_arg@container@boost@@YAXXZ 0000000140001000 f   main.obj
 0001:00000020       ??__Eallocator_arg@container@boost@@YAXXZ 0000000140001020 f   quick-lint-js-lib:lex.obj
 0003:00025f0c       ?initialized@?1??__acrt_initialize_multibyte@@9@4_NA 00000001400c9f0c     libucrt:mbctype.obj
 0006:00000060       $R000000                   00000001400d1060     * linker generated manifest res *
"""
        link_map = parse_text_link_map(raw_link_map)
        # The library name before ":" is expected with ".lib" appended; rows
        # naming a bare object file (main.obj) contribute nothing.
        self.assertEqual(
            link_map.linked_archives,
            {"libucrt.lib", "simdjson.lib", "quick-lint-js-lib.lib"},
        )


class TestLLVMWebAssemblyTrace(unittest.TestCase):
    """parse_text_link_map on a trace that lists one input file per line."""

    def test_small_trace_file(self) -> None:
        raw_link_map = """\
plugin/vscode/CMakeFiles/quick-lint-js-vscode.dir/empty.cpp.o
/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/crt1_reactor.o
src/libquick-lint-js-lib.a(vscode.cpp.o)
/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libc++-noexcept.a(new.o)
/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libstandalonewasm.a(standalone.o)
"""
        link_map = parse_text_link_map(raw_link_map)
        # Lines naming a plain object file are skipped; "archive.a(member.o)"
        # lines are expected to yield the archive path as written.
        self.assertEqual(
            link_map.linked_archives,
            {
                "src/libquick-lint-js-lib.a",
                "/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libc++-noexcept.a",
                "/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libstandalonewasm.a",
            },
        )


class TestAptGetSourceURIs(unittest.TestCase):
    """Debian._parse_apt_get_source_uris on captured `apt-get source --print-uris` output."""

    def test_libc6_dev(self) -> None:
        # Output with informational lines before the URI lines; only the
        # quoted URIs are expected, in order of appearance.
        output = """\
Reading package lists... Done
Picking 'glibc' as source package instead of 'libc6-dev'
NOTICE: 'glibc' packaging is maintained in the 'Git' version control system at:
https://git.launchpad.net/~ubuntu-core-dev/ubuntu/+source/glibc
Please use:
git clone https://git.launchpad.net/~ubuntu-core-dev/ubuntu/+source/glibc
to retrieve the latest (possibly unreleased) updates to the package.
Need to get 18.2 MB of source archives.
'http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.dsc' glibc_2.31-0ubuntu9.1.dsc 9456 SHA512:b3f10b8c8f421431c91f14efbc92fc165675d5cdb427629e67d5e19927de0f4024c736c9da1c5d4b7e2bd0aa01395b3e269aeccee161918311f2ea1c75815f1f
'http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31.orig.tar.xz' glibc_2.31.orig.tar.xz 17317924 SHA512:2ff56628fe935cacbdf1825534f15d45cb87a159cbdb2e6a981590eeb6174ed4b3ff7041519cdecbd4f624ac20b745e2dd9614c420dd3ea186b8f36bc4c2453c
'http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.debian.tar.xz' glibc_2.31-0ubuntu9.1.debian.tar.xz 844816 SHA512:bca1857b031eda2d170256b97829c6b8a38493c66858a041e6f0143bf26c376c207e72d499ef1be07a83667419d55284407bae518511a702171eac58c6f31d62
"""
        source_uris = Debian._parse_apt_get_source_uris(output)
        self.assertEqual(
            source_uris,
            [
                "http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.dsc",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31.orig.tar.xz",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.debian.tar.xz",
            ],
        )

    def test_gcc_9_qq(self) -> None:
        # Output consisting solely of URI lines.
        output = """\
'http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.dsc' gcc-9_9.3.0-17ubuntu1~20.04.dsc 23746 SHA512:bae6c156516a4988546a4518934f661a70243a89ed17883fe382bd984ae6533aab7d9b459986c2ebdb715b64b11ac76b9283447b26be3dbaec0b00c02afeb7f6
'http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0.orig.tar.gz' gcc-9_9.3.0.orig.tar.gz 90490748 SHA512:cebaa27b0ac7485e72f0d7b07e6ef08cd544bf551bc080ee00175cbe319ea8b0592ff54d55633bb189e481f9ba489d513205cf2310e4b5211869a021432ee31f
'http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.debian.tar.xz' gcc-9_9.3.0-17ubuntu1~20.04.debian.tar.xz 763792 SHA512:f696a3d92edbadb7cfb29494b68ec00fa9b278ab7657e45933cf2e7564e6a524ac41edde14449114a2e06ad0c1f56473d998687aeb2f98f18c431727d4254d21
"""
        source_uris = Debian._parse_apt_get_source_uris(output)
        self.assertEqual(
            source_uris,
            [
                "http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.dsc",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0.orig.tar.gz",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.debian.tar.xz",
            ],
        )


class TestParseVendorReadme(unittest.TestCase):
    """parse_vendor_readme_txt on an excerpt of a vendor README."""

    def test_parse_snippet(self) -> None:
        # NOTE(review): this literal opens with an escaped newline (`\n`)
        # rather than the line continuation used by the other fixtures in
        # this file, so the text starts with two empty lines. Neither pattern
        # in parse_vendor_readme_txt matches an empty line, so the expected
        # result is unaffected.
        readme_txt = """\n
# Vendor

This vendor directory contains third-party software.

## benchmark

The benchmark directory contains Google Benchmark, a performance measurement
framework for C++ code.

Copyright: Copyright 2015 Google Inc. All rights reserved.
Version: v1.5.1

## boost

The boost directory contains Boost, free peer-reviewed portable C++ source
libraries.

Many directories and files have been deleted to reduce storage consumption.

Download URL: https://dl.bintray.com/boostorg/release/1.74.0/source/boost_1_74_0.tar.bz2
Download date: August 25, 2020
Version: 1.74.0
"""
        vendor_dir = pathlib.Path(".")
        projects = parse_vendor_readme_txt(readme_txt, vendor_directory=vendor_dir)
        # One project per "## " heading, each carrying only the "Key: value"
        # lines of its own section; the single-"#" title line and the prose
        # paragraphs contribute nothing.
        self.assertEqual(
            projects,
            [
                VendorReadmeProject(
                    project_name="benchmark",
                    vendor_directory=vendor_dir,
                    properties={
                        "Copyright": "Copyright 2015 Google Inc. All rights reserved.",
                        "Version": "v1.5.1",
                    },
                ),
                VendorReadmeProject(
                    project_name="boost",
                    vendor_directory=vendor_dir,
                    properties={
                        "Download URL": "https://dl.bintray.com/boostorg/release/1.74.0/source/boost_1_74_0.tar.bz2",
                        "Download date": "August 25, 2020",
                        "Version": "1.74.0",
                    },
                ),
            ],
        )


class TestParseClangVersion(unittest.TestCase):
    """Tests for parse_clang_version against sample 'clang --version' output."""

    def _assert_parses_as(
        self,
        clang_output: str,
        *,
        version: str,
        vcs_uri: typing.Optional[str],
        vcs_tag: typing.Optional[str],
    ) -> None:
        """Parse clang_output and compare each field with the expectation.

        An expected value of None asserts that the parsed field is None.
        """
        parsed = parse_clang_version(clang_output)
        self.assertEqual(parsed.version, version)
        for actual, expected in (
            (parsed.vcs_uri, vcs_uri),
            (parsed.vcs_tag, vcs_tag),
        ):
            if expected is None:
                self.assertIsNone(actual)
            else:
                self.assertEqual(actual, expected)

    def test_freebsd_13(self) -> None:
        """A vendor-prefixed banner with '(uri tag)' yields all three fields."""
        self._assert_parses_as(
            """\
FreeBSD clang version 11.0.1 (git@github.com:llvm/llvm-project.git llvmorg-11.0.1-0-g43ff75f2c3fe)
Target: x86_64-unknown-freebsd13.0
Thread model: posix
InstalledDir: /usr/bin
""",
            version="11.0.1",
            vcs_uri="git@github.com:llvm/llvm-project.git",
            vcs_tag="llvmorg-11.0.1-0-g43ff75f2c3fe",
        )

    def test_ubuntu(self) -> None:
        """A banner without a parenthesized suffix has no VCS information."""
        self._assert_parses_as(
            """\
Ubuntu clang version 14.0.0-1ubuntu1
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
""",
            version="14.0.0-1ubuntu1",
            vcs_uri=None,
            vcs_tag=None,
        )

    def test_custom(self) -> None:
        """A banner with no vendor prefix still yields version, URI and tag."""
        self._assert_parses_as(
            """\
clang version 15.0.6 (https://github.com/rust-lang/llvm-project.git 3dfd4d93fa013e1c0578d3ceac5c8f4ebba4b6ec)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/strager/Toolchains/clang-stage4-qljs-bolt/bin
""",
            version="15.0.6",
            vcs_uri="https://github.com/rust-lang/llvm-project.git",
            vcs_tag="3dfd4d93fa013e1c0578d3ceac5c8f4ebba4b6ec",
        )

    def test_apple_clang(self) -> None:
        """A single-token parenthesized suffix is not treated as VCS info."""
        self._assert_parses_as(
            """\
Apple clang version 14.0.0 (clang-1400.0.29.202)
Target: arm64-apple-darwin22.3.0
Thread model: posix
InstalledDir: /Users/strager/Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin
""",
            version="14.0.0",
            vcs_uri=None,
            vcs_tag=None,
        )


class ClangVersion(typing.NamedTuple):
    """Version details extracted from the output of 'clang --version'.

    The two VCS fields default to None so that a version without source
    control details can be built from the version string alone.
    """

    # Version token that follows "clang version", e.g. "14.0.0-1ubuntu1".
    version: str
    # First token of the parenthesized "(uri tag)" suffix, or None if absent.
    vcs_uri: typing.Optional[str] = None
    # Second token of the parenthesized "(uri tag)" suffix, or None if absent.
    vcs_tag: typing.Optional[str] = None


def parse_clang_version(clang_version_output: str) -> ClangVersion:
    """Extract version information from the output of 'clang --version'."""
    match = re.search(
        r"\bclang version (?P<version>\d\S*)(?: \((?P<vcs_uri>\S+) (?P<vcs_tag>\S+)\))?",
        clang_version_output,
    )
    if match is not None:
        return ClangVersion(
            version=match.group("version"),
            vcs_uri=match.group("vcs_uri"),
            vcs_tag=match.group("vcs_tag"),
        )
    raise ValueError(
        "failed to parse version information from 'clang --version' output"
    )


# Call main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

# quick-lint-js finds bugs in JavaScript programs.
# Copyright (C) 2020  Matthew "strager" Glazar
#
# This file is part of quick-lint-js.
#
# quick-lint-js is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# quick-lint-js is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with quick-lint-js.  If not, see <https://www.gnu.org/licenses/>.
