#!/usr/bin/env python3

# Copyright (C) 2020  Matthew "strager" Glazar
# See end of file for extended copyright information.

"""Generate quick-lint-js copyright documentation.

Run this program to generate a text file listing copyrights and licenses for
software used by quick-lint-js (and quick-lint-js' own copyright and license).
"""

import argparse
import json
import logging
import os
import pathlib
import re
import subprocess
import sys
import typing
import unittest

logger = logging.getLogger(__name__)

archive_to_vendor_project = {
    "Pathcch.lib": None,
    "boost.lib": "boost",
    "libboost.a": "boost",
    "libquick-lint-js-lib.a": None,
    "libsimdjson.a": "simdjson",
    "node-hook.lib": None,
    "node-napi.lib": None,  # Dynamically linked.
    "quick-lint-js-lib.lib": None,
    "simdjson.lib": "simdjson",
    # HACK(strager): Ignore these libraries for now.
    "LIBCMT.lib": None,
    "LIBCMTD.lib": None,
    "MSVCRT.lib": None,
    "MSVCRTD.lib": None,
    "delayimp.lib": None,
    "kernel32.lib": None,
    "libcpmt.lib": None,
    "libcpmtd.lib": None,
    "libucrt.lib": None,
    "libucrtd.lib": None,
    "libvcruntime.lib": None,
    "libvcruntimed.lib": None,
    "msvcprt.lib": None,
    "msvcprtd.lib": None,
    "ucrt.lib": None,
    "ucrtd.lib": None,
    "vcruntime.lib": None,
    "vcruntimed.lib": None,
}


def main() -> None:
    logging.basicConfig()

    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument(
        "--linkmap",
        help="File generated by an Mach-O linker's -map option or an ELF linker's -Map option or a PE linker's /MAP option",
        required=True,
    )
    parser.add_argument(
        "--cross-compiling-emscripten",
        action="store_true",
        default=False,
        help="Don't assume system package manager manages the standard library",
    )
    parser.add_argument(
        "--extra-vendor-projects",
        default=[],
        help="Include license information for these projects from --vendor-readme",
        nargs="+",
    )
    parser.add_argument(
        "--vendor-readme",
        required=False,
        default=str(pathlib.Path(__file__).parent / ".." / "vendor" / "README.txt"),
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="Print debugging messages",
    )
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    qljs_license_text = (pathlib.Path(__file__).parent / ".." / "COPYING").read_text()
    print(
        f"""\
# quick-lint-js licenses

The quick-lint-js program is Copyright (C) 2020 Matthew "strager" Glazar. It is
distributed under the following license:
{markdown_block_quote(qljs_license_text)}

Parts of quick-lint-js might be distributed under different licenses:

# Arm Optimized Routines

src/quick-lint-js/simd-neon-arm.h in quick-lint-js is derived from
string/aarch64/memchr.S in Arm Optimized Routines.

Source code of Arm Optimized Routines is available at:
* https://github.com/ARM-software/optimized-routines/tree/7a9fd1603e1179b044406fb9b6cc5770d736cde7

Copyright information for Arm Optimized Routines:
> MIT License
>
> Copyright (c) 1999-2019, Arm Limited.
>
> Permission is hereby granted, free of charge, to any person obtaining a copy
> of this software and associated documentation files (the "Software"), to deal
> in the Software without restriction, including without limitation the rights
> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> copies of the Software, and to permit persons to whom the Software is
> furnished to do so, subject to the following conditions:
>
> The above copyright notice and this permission notice shall be included in all
> copies or substantial portions of the Software.
>
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
"""
    )

    vendor_readme_file = pathlib.Path(args.vendor_readme)
    vendor_projects = parse_vendor_readme_txt(vendor_readme_file.read_text())

    dump_from_linkmap(
        link_map_file=pathlib.Path(args.linkmap),
        vendor_projects=vendor_projects,
        vendor_directory=vendor_readme_file.parent,
        is_cross_compiling_emscripten=args.cross_compiling_emscripten,
        extra_vendor_projects=args.extra_vendor_projects,
    )


def dump_from_linkmap(
    link_map_file: pathlib.Path,
    vendor_projects: typing.List["VendorReadmeProject"],
    vendor_directory: pathlib.Path,
    is_cross_compiling_emscripten: bool,
    extra_vendor_projects: typing.List[str],
) -> None:
    link_map = parse_link_map(link_map_file.read_text())
    if not link_map.linked_archives:
        warn("link map lists no archives; this is probably a bug in collect-copyright")
    linked_system_archives = [
        file for file in link_map.linked_archives if pathlib.Path(file).is_absolute()
    ]
    linked_vendored_archives = [
        file
        for file in link_map.linked_archives
        if not pathlib.Path(file).is_absolute()
    ]
    dump_from_vendor(
        linked_vendored_archives,
        project_names=extra_vendor_projects,
        vendor_projects=vendor_projects,
        vendor_directory=vendor_directory,
    )
    dump_from_system_archives(
        linked_system_archives,
        is_cross_compiling_emscripten=is_cross_compiling_emscripten,
    )


def dump_from_vendor(
    archives: typing.List[str],
    project_names: typing.List[str],
    vendor_projects: typing.List["VendorReadmeProject"],
    vendor_directory: pathlib.Path,
) -> None:
    all_project_names = project_names
    for archive in sorted(archives):
        try:
            vendor_project_name = archive_to_vendor_project[pathlib.Path(archive).name]
        except KeyError:
            raise Exception(f"could not find source for archive: {archive}") from None
        if vendor_project_name is None:
            # Archive is quick-lint-js itself. Its license is handled elsewhere.
            continue
        all_project_names.append(vendor_project_name)

    all_project_names = sorted(set(all_project_names))
    for vendor_project_name in all_project_names:
        matching_vendor_projects = [
            p for p in vendor_projects if p.project_name == vendor_project_name
        ]
        if len(matching_vendor_projects) != 1:
            raise Exception(
                f"could not find vendor project for project name: {vendor_project_name}"
            ) from None
        vendor_project = matching_vendor_projects[0]

        license_files = vendor_project.properties["License file"].split("; ")
        license = (vendor_directory / license_files[0]).read_text()

        print(
            f"""\
## {vendor_project_name}

Version {vendor_project.properties['Version']} of {vendor_project_name} is statically linked into quick-lint-js.

Source code for {vendor_project_name} is available at:
* {vendor_project.properties['Download URL']}

Copyright information for libc6-dev:amd64:
{markdown_block_quote(license)})
"""
        )


def detect_system_package_manager(
    files: typing.Iterable[str], is_cross_compiling_emscripten: bool
):
    if is_cross_compiling_emscripten:
        return Emscripten()

    if os.environ.get("NIX_STORE", ""):
        warn("Nix not well supported")
        return Nix()

    try:
        brew_prefix = subprocess.check_output(
            ["brew", "--prefix"], encoding="utf-8"
        ).rstrip("\n")
        if any(is_path_relative(root_path=brew_prefix, sub_path=f) for f in files):
            return Homebrew(brew_prefix=brew_prefix)
    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
        pass

    try:
        subprocess.check_output(["rpm", "--query", "glibc"])
        return RedHat()
    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
        pass

    try:
        subprocess.check_output(["dpkg-query", "--list", "libc6"])
        return Debian()
    except (subprocess.CalledProcessError, FileNotFoundError, PermissionError):
        pass

    return None


def dump_from_system_archives(
    linked_system_archives: typing.List[str], is_cross_compiling_emscripten: bool
) -> None:
    if not linked_system_archives:
        return

    system_package_manager = detect_system_package_manager(
        linked_system_archives,
        is_cross_compiling_emscripten=is_cross_compiling_emscripten,
    )
    if system_package_manager is None:
        warn("Could not detect system package manager")
        print(
            """## Other packages

The following files are statically linked into quick-lint-js. Their copyright and licenses are unknown:
"""
        )
        for archive in sorted(linked_system_archives):
            print(f"* {archive}")
        return

    installed_packages = system_package_manager.packages_for_installed_files(
        linked_system_archives
    )
    source_package_names = set(p.source for p in installed_packages)

    for source_package_name in sorted(source_package_names):
        print(f"## {source_package_name}\n")

        relevant_installed_packages = [
            p for p in sorted(installed_packages) if p.source == source_package_name
        ]

        for p in relevant_installed_packages:
            print(
                f"Version {p.version} of package {p.name} is statically linked into quick-lint-js.\n"
            )

        print(f"Source code for {source_package_name} is available at:")
        source_uris = []
        for p in relevant_installed_packages:
            source_uris.extend(p.source_uris)
        if source_uris:
            for source_uri in sorted(set(source_uris)):
                print(f"* {source_uri}")
        else:
            print(f"* (location unknown!)")

        for p in relevant_installed_packages:
            print(f"\nCopyright information for {p.name}:")
            copyright = system_package_manager.get_copyright(p)
            print(markdown_block_quote(copyright))

        print(f"")


def markdown_block_quote(text: str) -> str:
    return "> " + text.strip("\n").replace("\n", "\n> ")


class Homebrew:
    class InstalledPackage(typing.NamedTuple):
        name: str
        source: str
        version: str
        source_uris: typing.Tuple[str, ...]
        path: str

    def __init__(self, brew_prefix: str) -> None:
        self.__brew_prefix = brew_prefix

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        files_from_unknown_formula = set()
        formulae = set()
        for file in files:
            if self._is_llvm_file(file):
                formulae.add("llvm")
            else:
                files_from_unknown_formula.add(file)
        if files_from_unknown_formula:
            warn(
                f"could not determine formula for files: {' '.join(sorted(files_from_unknown_formula))}"
            )
        return [self._make_installed_package(f) for f in formulae]

    def get_copyright(self, package: InstalledPackage) -> str:
        if package.name == "llvm":
            return """\
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""
        else:
            raise Exception(
                f"could not determine copyright for formula: {package.name}"
            )

    def _make_installed_package(self, formula_name: str) -> InstalledPackage:
        installed_path = pathlib.Path(self.__brew_prefix) / "opt" / formula_name
        version = pathlib.Path(os.readlink(installed_path)).name
        install_receipt = json.loads(
            (installed_path / "INSTALL_RECEIPT.json").read_text()
        )

        tap = install_receipt["source"]["tap"]
        if tap != "homebrew/core":
            raise Exception(f"Unsupported tap for formula {formula_name}: {tap}")

        formula_rb_path = pathlib.Path(install_receipt["source"]["path"])
        # TODO(strager): Get the exact version installed, not the latest
        # version.
        tap_git_commit = subprocess.check_output(
            ["git", "rev-parse", "HEAD"], encoding="utf-8", cwd=formula_rb_path.parent
        ).rstrip("\n")
        formula_rb_repo_path = self._relative_path_within_git_checkout(formula_rb_path)

        return self.InstalledPackage(
            name=formula_name,
            source=formula_name,
            version=version,
            source_uris=[
                f"https://github.com/Homebrew/homebrew-core/blob/{tap_git_commit}/{formula_rb_repo_path}"
            ],
            path=str(installed_path),
        )

    def _relative_path_within_git_checkout(
        self, file_path: pathlib.Path
    ) -> pathlib.Path:
        checkout_path = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            cwd=file_path.parent,
            encoding="utf-8",
        ).rstrip("\n")
        return file_path.relative_to(checkout_path)

    def _is_llvm_file(self, file_path: str) -> bool:
        return is_path_relative(
            sub_path=file_path,
            root_path=pathlib.Path(self.__brew_prefix) / "opt" / "llvm",
        )


def is_path_relative(root_path: str, sub_path: typing.Union[str, pathlib.Path]) -> bool:
    try:
        pathlib.Path(sub_path).relative_to(root_path)
        return True
    except ValueError:
        return False


class Debian:
    class InstalledPackage(typing.NamedTuple):
        name: str
        version: str
        source: str
        source_version: str
        source_uris: typing.Tuple[str, ...]

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        package_names = self._dpkg_packages_for_installed_files(files)
        packages = self._get_installed_debian_packages(set(package_names))
        logger.debug("Debian packages: %s", ", ".join(repr(p) for p in packages))
        packages = [
            p._replace(
                source_uris=tuple(
                    self._get_package_apt_source_uri(
                        name=p.source, version=p.source_version
                    )
                )
            )
            for p in packages
        ]
        return packages

    def get_copyright(self, package: InstalledPackage) -> str:
        package_name = package.name.split(":")[0]  # HACK: Strip architecture.
        return (pathlib.Path("/usr/share/doc") / package_name / "copyright").read_text()

    @staticmethod
    def _dpkg_packages_for_installed_files(
        files: typing.Iterable[str],
    ) -> typing.List[str]:
        process = subprocess.run(
            ["dpkg-query", "--search", "--"] + list(files),
            stdout=subprocess.PIPE,
            encoding="utf-8",
        )
        process.check_returncode()

        packages = []
        for line in process.stdout.splitlines():
            match = re.match(f"^(?P<package>\S+): (?P<file>.*)$", line)
            if match is not None:
                file = match.group("file")
                package_name = match.group("package")
                packages.append(package_name)
                logger.debug("dpkg %s contains file %s", package_name, file)
        return packages

    @classmethod
    def _get_installed_debian_packages(
        cls,
        package_names: typing.Iterable[str],
    ) -> typing.List[InstalledPackage]:
        process = subprocess.run(
            [
                "dpkg-query",
                "--show",
                "--showformat=${binary:Package}\\t${source:Package}\\t${Version}\\t${source:Version}\\t${db:Status-Status}\\n",
                "--",
            ]
            + list(package_names),
            stdout=subprocess.PIPE,
            encoding="utf-8",
        )
        process.check_returncode()

        packages = []
        for line in process.stdout.splitlines():
            parts = line.split("\t")
            if parts[4] == "installed":
                packages.append(
                    cls.InstalledPackage(
                        name=parts[0],
                        source=parts[1],
                        version=parts[2],
                        source_version=parts[3],
                        source_uris=(),
                    )
                )
        return packages

    @classmethod
    def _get_package_apt_source_uri(cls, name: str, version: str) -> typing.List[str]:
        logger.debug("getting source for package %s version %s", name, version)
        process = subprocess.run(
            ["apt-get", "source", "--print-uris", "-qq", "--", f"{name}={version}"],
            stdout=subprocess.PIPE,
            encoding="utf-8",
        )
        if process.returncode == 100:
            warn(f"failed to get source URIs for package {name}")
            return []
        process.check_returncode()
        return cls._parse_apt_get_source_uris(process.stdout)

    @staticmethod
    def _parse_apt_get_source_uris(raw: str) -> typing.List[str]:
        source_uris = []
        for line in raw.splitlines():
            match = re.match(r"^'(?P<uri>.*)' .*$", line)
            if match is not None:
                source_uris.append(match.group("uri"))
        return source_uris


class Nix:
    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[None]:
        # TODO(strager)
        return []


class Emscripten:
    class InstalledPackage(typing.NamedTuple):
        name: str
        source: str
        version: str
        source_uris: typing.Tuple[str, ...]
        root: pathlib.Path

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        archive_to_source_package = {
            "libc++-except.a": "emscripten libc++",
            "libc++-mt-except.a": "emscripten libc++",
            "libc++-mt-noexcept.a": "emscripten libc++",
            "libc++-mt.a": "emscripten libc++",
            "libc++-noexcept.a": "emscripten libc++",
            "libc++.a": "emscripten libc++",
            "libc++abi-except.a": "emscripten libc++abi",
            "libc++abi-mt-except.a": "emscripten libc++abi",
            "libc++abi-mt-noexcept.a": "emscripten libc++abi",
            "libc++abi-mt.a": "emscripten libc++abi",
            "libc++abi-noexcept.a": "emscripten libc++abi",
            "libc++abi.a": "emscripten libc++abi",
            "libc-asan.a": "emscripten libc (musl)",
            "libc-mt-asan.a": "emscripten libc (musl)",
            "libc-mt.a": "emscripten libc (musl)",
            "libc.a": "emscripten libc (musl)",
            "libc_rt_wasm-asan.a": "emscripten libc (musl)",
            "libc_rt_wasm.a": "emscripten libc (musl)",
            "libcompiler_rt-mt.a": "emscripten compiler-rt",
            "libcompiler_rt.a": "emscripten compiler-rt",
            "libdlmalloc-debug-noerrno-tracing.a": "emscripten",
            "libdlmalloc-debug-noerrno.a": "emscripten",
            "libdlmalloc-debug-tracing.a": "emscripten",
            "libdlmalloc-debug.a": "emscripten",
            "libdlmalloc-mt-debug-noerrno-tracing.a": "emscripten",
            "libdlmalloc-mt-debug-noerrno.a": "emscripten",
            "libdlmalloc-mt-debug-tracing.a": "emscripten",
            "libdlmalloc-mt-debug.a": "emscripten",
            "libdlmalloc-mt-noerrno-tracing.a": "emscripten",
            "libdlmalloc-mt-noerrno.a": "emscripten",
            "libdlmalloc-mt-tracing.a": "emscripten",
            "libdlmalloc-mt.a": "emscripten",
            "libdlmalloc-noerrno-tracing.a": "emscripten",
            "libdlmalloc-noerrno.a": "emscripten",
            "libdlmalloc-tracing.a": "emscripten",
            "libdlmalloc.a": "emscripten",
            "libpthread_stub.a": "emscripten",
            "libstandalonewasm-memgrow.a": "emscripten",
            "libstandalonewasm.a": "emscripten",
        }

        known_files = [
            file
            for file in files
            if pathlib.Path(file).name in archive_to_source_package
        ]
        unknown_files = sorted(set(files) - set(known_files))
        if unknown_files:
            warn(
                f"files not recognized to be part of emscripten: {' '.join(unknown_files)}"
            )
        if not known_files:
            return []

        version_file = find_file_in_ancestors(
            path=pathlib.Path(known_files[0]).parent, name="emscripten-version.txt"
        )
        if version_file is None:
            raise Exception(
                f"Could not find emscripten-version.txt in {known_files[0]}"
            )
        emscripten_root = version_file.parent
        version = json.loads(version_file.read_text())
        git_revision = (
            (emscripten_root / "emscripten-revision.txt").read_text().rstrip("\n")
        )
        full_version = f"emscripten-{version} ({git_revision})"

        source_package_names = {"emscripten"} | {
            archive_to_source_package[pathlib.Path(file).name] for file in known_files
        }
        return [
            self.InstalledPackage(
                name=name,
                source=name,
                version=full_version,
                source_uris=[
                    f"https://github.com/emscripten-core/emscripten/tree/{git_revision}"
                ],
                root=emscripten_root,
            )
            for name in source_package_names
        ]

    def get_copyright(self, package: InstalledPackage) -> str:
        source_package_to_license_path = {
            "emscripten compiler-rt": "system/lib/compiler-rt/LICENSE.TXT",
            "emscripten libc (musl)": "system/lib/libc/musl/COPYRIGHT",
            "emscripten libc++": "system/lib/libcxx/LICENSE.TXT",
            "emscripten libc++abi": "system/lib/libcxxabi/LICENSE.TXT",
            "emscripten": "LICENSE",
        }
        return (
            package.root / source_package_to_license_path[package.source]
        ).read_text()


def find_file_in_ancestors(
    path: pathlib.Path, name: str
) -> typing.Optional[pathlib.Path]:
    for ancestor in (path,) + tuple(path.parents):
        p = ancestor / name
        if p.exists():
            return p
    return None


class RedHat:
    class InstalledPackage(typing.NamedTuple):
        name: str
        source: str
        version: str
        source_uris: typing.Tuple[str, ...]

    def packages_for_installed_files(
        self, files: typing.Iterable[str]
    ) -> typing.List[InstalledPackage]:
        package_names = [self._rpm_package_for_installed_file(f) for f in files]
        packages = [
            self._get_rpm_package(name)
            for name in set(package_names)
            if name is not None
        ]
        return packages

    def get_copyright(self, package: InstalledPackage) -> str:
        error = None
        for file_name in ("LICENSES", "LICENSE.TXT"):
            try:
                raw_text = (
                    pathlib.Path("/usr/share/licenses") / package.source / file_name
                ).read_text()
                return raw_text.replace("\x0C", "\n---\n")
            except FileNotFoundError as e:
                error = e
        assert error is not None
        raise error

    @staticmethod
    def _rpm_package_for_installed_file(file_path: str) -> typing.Optional[str]:
        logger.debug("checking which RPM provides file %s", file_path)
        process = subprocess.run(
            ["rpm", "--query", "--whatprovides", "--", file_path],
            stdout=subprocess.PIPE,
            encoding="utf-8",
        )
        if process.returncode != 0:
            warn(f"could not determine RPM package for file: {file_path}")
            return None
        rpm_name = process.stdout.rstrip("\n")
        logger.debug("RPM %s provides file %s", rpm_name, file_path)
        return rpm_name

    @classmethod
    def _get_rpm_package(cls, package_name: str) -> InstalledPackage:
        source_rpm_info = subprocess.check_output(
            [
                "dnf",
                "--quiet",
                "repoquery",
                "--qf",
                "%{name}\\t%{version}-%{release}\\t%{source_name}",
                "--",
                package_name,
            ],
            encoding="utf-8",
        ).rstrip("\n")
        [name, version, source_name] = source_rpm_info.split("\t")
        source_rpm_url = subprocess.check_output(
            ["dnf", "--quiet", "repoquery", "--srpm", "--location", "--", package_name],
            encoding="utf-8",
        ).rstrip("\n")
        return cls.InstalledPackage(
            name=name,
            version=version,
            source=source_name,
            source_uris=(source_rpm_url,),
        )


class LinkMap(typing.NamedTuple):
    linked_archives: typing.Set[str]


def parse_link_map(raw_link_map: str) -> LinkMap:
    linked_archives = set()
    state = "NONE"
    for line in raw_link_map.splitlines():
        if line == "":
            if state == "BEFORE_ELF_ARCHIVE_MEMBERS":
                state = "ELF_ARCHIVE_MEMBERS"
            else:
                state = "NONE"
        elif line in (
            "Archive member included to satisfy reference by file (symbol)",
            "Archive member included because of file (symbol)",
        ):
            state = "BEFORE_ELF_ARCHIVE_MEMBERS"
        elif line == "# Object files:":
            state = "MACHO_OBJECT_FILES"
        else:
            if state == "ELF_ARCHIVE_MEMBERS":
                match = re.match(r"^(?P<file>\S[^()]*)\((?P<symbol>[^()]*)\)$", line)
                if match is not None:
                    linked_archives.add(match.group("file"))
            elif state == "MACHO_OBJECT_FILES":
                match = re.match(r"^\[[0-9 ]+\] (?P<file>.*)\((?P<object>.*)\)$", line)
                if match is not None:
                    linked_archives.add(match.group("file"))
            elif state == "NONE":
                match = re.match(
                    r"^\s*(?P<address>[0-9a-f]+:[0-9a-f]+)\s+(?P<symbol>.*)\s+(?P<value>[0-9a-f]+)\s+[f ]\s+[i ]\s+(?P<archive>.*):(?P<file>.*)$",
                    line,
                )
                if match is not None:
                    linked_archives.add(match.group("archive") + ".lib")
                match = re.match(
                    r"^(?P<archive>\S+\.a)\((?P<member>[^/]+\.o)\)$",
                    line,
                )
                if match is not None:
                    linked_archives.add(match.group("archive"))
                match = re.match(
                    r"^\s*\d+\s+\d+\s+\d+\s+(?P<archive>\S+\.a)\((?P<member>[^/]+\.o)\):\((?P<symbol>.*)\)$",
                    line,
                )
                if match is not None:
                    linked_archives.add(pathlib.Path(match.group("archive")).name)
    return LinkMap(linked_archives=linked_archives)


class VendorReadmeProject(typing.NamedTuple):
    project_name: str
    properties: typing.Dict[str, str]


def parse_vendor_readme_txt(txt: str) -> typing.List[VendorReadmeProject]:
    """Parse the README.txt file in quick-lint-js' vendor/ directory."""
    projects = []
    for line in txt.splitlines():
        match = re.match(r"^## (?P<project_name>.+)$", line)
        if match is not None:
            projects.append(
                VendorReadmeProject(
                    project_name=match.group("project_name"), properties={}
                )
            )
        match = re.match(r"^(?P<key>[^:]+): (?P<value>.*)$", line)
        if match is not None and projects:
            projects[-1].properties[match.group("key")] = match.group("value")
    return projects


def warn(message: str) -> None:
    if os.environ.get("QLJS_COLLECT_COPYRIGHT_NO_WARNINGS", ""):
        print(f"{__file__}: fatal: {message}", file=sys.stderr)
        exit(1)
    print(f"{__file__}: warning: {message}", file=sys.stderr)


class TestGNULinkMap(unittest.TestCase):
    def test_small_link_map_from_bfd_ld(self) -> None:
        raw_link_map = """\
Archive member included to satisfy reference by file (symbol)

/usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
                              /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o (__libc_csu_init)

Merging program properties


As-needed library included to satisfy reference by file (symbol)

libc.so.6                     /tmp/cchQcqEp.o (puts@@GLIBC_2.2.5)
"""
        link_map = parse_link_map(raw_link_map)
        self.assertEqual(
            link_map.linked_archives, {"/usr/lib/x86_64-linux-gnu/libc_nonshared.a"}
        )

    def test_small_link_map_with_archive_dependencies_from_bfd_ld(self) -> None:
        raw_link_map = """\
Archive member included to satisfy reference by file (symbol)

libboost.a(global_resource.cpp.o)
                              src/CMakeFiles/quick-lint-js.dir/main.cpp.o (boost::container::pmr::new_delete_resource())
libboost.a(dlmalloc.cpp.o)    libboost.a(global_resource.cpp.o) (boost::container::dlmalloc_global_sync_lock())
libboost.a(alloc_lib.c.o)     libboost.a(dlmalloc.cpp.o) (boost_cont_all_deallocated)
"""
        link_map = parse_link_map(raw_link_map)
        self.assertEqual(
            link_map.linked_archives, {"libboost.a"}
        )

    def test_small_link_map_from_gold(self) -> None:
        raw_link_map = """\
Archive member included because of file (symbol)

/usr/lib/x86_64-linux-gnu/libc_nonshared.a(elf-init.oS)
                              /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o (__libc_csu_init)

Discarded input sections

 .note.GNU-stack
                0x0000000000000000        0x0 /usr/lib/gcc/x86_64-linux-gnu/9/../../../x86_64-linux-gnu/Scrt1.o
 .note.GNU-stack
"""
        link_map = parse_link_map(raw_link_map)
        self.assertEqual(
            link_map.linked_archives, {"/usr/lib/x86_64-linux-gnu/libc_nonshared.a"}
        )


class TestLD64LinkMap(unittest.TestCase):
    def test_small_link_map(self) -> None:
        raw_link_map = """\
# Path: quick-lint-js
# Arch: x86_64
# Object files:
[  0] linker synthesized
[  1] /usr/local/opt/llvm/lib/libc++.a(charconv.cpp.o)
[  2] /usr/local/opt/llvm/lib/libc++abi.a(cxa_default_handlers.cpp.o)
[  3] src/CMakeFiles/quick-lint-js.dir/main.cpp.o
[  4] /Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/usr/lib/libSystem.tbd
# Sections:
# Address	Size    	Segment	Section
0x100001AB0	0x000D62E3	__TEXT	__text
0x1000D7D94	0x0000061E	__TEXT	__stubs
# Symbols:
# Address	Size    	File  Name
0x100001AB0	0x00000280	[  1] __ZNSt3__16__itoa8__u32toaEjPc
0x100001D30	0x00000660	[  1] __ZNSt3__16__itoa8__u64toaEyPc
0x100002390	0x00000020	[  2] __ZNSt3__118condition_variable10notify_oneEv
0x1000023B0	0x00000020	[  2] __ZNSt3__118condition_variable10notify_allEv
"""
        link_map = parse_link_map(raw_link_map)
        self.assertEqual(
            link_map.linked_archives,
            {"/usr/local/opt/llvm/lib/libc++.a", "/usr/local/opt/llvm/lib/libc++abi.a"},
        )


class TestMoldLinkMap(unittest.TestCase):
    def test_small_link_map(self) -> None:
        raw_link_map = """\
             VMA       Size Align Out     In      Symbol
         2097152         64     8 
         2097216        560     8 
         2097776         28     1 .interp
         2097804         32     4 .note.ABI-tag
         2097804         32     4         /usr/lib/x86_64-linux-gnu/crt1.o:(.note.ABI-tag)
         2491008     105120    16 .rodata
         2491008        140     4         src/CMakeFiles/quick-lint-js.dir/main.cpp.o:(.rodata._ZN13quick_lint_js12lsp_endpointINS_26linting_lsp_server_handlerINS_21lsp_javascript_linterEEENS_15lsp_pipe_writerEE14message_parsedESt17basic_string_viewIDuSt11char_traitsIDuEE)
         2511120          0     0                 simdjson::fallback::(anonymous namespace)::stringparsing::escape_map
         2511376        192    16         src/libquick-lint-js-lib.a(buffering-error-reporter.cpp.o):(.rodata._ZZN13quick_lint_js24buffering_error_reporter11report_implENS_10error_typeEPvE11error_sizes)
         2651424          8     4         /usr/lib/x86_64-linux-gnu/crti.o:(.fini)
         2651424          0     0                 _fini
               0    3085635     1 .debug_str
               0       2432     8 
"""
        link_map = parse_link_map(raw_link_map)
        self.assertEqual(
            link_map.linked_archives,
            {"libquick-lint-js-lib.a"},
        )


class TestMicrosoftLinkMap(unittest.TestCase):
    def test_small_link_map(self) -> None:
        raw_link_map = """\
 quick-lint-js

 Timestamp is 5fffbf67 (Wed Jan 13 19:49:59 2021)

 Preferred load address is 0000000140000000

 Start         Length     Name                   Class
 0001:00000000 00000630H .text$di                CODE

  Address         Publics by Value              Rva+Base               Lib:Object

 0000:00000000       __x64_code_ranges_to_entry_points_count 0000000000000000     <absolute>
 0001:00000cf0       ??$?RVarrow_function_with_statements@expression@quick_lint_js@<lambda_1>@?1???$with_derived@V<lambda_1>@?1??object_entry@expression@quick_lint_js@@QEBA?AUobject_property_value_pair@4@H@Z@@expression@quick_lint_js@@AEBA@$$QEAV0?1??object_entry@12@QEBA?AUobject_property_value_pair@2@H@Z@@Z@QEBA?AU42@AEBVarrow_function_with_statements@12@@Z 0000000140001cf0 f i main.obj
 0001:0003d760       ?create_dom_parser_implementation@implementation@haswell@simdjson@@UEBA?AW4error_code@3@_K0AEAV?$unique_ptr@Vdom_parser_implementation@internal@simdjson@@U?$default_delete@Vdom_parser_implementation@internal@simdjson@std@std@Z 000000014003e760 f   simdjson:simdjson.obj

 entry point at        0001:00059824

 Static symbols

 0001:00000000       ??__Eallocator_arg@container@boost@@YAXXZ 0000000140001000 f   main.obj
 0001:00000020       ??__Eallocator_arg@container@boost@@YAXXZ 0000000140001020 f   quick-lint-js-lib:lex.obj
 0003:00025f0c       ?initialized@?1??__acrt_initialize_multibyte@@9@4_NA 00000001400c9f0c     libucrt:mbctype.obj
 0006:00000060       $R000000                   00000001400d1060     * linker generated manifest res *
"""
        link_map = parse_link_map(raw_link_map)
        self.assertEqual(
            link_map.linked_archives,
            {"libucrt.lib", "simdjson.lib", "quick-lint-js-lib.lib"},
        )


class TestLLVMWebAssemblyTrace(unittest.TestCase):
    def test_small_trace_file(self) -> None:
        raw_link_map = """\
plugin/vscode/CMakeFiles/quick-lint-js-vscode.dir/empty.cpp.o
/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/crt1_reactor.o
src/libquick-lint-js-lib.a(vscode.cpp.o)
/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libc++-noexcept.a(new.o)
/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libstandalonewasm.a(standalone.o)
"""
        link_map = parse_link_map(raw_link_map)
        self.assertEqual(
            link_map.linked_archives,
            {
                "src/libquick-lint-js-lib.a",
                "/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libc++-noexcept.a",
                "/home/strager/tmp/Projects/emsdk/upstream/emscripten/cache/wasm/libstandalonewasm.a",
            },
        )


class TestAptGetSourceURIs(unittest.TestCase):
    def test_libc6_dev(self) -> None:
        output = """\
Reading package lists... Done
Picking 'glibc' as source package instead of 'libc6-dev'
NOTICE: 'glibc' packaging is maintained in the 'Git' version control system at:
https://git.launchpad.net/~ubuntu-core-dev/ubuntu/+source/glibc
Please use:
git clone https://git.launchpad.net/~ubuntu-core-dev/ubuntu/+source/glibc
to retrieve the latest (possibly unreleased) updates to the package.
Need to get 18.2 MB of source archives.
'http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.dsc' glibc_2.31-0ubuntu9.1.dsc 9456 SHA512:b3f10b8c8f421431c91f14efbc92fc165675d5cdb427629e67d5e19927de0f4024c736c9da1c5d4b7e2bd0aa01395b3e269aeccee161918311f2ea1c75815f1f
'http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31.orig.tar.xz' glibc_2.31.orig.tar.xz 17317924 SHA512:2ff56628fe935cacbdf1825534f15d45cb87a159cbdb2e6a981590eeb6174ed4b3ff7041519cdecbd4f624ac20b745e2dd9614c420dd3ea186b8f36bc4c2453c
'http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.debian.tar.xz' glibc_2.31-0ubuntu9.1.debian.tar.xz 844816 SHA512:bca1857b031eda2d170256b97829c6b8a38493c66858a041e6f0143bf26c376c207e72d499ef1be07a83667419d55284407bae518511a702171eac58c6f31d62
"""
        source_uris = Debian._parse_apt_get_source_uris(output)
        self.assertEqual(
            source_uris,
            [
                "http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.dsc",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31.orig.tar.xz",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/glibc/glibc_2.31-0ubuntu9.1.debian.tar.xz",
            ],
        )

    def test_gcc_9_qq(self) -> None:
        output = """\
'http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.dsc' gcc-9_9.3.0-17ubuntu1~20.04.dsc 23746 SHA512:bae6c156516a4988546a4518934f661a70243a89ed17883fe382bd984ae6533aab7d9b459986c2ebdb715b64b11ac76b9283447b26be3dbaec0b00c02afeb7f6
'http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0.orig.tar.gz' gcc-9_9.3.0.orig.tar.gz 90490748 SHA512:cebaa27b0ac7485e72f0d7b07e6ef08cd544bf551bc080ee00175cbe319ea8b0592ff54d55633bb189e481f9ba489d513205cf2310e4b5211869a021432ee31f
'http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.debian.tar.xz' gcc-9_9.3.0-17ubuntu1~20.04.debian.tar.xz 763792 SHA512:f696a3d92edbadb7cfb29494b68ec00fa9b278ab7657e45933cf2e7564e6a524ac41edde14449114a2e06ad0c1f56473d998687aeb2f98f18c431727d4254d21
"""
        source_uris = Debian._parse_apt_get_source_uris(output)
        self.assertEqual(
            source_uris,
            [
                "http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.dsc",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0.orig.tar.gz",
                "http://archive.ubuntu.com/ubuntu/pool/main/g/gcc-9/gcc-9_9.3.0-17ubuntu1~20.04.debian.tar.xz",
            ],
        )


class TestParseVendorReadme(unittest.TestCase):
    def test_parse_snippet(self) -> None:
        readme_txt = """\n
# Vendor

This vendor directory contains third-party software.

## benchmark

The benchmark directory contains Google Benchmark, a performance measurement
framework for C++ code.

Copyright: Copyright 2015 Google Inc. All rights reserved.
Version: v1.5.1

## boost

The boost directory contains Boost, free peer-reviewed portable C++ source
libraries.

Many directories and files have been deleted to reduce storage consumption.

Download URL: https://dl.bintray.com/boostorg/release/1.74.0/source/boost_1_74_0.tar.bz2
Download date: August 25, 2020
Version: 1.74.0
"""
        projects = parse_vendor_readme_txt(readme_txt)
        self.assertEqual(
            projects,
            [
                VendorReadmeProject(
                    project_name="benchmark",
                    properties={
                        "Copyright": "Copyright 2015 Google Inc. All rights reserved.",
                        "Version": "v1.5.1",
                    },
                ),
                VendorReadmeProject(
                    project_name="boost",
                    properties={
                        "Download URL": "https://dl.bintray.com/boostorg/release/1.74.0/source/boost_1_74_0.tar.bz2",
                        "Download date": "August 25, 2020",
                        "Version": "1.74.0",
                    },
                ),
            ],
        )


if __name__ == "__main__":
    main()

# quick-lint-js finds bugs in JavaScript programs.
# Copyright (C) 2020  Matthew "strager" Glazar
#
# This file is part of quick-lint-js.
#
# quick-lint-js is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# quick-lint-js is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with quick-lint-js.  If not, see <https://www.gnu.org/licenses/>.
