Blog

2つのディレクトリの jar ファイルを比較するスクリプト

ChatGPT、こういうのの生成はやたら得意なので、ChatGPT で生成した。 こういう単純なスクリプトは手で書くより早いよねぇ。

import os
import sys
import subprocess
import difflib
import zipfile
import subprocess
import tempfile
from pathlib import Path

def get_git_commit_hash(directory):
    """Return the commit hash that HEAD points to for the repository at *directory*.

    Runs ``git -C <directory> rev-parse HEAD`` and returns its output.

    Args:
        directory: Path of a directory inside a git working tree.

    Returns:
        The commit hash as a string, with surrounding whitespace stripped.

    Raises:
        SystemExit: With status 1 when git exits non-zero (for example the
            directory is not a repository) or when the ``git`` executable
            cannot be started at all.
    """
    try:
        output = subprocess.check_output(['git', '-C', directory, 'rev-parse', 'HEAD'])
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing or non-executable git binary, which
        # check_output reports as FileNotFoundError / PermissionError rather
        # than CalledProcessError.
        print(f"Error: Failed to retrieve git commit hash for {directory}", file=sys.stderr)
        sys.exit(1)
    return output.decode('utf-8').strip()

def get_jar_files(directory):
    """Collect every ``*.jar`` file found under *directory*, searching recursively.

    Args:
        directory: Root directory to search.

    Returns:
        A list of ``Path`` objects, each relative to *directory*. Entries
        that match the pattern but are not regular files (for example a
        directory named ``foo.jar``) are left out.
    """
    root = Path(directory)
    found = []
    for candidate in root.rglob("*.jar"):
        if not candidate.is_file():
            continue
        found.append(candidate.relative_to(root))
    return found

def _is_binary(content):
    """Return True when the bytes *content* contain a NUL byte."""
    return b'\x00' in content


def _get_javap_output(class_file_path):
    """Return the ``javap -verbose -c`` dump of a class file as text.

    Returns ``None`` when the ``javap`` executable cannot be started. When
    javap runs but exits non-zero, whatever it printed (stdout and stderr
    combined) is returned so the caller can still show it.
    """
    try:
        output = subprocess.check_output(
            ['javap', '-verbose', '-c', class_file_path],
            stderr=subprocess.STDOUT,
        )
    except OSError:
        return None
    except subprocess.CalledProcessError as e:
        output = e.output or str(e).encode('utf-8')
    return output.decode('utf-8', errors='replace')


def _unified_diff_text(lines1, lines2, fromfile, tofile):
    """Return a unified diff of two lists of newline-free lines as one string."""
    # lineterm='' keeps the ---/+++/@@ header lines newline-free like the
    # content lines, so joining with "\n" does not produce blank lines.
    return "\n".join(
        difflib.unified_diff(lines1, lines2, fromfile=fromfile, tofile=tofile, lineterm='')
    )


def _diff_class_files(contents1, contents2, fromfile, tofile):
    """Return a unified diff of the javap dumps of two class files.

    Returns ``None`` when javap is not available.
    """
    # The class files are written into a temporary directory and closed
    # before javap runs; a still-open NamedTemporaryFile cannot be reopened
    # by another process on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dumps = []
        for label, contents in (("1", contents1), ("2", contents2)):
            class_path = os.path.join(tmp_dir, f"{label}.class")
            with open(class_path, "wb") as class_file:
                class_file.write(contents)
            dump = _get_javap_output(class_path)
            if dump is None:
                return None
            dumps.append(dump)
    return _unified_diff_text(dumps[0].splitlines(), dumps[1].splitlines(), fromfile, tofile)


def are_jars_identical(jar1_path, jar2_path):
    """Compare two JAR files entry by entry.

    The entry name lists are compared first; then entries are compared in
    sorted name order and the first differing entry ends the comparison.

    Args:
        jar1_path: Path of the first JAR (zip) file.
        jar2_path: Path of the second JAR (zip) file.

    Returns:
        A ``(is_same, reason)`` tuple. ``(True, "")`` when both archives hold
        the same entry names with the same bytes. Otherwise ``is_same`` is
        False and ``reason`` is one of:

        * ``"File lists are different"`` when the entry names differ;
        * for a ``.class`` entry, the entry name followed by a unified diff
          of the two ``javap -verbose -c`` dumps;
        * for an ``.html`` entry without NUL bytes, a unified diff of the
          text (undecodable bytes are shown as U+FFFD);
        * ``"File contents are different: <name>"`` in every other case,
          including when javap is unavailable.
    """
    with zipfile.ZipFile(jar1_path, 'r') as jar1, zipfile.ZipFile(jar2_path, 'r') as jar2:
        jar1_files = set(jar1.namelist())
        jar2_files = set(jar2.namelist())

        if jar1_files != jar2_files:
            return False, "File lists are different"

        for file_name in sorted(jar1_files):
            file1_contents = jar1.read(file_name)
            file2_contents = jar2.read(file_name)
            if file1_contents == file2_contents:
                continue

            fromfile = f"{jar1_path}/{file_name}"
            tofile = f"{jar2_path}/{file_name}"

            if file_name.endswith('.class'):
                # Class files are compared through their javap bytecode dump.
                class_diff = _diff_class_files(file1_contents, file2_contents, fromfile, tofile)
                if class_diff is not None:
                    return False, file_name + "\n" + class_diff
                return False, (
                    f"File contents are different: {file_name}"
                    " (javap not available; bytecode diff skipped)"
                )

            if (file_name.endswith('.html')
                    and not _is_binary(file1_contents)
                    and not _is_binary(file2_contents)):
                # errors='replace' keeps non-UTF-8 HTML from aborting the run.
                html_diff = _unified_diff_text(
                    file1_contents.decode('utf-8', errors='replace').splitlines(),
                    file2_contents.decode('utf-8', errors='replace').splitlines(),
                    fromfile,
                    tofile,
                )
                # The diff is empty when the bytes differ only in ways that
                # vanish after decoding/line splitting; fall back to the
                # generic message so the reason is never blank.
                if html_diff:
                    return False, html_diff

            return False, f"File contents are different: {file_name}"

    return True, ""

def main():
    """Compare the JAR files of two directories given on the command line.

    Expects exactly two arguments (``directory1 directory2``). Prints the git
    commit hash of each directory, then for every JAR present at the same
    relative path in both directories (skipping those whose path contains
    ``-sources.jar``) reports whether the contents differ, and finally lists
    the JARs that exist in only one of the two directories.

    All reports are emitted in sorted path order so that two runs over the
    same inputs produce the same output.

    Raises:
        SystemExit: With status 1 when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print("Usage: script_name directory1 directory2")
        sys.exit(1)

    dir1, dir2 = sys.argv[1], sys.argv[2]

    # Display git commit hashes for both directories
    commit_hash_dir1 = get_git_commit_hash(dir1)
    commit_hash_dir2 = get_git_commit_hash(dir2)

    print(f"Git commit hash for directory 1: {dir1} {commit_hash_dir1}")
    print(f"Git commit hash for directory 2: {dir2} {commit_hash_dir2}\n\n\n")

    jar_files_dir1 = set(get_jar_files(dir1))
    jar_files_dir2 = set(get_jar_files(dir2))

    common_jar_files = jar_files_dir1 & jar_files_dir2

    # Iterate in sorted order: raw set order of paths varies between runs,
    # which would make two reports impossible to compare.
    for jar_file in sorted(common_jar_files):
        if "-sources.jar" in str(jar_file):
            continue
        file1_path = os.path.join(dir1, jar_file)
        file2_path = os.path.join(dir2, jar_file)

        is_same, reason = are_jars_identical(file1_path, file2_path)
        if not is_same:
            print(f"## {jar_file} has different contents.")
            print(reason)
            print("\n\n")

    # Report files only in directory 1
    for jar_file in sorted(jar_files_dir1 - common_jar_files):
        print(f"{jar_file} exists only in directory 1.")

    # Report files only in directory 2
    for jar_file in sorted(jar_files_dir2 - common_jar_files):
        print(f"{jar_file} exists only in directory 2.")

# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()