Skip to content
Snippets Groups Projects
Commit 3437e1ba authored by chrg's avatar chrg
Browse files

A working example of a tool

parent b4dce46a
No related branches found
No related tags found
No related merge requests found
......@@ -17,4 +17,7 @@ All contributions are accepted under the license in LICENSE.
After your first contribution please add your name to the `pyproject.toml` file.
## Notes
- The `git commit` documentation (https://git-scm.com/docs/git-commit) lists a `--reuse-message=<commit>` option,
which might be interesting.
# This file is @generated by PDM.
# It is not intended for manual editing.
[[package]]
name = "click"
version = "8.1.3"
......@@ -13,9 +16,39 @@ version = "0.4.6"
requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
summary = "Cross-platform colored terminal text."
[[package]]
name = "git-filter-repo"
version = "2.38.0"
requires_python = ">=3.5"
summary = "Quickly rewrite git repository history"
[[package]]
name = "gitdb"
version = "4.0.10"
requires_python = ">=3.7"
summary = "Git Object Database"
dependencies = [
"smmap<6,>=3.0.1",
]
[[package]]
name = "gitpython"
version = "3.1.31"
requires_python = ">=3.7"
summary = "GitPython is a Python library used to interact with Git repositories"
dependencies = [
"gitdb<5,>=4.0.1",
]
[[package]]
name = "smmap"
version = "5.0.0"
requires_python = ">=3.6"
summary = "A pure Python implementation of a sliding window memory map manager"
[metadata]
lock_version = "4.0"
content_hash = "sha256:2bdce55641fef073c86dab537aa23267eeee7c12b6d9a223d41cce0fb30b4af5"
lock_version = "4.1"
content_hash = "sha256:e4668d20f55756901111f3e73519dca0c61f32a1eccd679b031ad9aafd0c6225"
[metadata.files]
"click 8.1.3" = [
......@@ -26,3 +59,19 @@ content_hash = "sha256:2bdce55641fef073c86dab537aa23267eeee7c12b6d9a223d41cce0fb
{url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
"git-filter-repo 2.38.0" = [
{url = "https://files.pythonhosted.org/packages/70/99/37da3374977fb5e0915064718e58715666a395a614c42532a89d6164d958/git-filter-repo-2.38.0.tar.gz", hash = "sha256:fe1753e18f0bd6d3c9b57868c8011ae7d056a6ebab706706396a15ef5312725e"},
{url = "https://files.pythonhosted.org/packages/8d/0b/49d4d620327b717fdc072b2efdfcb3588eb3cf780e60ed5ba98ebedb5637/git_filter_repo-2.38.0-py2.py3-none-any.whl", hash = "sha256:c2646206dfdbf06ce27c06c5b77ea33209d8e32d1e33f6bd4c25b1ccc59f9864"},
]
"gitdb 4.0.10" = [
{url = "https://files.pythonhosted.org/packages/21/a6/35f83efec687615c711fe0a09b67e58f6d1254db27b1013119de46f450bd/gitdb-4.0.10-py3-none-any.whl", hash = "sha256:c286cf298426064079ed96a9e4a9d39e7f3e9bf15ba60701e95f5492f28415c7"},
{url = "https://files.pythonhosted.org/packages/4b/47/dc98f3d5d48aa815770e31490893b92c5f1cd6c6cf28dd3a8ae0efffac14/gitdb-4.0.10.tar.gz", hash = "sha256:6eb990b69df4e15bad899ea868dc46572c3f75339735663b81de79b06f17eb9a"},
]
"gitpython 3.1.31" = [
{url = "https://files.pythonhosted.org/packages/5f/11/2b0f60686dbda49028cec8c66bd18a5e82c96d92eef4bc34961e35bb3762/GitPython-3.1.31.tar.gz", hash = "sha256:8ce3bcf69adfdf7c7d503e78fd3b1c492af782d58893b650adb2ac8912ddd573"},
{url = "https://files.pythonhosted.org/packages/9e/8a/d1e02cc111d65b0346f70abb83c51f8593e7134bf694a4a56d1a470caaf7/GitPython-3.1.31-py3-none-any.whl", hash = "sha256:f04893614f6aa713a60cbbe1e6a97403ef633103cdd0ef5eb6efe0deb98dbe8d"},
]
"smmap 5.0.0" = [
{url = "https://files.pythonhosted.org/packages/21/2d/39c6c57032f786f1965022563eec60623bb3e1409ade6ad834ff703724f3/smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
{url = "https://files.pythonhosted.org/packages/6d/01/7caa71608bc29952ae09b0be63a539e50d2484bc37747797a66a60679856/smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
]
......@@ -7,6 +7,8 @@ authors = [
]
dependencies = [
"click>=8.1.3",
"git-filter-repo>=2.38.0",
"gitpython>=3.1.31",
]
requires-python = ">=3.10"
license = {text = "MIT"}
......
from contextlib import contextmanager
from pathlib import Path
import shutil
from typing import Callable
import click
import csv
import git
import os
import git_filter_repo as fr
import tempfile
import subprocess
from dataclasses import dataclass, field
# Some code borrowed from https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
@contextmanager
def blob_reader():
    """Yield a function that reads git objects via one ``git cat-file --batch`` process.

    The child process is started without an explicit ``cwd``, so it runs in
    the current working directory of the caller.

    Yields:
        A callable ``reader(blob_id)`` taking an object id as ``bytes`` and
        returning the raw content of that object as ``bytes``.
    """
    process = subprocess.Popen(
        ["git", "cat-file", "--batch"], stdin=subprocess.PIPE, stdout=subprocess.PIPE
    )

    def reader(blob_id):
        assert process.stdin is not None
        assert process.stdout is not None
        process.stdin.write(blob_id + b"\n")
        process.stdin.flush()
        # The reply starts with a header line of three fields; the last one
        # is the size of the content that follows.
        _, _, objsize = process.stdout.readline().split()
        # Read one extra byte (the separator after the content) and drop it.
        return process.stdout.read(int(objsize) + 1)[:-1]

    try:
        yield reader
    finally:
        # Always shut the child down, even when the ``with`` body raised;
        # otherwise the process and its pipes would be leaked.
        if process.stdin is not None:
            process.stdin.close()
        process.wait()
        if process.stdout is not None:
            process.stdout.close()
@dataclass
class BlobHandler:
    """Commit callback that replaces the blobs of relevant files.

    For every non-deleted, regular file change of a commit whose path passes
    ``is_relevant``, the blob content is fetched with ``reader``, passed
    through ``transform`` and inserted into ``filter`` as a new blob. Each
    original blob id is transformed only once; later changes that point at
    the same id reuse the recorded replacement.
    """

    # Returns the raw content for a blob id (the id is passed as bytes by the
    # reader used in this file).
    reader: Callable[[bytes], bytes]
    # The filter that new blobs are inserted into. It has to be assigned
    # before the handler is called.
    filter: fr.RepoFilter | None = None
    # Original blob id -> id of the blob that replaced it.
    blobs_handled: dict[bytes, int] = field(default_factory=dict)
    # Decides from the file path whether a change should be transformed.
    is_relevant: Callable[[Path], bool] = lambda _: True
    # Produces the new content from the file path and the old content.
    transform: Callable[[Path, bytes], bytes] = lambda _, b: b

    # Modes whose entry is not regular file content: 120000 is a symlink (the
    # blob holds the link target) and 160000 is a gitlink/submodule (the id
    # names a commit of another repository, which the reader cannot look up).
    _SKIPPED_MODES = (b"120000", b"160000")

    def __call__(self, commit, metadata):
        """Rewrite the blob ids of ``commit.file_changes`` in place.

        Args:
            commit: The commit being filtered; its ``file_changes`` are
                iterated and their ``blob_id`` attributes updated.
            metadata: Unused; accepted because the callback is called with it.
        """
        assert self.filter is not None
        for change in commit.file_changes:
            # Deletions carry no content, and symlinks/gitlinks are not file
            # content, so none of them are sent through the transform.
            if change.type == b"D" or change.mode in self._SKIPPED_MODES:
                continue
            filename = Path(change.filename.decode("utf-8"))
            if not self.is_relevant(filename):
                continue
            if change.blob_id not in self.blobs_handled:
                content = self.reader(change.blob_id)
                blob = fr.Blob(self.transform(filename, content))
                self.filter.insert(blob)
                # Record our handling of the blob and use it for this change
                self.blobs_handled[change.blob_id] = blob.id
            change.blob_id = self.blobs_handled[change.blob_id]
def transform_program(program: Path, args: tuple[str, ...], folder: Path):
    """Build a content transformer that runs an external program.

    If ``args`` contains the placeholder ``"{}"``, the content is written to
    a file inside ``folder`` (named after the original file), every ``"{}"``
    argument is replaced by that file's path, the program is run, and the
    file is read back. Otherwise the content is piped to the program on
    stdin and its stdout is returned.

    Args:
        program: The executable to run.
        args: Arguments passed to the program.
        folder: Directory used for the temporary file in placeholder mode.

    Returns:
        A callable ``callback(file, content) -> bytes`` giving the new content.

    Raises:
        subprocess.CalledProcessError: From the returned callable, when the
            program exits with a non-zero status.
    """
    placeholder = "{}"
    # ``tuple.index`` raises ValueError when the value is absent (it never
    # returns -1), so the mode is decided with a membership test instead.
    uses_file = placeholder in args

    def callback(file: Path, content: bytes) -> bytes:
        if not uses_file:
            return subprocess.run(
                [program] + list(args), input=content, capture_output=True, check=True
            ).stdout

        tmp_file = folder / file.name
        pargs = [str(tmp_file) if arg == placeholder else arg for arg in args]
        try:
            with open(tmp_file, "wb") as f:
                f.write(content)
            print(program, pargs)
            subprocess.run([program] + pargs, check=True)
            with open(tmp_file, "rb") as f:
                return f.read()
        finally:
            # Remove the scratch file even when the program failed, so a
            # later file with the same name starts from a clean state.
            tmp_file.unlink(missing_ok=True)

    return callback
@click.command()
@click.option("--count", default=1, help="Number of greetings.")
@click.option("--name", prompt="Your name?", help="The person to greet.")
def regit(count, name):
    """Simple program that greets NAME for a total of COUNT times."""
    # Emit one greeting line per requested repetition.
    for _ in range(count):
        greeting = "Hello %s!" % name
        click.echo(greeting)
@click.command()
@click.option("--repo", help="the repo to clone and change", default=None)
@click.option(
    "-o",
    "--output",
    help="The name of the resulting repo",
    type=click.Path(exists=False, path_type=Path),
)
@click.option(
    "-p",
    "--pattern",
    help="the glob-pattern to match files.",
    type=str,
)
@click.option(
    "-m",
    "--mapping",
    help="The file to output the csv mapping to",
    type=click.File("w", lazy=False),
)
@click.argument(
    "program",
    # The program to execute on each file.
    type=click.Path(executable=True, path_type=Path),
)
@click.argument(
    "args",
    # The arguments: use '{}' to mean the file in question, omit '{}' to use stdin.
    nargs=-1,
    type=str,
)
def regit(
    repo,
    pattern: str | None,
    output: Path | None,
    mapping,
    program: Path,
    args: tuple[str, ...],
):
    """A simple program that runs a command on every commit on a repo."""
    # Without an explicit output, clone into a fresh temporary directory.
    if output is None:
        output = Path(tempfile.mkdtemp())

    # Work on a clone: of the repository found from the current directory
    # when --repo is not given, otherwise of the given repository.
    if repo is None:
        repo = git.Repo().clone(path=output)
    else:
        repo = git.Repo.clone_from(url=repo, to_path=output)

    # Resolve the program before changing directory, so a relative path keeps
    # pointing at the same file; bare names are looked up on PATH.
    if program.exists():
        program = program.absolute()
    else:
        found = shutil.which(str(program))
        if found is None:
            raise click.UsageError(f"could not find the program {str(program)!r}")
        program = Path(found)

    # The blob reader and the filter both operate on the current directory.
    os.chdir(output)
    options = fr.FilteringOptions.parse_args([], error_on_empty=False)
    with blob_reader() as br, tempfile.TemporaryDirectory() as folder:
        handler = BlobHandler(
            br,
            is_relevant=lambda a: pattern is None or a.match(pattern),
            transform=transform_program(program, args, Path(folder)),
        )
        repo_filter = fr.RepoFilter(options, commit_callback=handler)
        # The handler needs the filter to insert the rewritten blobs.
        handler.filter = repo_filter
        repo_filter.run()

    if mapping:
        # One row per entry of the filter's commit rename table.
        writer = csv.writer(mapping)
        writer.writerow(["from", "to"])
        for fm, to in repo_filter._commit_renames.items():
            writer.writerow([fm.decode(), to.decode()])

    print(repo)
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment