Skip to content
Snippets Groups Projects
Commit e15aaab0 authored by chrg
Browse files

Add progress bar

parent 0f9ea0f8
No related branches found
No related tags found
No related merge requests found
# This file is @generated by PDM.
# It is not intended for manual editing.
[[package]]
name = "click"
version = "8.1.3"
......@@ -46,9 +43,18 @@ version = "5.0.0"
requires_python = ">=3.6"
summary = "A pure Python implementation of a sliding window memory map manager"
[[package]]
name = "tqdm"
version = "4.65.0"
requires_python = ">=3.7"
summary = "Fast, Extensible Progress Meter"
dependencies = [
"colorama; platform_system == \"Windows\"",
]
[metadata]
lock_version = "4.1"
content_hash = "sha256:e4668d20f55756901111f3e73519dca0c61f32a1eccd679b031ad9aafd0c6225"
lock_version = "4.0"
content_hash = "sha256:d30e726a8365a8d0ae78a7c83885da637eb2af4c80451b78ffdc00773e8a34b0"
[metadata.files]
"click 8.1.3" = [
......@@ -75,3 +81,7 @@ content_hash = "sha256:e4668d20f55756901111f3e73519dca0c61f32a1eccd679b031ad9aaf
{url = "https://files.pythonhosted.org/packages/21/2d/39c6c57032f786f1965022563eec60623bb3e1409ade6ad834ff703724f3/smmap-5.0.0.tar.gz", hash = "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936"},
{url = "https://files.pythonhosted.org/packages/6d/01/7caa71608bc29952ae09b0be63a539e50d2484bc37747797a66a60679856/smmap-5.0.0-py3-none-any.whl", hash = "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94"},
]
"tqdm 4.65.0" = [
{url = "https://files.pythonhosted.org/packages/3d/78/81191f56abb7d3d56963337dbdff6aa4f55805c8afd8bad64b0a34199e9b/tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"},
{url = "https://files.pythonhosted.org/packages/e6/02/a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97/tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"},
]
......@@ -9,6 +9,7 @@ dependencies = [
"click>=8.1.3",
"git-filter-repo>=2.38.0",
"gitpython>=3.1.31",
"tqdm>=4.65.0",
]
requires-python = ">=3.10"
license = {text = "MIT"}
......
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, cast
from typing import Callable
import subprocess
import csv
import logging
import tempfile
from contextlib import contextmanager
import tqdm
import click
import git
import git_filter_repo as fr
......@@ -27,6 +28,7 @@ class BlobHandler:
transform: Callable[[Path, bytes], bytes] = lambda _, b: b
filter: fr.RepoFilter | None = None
gitcat: Popen | None = None
bar: tqdm.tqdm | None = None
def __enter__(self) -> "BlobHandler":
from subprocess import PIPE
......@@ -48,6 +50,9 @@ class BlobHandler:
self.gitcat.stdin.close()
self.gitcat.wait()
if self.bar:
self.bar.close()
# Some code borrowed from https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
def __call__(self, commit: fr.Commit, metadata):
assert self.filter is not None
......@@ -55,7 +60,8 @@ class BlobHandler:
filename = Path(change.filename.decode("utf-8"))
if change.type == b"D" or not self.is_relevant(filename):
continue
if self.bar:
self.bar.update(1)
if change.blob_id not in self.blobs_handled:
content = self.read_blob(change.blob_id)
blob = fr.Blob(self.transform(filename, content))
......@@ -205,19 +211,18 @@ def regit(
):
"""A simple program that runs a command on every commit on a repo."""
logging.basicConfig(level=verbose)
log.debug("Setting verbosity %s", verbose)
logging.basicConfig(level=20 - 10 * verbose)
if output is None:
output = Path(tempfile.mkdtemp())
log.debug("Using tempdir %s", output)
with utils.timeit("cloning repo", logfn=log.info):
if repo is None:
repo = git.Repo().clone(path=output, no_local=True)
log.debug("Cloned current repo to %s", output)
else:
repo = git.Repo.clone_from(url=repo, to_path=output, no_local=True)
log.debug("Cloned repo to %s", output)
log.info("Cloned repo to %s", output)
def is_relevant(file: Path):
if pattern is None:
......@@ -226,8 +231,39 @@ def regit(
log.debug("Check if %s matched pattern %s", file, match)
return match
if True:
options = fr.FilteringOptions.parse_args(
["--prune-empty", "never", "--quiet"],
error_on_empty=False,
)
from collections import Counter
cnt = Counter()
def find_files(commit, metadata):
    """Commit callback that tallies every relevant, non-deleted file change.

    Each counted change bumps that path's entry in the enclosing ``cnt``
    counter. ``metadata`` is accepted but not used.
    """
    for entry in commit.file_changes:
        path = Path(entry.filename.decode("utf-8"))
        # Deletions are skipped before the relevance predicate is consulted.
        if entry.type != b"D" and is_relevant(path):
            cnt[path] += 1
filter = fr.RepoFilter(options, commit_callback=find_files)
with utils.timeit("prefilter", log.info), utils.chdir(repo.working_dir):
filter.run()
log.debug(f"Continue to format {cnt.total()} files")
for f, c in cnt.most_common():
log.debug(f"{f}: {c}")
with mktransformer(program, args, batch, on_error) as transformer:
handler = BlobHandler(repo, is_relevant=is_relevant, transform=transformer)
handler = BlobHandler(
repo,
is_relevant=is_relevant,
transform=transformer,
bar=tqdm.tqdm(total=cnt.total()),
)
log.debug("Starting handler")
with handler:
options = fr.FilteringOptions.parse_args(
......@@ -236,13 +272,11 @@ def regit(
)
filter = fr.RepoFilter(options, commit_callback=handler)
handler.filter = filter
with utils.chdir(repo.working_dir):
log.debug("Starting git filter")
with utils.timeit("git filter", log.info), utils.chdir(repo.working_dir):
filter.run()
log.debug("Finished git filter")
if mapping:
log.debug("Writing mapping to %s", mapping)
log.debug("Writing mapping...")
writer = csv.writer(mapping)
writer.writerow(["from", "to"])
for fm, to in filter._commit_renames.items():
......
......@@ -29,11 +29,25 @@ class Executable(click.ParamType):
class Repository(click.ParamType):
    """A click type that accepts a local git repository or a repository URL.

    ``convert`` returns a ``Path`` for an existing directory that contains a
    ``.git`` directory, and returns the original string for a value matching
    ``ISURL``. Every other value is rejected through ``self.fail``.
    """

    import re

    name = "repository"

    # borrowed from https://pytutorial.com/check-strig-url/
    ISURL = re.compile(
        r"^(?:http|ftp)s?://"  # http://, https://, ftp:// or ftps://
        r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|"  # domain...
        r"localhost|"  # localhost...
        r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"  # ...or ip
        r"(?::\d+)?"  # optional port
        r"(?:/?|[/?]\S+)$",
        re.IGNORECASE,
    )

    def convert(self, value, param, ctx):
        """Validate ``value`` as a repository location.

        Returns:
            ``Path(value)`` for a directory holding a ``.git`` directory, or
            ``value`` unchanged when it matches ``ISURL``.

        Raises:
            Whatever ``self.fail`` raises, when ``value`` is a directory
            without ``.git`` or is neither a directory nor a matching URL.
        """
        if os.path.isdir(value):
            if not os.path.isdir(os.path.join(value, ".git")):
                self.fail(f"{value!r} is not a git repository", param, ctx)
            return Path(value)
        if Repository.ISURL.match(value) is not None:
            return value
        # Reject explicitly instead of falling through and returning None,
        # which would hand the command a None value for an invalid argument.
        self.fail(
            f"{value!r} is neither a git repository directory nor a repository URL",
            param,
            ctx,
        )
......@@ -10,12 +10,12 @@ log = logging.getLogger("regit")
@contextmanager
def timeit(name, logfn=log.debug):
    """Log the start of a named step and, once it finishes, how long it took.

    Args:
        name: Label for the timed step, interpolated into both messages.
        logfn: Logging callable taking a %-style format string followed by
            its arguments; defaults to ``log.debug``.

    The closing message is only emitted when the managed body exits without
    raising.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted while the step runs.
    start = time.perf_counter()
    logfn("Started %s", name)
    yield
    elapsed = time.perf_counter() - start
    logfn("Done running %s in %s", name, elapsed)
def handle_results(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment