Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
regit
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Software Systems Engineering
regit
Commits
58b3ee4e
Commit
58b3ee4e
authored
May 15, 2023
by
chrg
Browse files
Options
Downloads
Patches
Plain Diff
Major cleanup
parent
e15aaab0
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/regit/__main__.py
+195
-161
195 additions, 161 deletions
src/regit/__main__.py
with
195 additions
and
161 deletions
src/regit/__main__.py
+
195
−
161
View file @
58b3ee4e
from
dataclasses
import
dataclass
,
field
from
pathlib
import
Path
from
pathlib
import
Path
from
typing
import
Callable
from
typing
import
*
import
subprocess
import
subprocess
import
csv
import
csv
import
logging
import
logging
...
@@ -18,122 +17,100 @@ from . import utils
...
@@ -18,122 +17,100 @@ from . import utils
log
=
logging
.
getLogger
(
"
regit
"
)
log
=
logging
.
getLogger
(
"
regit
"
)
@dataclass
# Some code borrowed from https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
class
BlobHandler
:
@contextmanager
from
subprocess
import
Popen
def
make_blobreader
(
repo
:
git
.
Repo
):
with
utils
.
popen
(
repo
:
git
.
Repo
[
"
git
"
,
"
-C
"
,
repo
.
working_dir
,
"
cat-file
"
,
"
--batch
"
],
blobs_handled
:
dict
[
int
,
int
]
=
field
(
default_factory
=
dict
)
stdin
=
subprocess
.
PIPE
,
is_relevant
:
Callable
[[
Path
],
bool
]
=
lambda
_
:
True
stdout
=
subprocess
.
PIPE
,
transform
:
Callable
[[
Path
,
bytes
],
bytes
]
=
lambda
_
,
b
:
b
)
as
gitcat
:
filter
:
fr
.
RepoFilter
|
None
=
None
gitcat
:
Popen
|
None
=
None
bar
:
tqdm
.
tqdm
|
None
=
None
def
__enter__
(
self
)
->
"
BlobHandler
"
:
from
subprocess
import
PIPE
log
.
debug
(
"
Starting blob handler
"
)
if
gitcat
.
stdin
is
None
or
gitcat
.
stdout
is
None
:
self
.
gitcat
=
utils
.
popen
(
[
"
git
"
,
"
-C
"
,
self
.
repo
.
working_dir
,
"
cat-file
"
,
"
--batch
"
],
stdin
=
PIPE
,
stdout
=
PIPE
,
)
if
self
.
gitcat
.
stdin
is
None
or
self
.
gitcat
.
stdout
is
None
:
raise
RuntimeError
(
"
Could not start git cat-file
"
)
raise
RuntimeError
(
"
Could not start git cat-file
"
)
return
self
def
__exit__
(
self
,
exc_type
,
exc_val
,
exc_tb
):
stdin
,
stdout
=
gitcat
.
stdin
,
gitcat
.
stdout
if
self
.
gitcat
is
None
:
return
if
self
.
gitcat
.
stdin
is
not
None
:
self
.
gitcat
.
stdin
.
close
()
self
.
gitcat
.
wait
()
if
self
.
bar
:
def
read_blob
(
blob_id
:
bytes
)
->
bytes
:
self
.
bar
.
close
()
# Some code borrowed from https://github.com/newren/git-filter-repo/blob/main/contrib/filter-repo-demos/lint-history
def
__call__
(
self
,
commit
:
fr
.
Commit
,
metadata
):
assert
self
.
filter
is
not
None
for
change
in
commit
.
file_changes
:
filename
=
Path
(
change
.
filename
.
decode
(
"
utf-8
"
))
if
change
.
type
==
b
"
D
"
or
not
self
.
is_relevant
(
filename
):
continue
if
self
.
bar
:
self
.
bar
.
update
(
1
)
if
change
.
blob_id
not
in
self
.
blobs_handled
:
content
=
self
.
read_blob
(
change
.
blob_id
)
blob
=
fr
.
Blob
(
self
.
transform
(
filename
,
content
))
self
.
filter
.
insert
(
blob
)
self
.
blobs_handled
[
change
.
blob_id
]
=
blob
.
id
change
.
blob_id
=
self
.
blobs_handled
[
change
.
blob_id
]
def
read_blob
(
self
,
blob_id
:
bytes
):
log
.
debug
(
"
Reading blob %s
"
,
blob_id
)
log
.
debug
(
"
Reading blob %s
"
,
blob_id
)
assert
self
.
gitcat
is
not
None
# To get the typecheck to pass
# To get the typecheck to pass
stdin
,
stdout
=
self
.
gitcat
.
stdin
,
self
.
gitcat
.
stdout
assert
stdin
is
not
None
and
stdout
is
not
None
assert
stdin
is
not
None
and
stdout
is
not
None
stdin
.
write
(
blob_id
+
b
"
\n
"
)
stdin
.
write
(
blob_id
+
b
"
\n
"
)
# type: ignore
stdin
.
flush
()
stdin
.
flush
()
_
,
_
,
objsize
=
stdout
.
readline
().
split
()
_
,
_
,
objsize
=
stdout
.
readline
().
split
()
return
stdout
.
read
(
int
(
objsize
)
+
1
)[:
-
1
]
return
stdout
.
read
(
int
(
objsize
)
+
1
)[:
-
1
]
# type: ignore
yield
read_blob
@contextmanager
def
mktransformer
(
program
,
args
,
batch
,
on_error
):
with
tempfile
.
TemporaryDirectory
()
as
folder
:
folder
=
Path
(
folder
)
pargs
=
list
(
args
)
def
run_filter
(
repo
:
git
.
Repo
,
update
:
Callable
[[
Path
,
fr
.
RepoFilter
,
bytes
],
bytes
]
=
lambda
_
,
_n
,
x
:
x
,
is_relevant
:
Callable
[[
Path
],
bool
]
=
lambda
x
:
True
,
options
:
Optional
[
fr
.
FilteringOptions
]
=
None
,
)
->
fr
.
RepoFilter
:
"""
Create a blob handler
"""
if
batch
:
blobs_handled
=
dict
()
formatproc
=
utils
.
popen
(
[
program
]
+
pargs
,
def
commit_callback
(
commit
:
fr
.
Commit
,
metadata
):
stdin
=
subprocess
.
PIPE
,
nonlocal
filter
stdout
=
subprocess
.
PIPE
,
for
change
in
commit
.
file_changes
:
universal_newlines
=
False
,
filename
=
Path
(
change
.
filename
.
decode
(
"
utf-8
"
))
if
change
.
type
==
b
"
D
"
or
not
is_relevant
(
filename
):
continue
if
change
.
blob_id
not
in
blobs_handled
:
blobs_handled
[
change
.
blob_id
]
=
update
(
filename
,
filter
,
change
.
blob_id
)
if
change
.
blob_id
!=
blobs_handled
[
change
.
blob_id
]:
change
.
blob_id
=
blobs_handled
[
change
.
blob_id
]
if
options
is
None
:
options
=
fr
.
FilteringOptions
.
parse_args
(
# type: ignore
[
"
--prune-empty
"
,
"
never
"
,
"
--quiet
"
],
error_on_empty
=
False
,
)
)
if
formatproc
.
stdin
is
None
or
formatproc
.
stdout
is
None
:
raise
RuntimeError
(
"
Could not start the formatting program
"
)
fin
,
fout
=
formatproc
.
stdin
,
formatproc
.
stdout
filter
=
fr
.
RepoFilter
(
options
,
commit_callback
=
commit_callback
,
)
with
utils
.
chdir
(
repo
.
working_dir
):
filter
.
run
()
return
filter
@contextmanager
def
make_stream_transformer
(
program
,
args
,
config
):
def
transformer
(
file
:
Path
,
content
:
bytes
)
->
bytes
:
def
transformer
(
file
:
Path
,
content
:
bytes
)
->
bytes
:
with
utils
.
tfile
(
folder
,
file
.
name
,
content
)
as
tmp_file
:
try
:
try
:
with
utils
.
timeit
(
file
.
name
):
return
utils
.
run_stdout
([
program
]
+
list
(
args
),
input
=
content
)
fin
.
write
(
str
(
tmp_file
).
encode
(
"
utf-8
"
)
+
b
"
\n
"
)
# type: ignore
fin
.
flush
()
log
.
debug
(
"
Waiting for respond.
"
)
reps
=
fout
.
readline
().
decode
()
# type:ignore
log
.
debug
(
"
Process respond %s:
"
,
reps
)
except
subprocess
.
CalledProcessError
as
e
:
except
subprocess
.
CalledProcessError
as
e
:
on_error
=
config
.
get
(
"
on_error
"
,
"
fail
"
)
if
on_error
in
"
fail
"
:
if
on_error
in
"
fail
"
:
file
=
Path
(
file
.
name
).
with_suffix
(
"
.input
"
).
absolute
()
log
.
error
(
"
Writing argument to %s
"
,
file
)
with
open
(
file
,
"
wb
"
)
as
f
:
f
.
write
(
content
)
raise
raise
elif
on_error
==
"
warn
"
:
elif
on_error
==
"
warn
"
:
log
.
warn
(
"
Process failed:
"
,
e
.
cmd
)
log
.
warn
(
"
Process failed:
"
,
e
.
cmd
)
return
content
elif
on_error
==
"
revert
"
:
elif
on_error
==
"
revert
"
:
return
content
return
content
elif
on_error
==
"
ignore
"
:
elif
on_error
==
"
ignore
"
:
log
.
warn
(
"
Process failed:
"
,
e
.
cmd
)
return
content
else
:
else
:
log
.
error
(
"
Unknown
'
on error
'
value
"
)
log
.
error
(
"
Unknown
'
on error
'
value
"
)
raise
KeyError
()
raise
KeyError
()
with
open
(
tmp_file
,
"
rb
"
)
as
f
:
return
f
.
read
()
yield
transformer
yield
transformer
formatproc
.
stdin
.
close
()
formatproc
.
wait
()
@contextmanager
elif
"
{}
"
in
pargs
:
def
make_file_transformer
(
program
,
args
,
config
):
with
tempfile
.
TemporaryDirectory
()
as
folder
:
folder
=
Path
(
folder
)
pargs
=
list
(
args
)
ix
=
pargs
.
index
(
"
{}
"
)
ix
=
pargs
.
index
(
"
{}
"
)
def
transformer
(
file
:
Path
,
content
:
bytes
)
->
bytes
:
def
transformer
(
file
:
Path
,
content
:
bytes
)
->
bytes
:
...
@@ -141,23 +118,83 @@ def mktransformer(program, args, batch, on_error):
...
@@ -141,23 +118,83 @@ def mktransformer(program, args, batch, on_error):
pargs
[
ix
]
=
str
(
tmp_file
)
pargs
[
ix
]
=
str
(
tmp_file
)
try
:
try
:
utils
.
run
([
program
]
+
pargs
)
utils
.
run
([
program
]
+
pargs
)
except
subprocess
.
CalledProcessError
:
except
subprocess
.
CalledProcessError
as
e
:
on_error
=
config
.
get
(
"
on_error
"
,
"
fail
"
)
if
on_error
in
"
fail
"
:
file
=
Path
(
file
.
name
).
with_suffix
(
"
.input
"
).
absolute
()
file
=
Path
(
file
.
name
).
with_suffix
(
"
.input
"
).
absolute
()
log
.
error
(
"
Writing argument to %s
"
,
file
)
log
.
error
(
"
Writing argument to %s
"
,
file
)
with
open
(
file
,
"
wb
"
)
as
f
:
with
open
(
file
,
"
wb
"
)
as
f
:
f
.
write
(
content
)
f
.
write
(
content
)
raise
raise
elif
on_error
==
"
warn
"
:
log
.
warn
(
"
Process failed:
"
,
e
.
cmd
)
elif
on_error
==
"
revert
"
:
return
content
elif
on_error
==
"
ignore
"
:
log
.
warn
(
"
Process failed:
"
,
e
.
cmd
)
else
:
log
.
error
(
"
Unknown
'
on error
'
value
"
)
raise
KeyError
()
with
open
(
tmp_file
,
"
rb
"
)
as
f
:
with
open
(
tmp_file
,
"
rb
"
)
as
f
:
return
f
.
read
()
return
f
.
read
()
yield
transformer
yield
transformer
else
:
@contextmanager
def
make_batch_transformer
(
program
,
args
,
**
config
):
with
(
tempfile
.
TemporaryDirectory
()
as
folder
,
utils
.
popen
(
[
program
]
+
list
(
args
),
stdin
=
subprocess
.
PIPE
,
stdout
=
subprocess
.
PIPE
,
universal_newlines
=
False
,
)
as
formatproc
,
):
folder
=
Path
(
folder
)
if
formatproc
.
stdin
is
None
or
formatproc
.
stdout
is
None
:
raise
RuntimeError
(
"
Could not start the formatting program
"
)
fin
,
fout
=
formatproc
.
stdin
,
formatproc
.
stdout
def
transformer
(
file
:
Path
,
content
:
bytes
)
->
bytes
:
def
transformer
(
file
:
Path
,
content
:
bytes
)
->
bytes
:
return
utils
.
run_stdout
([
program
]
+
pargs
,
input
=
content
)
with
utils
.
tfile
(
folder
,
file
.
name
,
content
)
as
tmp_file
:
with
utils
.
timeit
(
file
.
name
):
fin
.
write
(
str
(
tmp_file
).
encode
(
"
utf-8
"
)
+
b
"
\n
"
)
# type: ignore
fin
.
flush
()
log
.
debug
(
"
Waiting for respond.
"
)
reps
=
fout
.
readline
().
decode
()
# type:ignore
log
.
debug
(
"
Process respond %s:
"
,
reps
)
with
open
(
tmp_file
,
"
rb
"
)
as
f
:
return
f
.
read
()
yield
transformer
yield
transformer
formatproc
.
stdin
.
close
()
formatproc
.
wait
()
def
countfiles
(
repo
:
git
.
Repo
,
is_relevant
:
Callable
[[
Path
],
bool
])
->
Counter
[
Path
]:
"""
count the number of files that will be processed
"""
from
collections
import
Counter
cnt
=
Counter
()
def
count
(
f
,
_
,
x
):
cnt
.
update
([
f
])
return
x
run_filter
(
repo
=
repo
,
update
=
count
,
is_relevant
=
is_relevant
,
)
return
cnt
@click.command
()
@click.command
()
@click.option
(
@click.option
(
...
@@ -178,6 +215,13 @@ def mktransformer(program, args, batch, on_error):
...
@@ -178,6 +215,13 @@ def mktransformer(program, args, batch, on_error):
help
=
"
the glob-pattern to match files.
"
,
help
=
"
the glob-pattern to match files.
"
,
type
=
str
,
type
=
str
,
)
)
@click.option
(
"
-t
"
,
"
--type
"
,
help
=
"
the type of transformation to apply.
"
,
type
=
click
.
Choice
([
"
batch
"
,
"
stream
"
,
"
file
"
,
"
auto
"
],
case_sensitive
=
False
),
default
=
"
auto
"
,
)
@click.option
(
@click.option
(
"
-m
"
,
"
-m
"
,
"
--mapping
"
,
"
--mapping
"
,
...
@@ -203,11 +247,11 @@ def regit(
...
@@ -203,11 +247,11 @@ def regit(
pattern
:
str
|
None
,
pattern
:
str
|
None
,
output
:
Path
,
output
:
Path
,
mapping
,
mapping
,
type
:
str
,
program
:
Path
,
program
:
Path
,
on_error
:
str
,
args
:
tuple
[
str
],
args
:
tuple
[
str
],
verbose
:
int
,
verbose
:
int
,
batch
:
bool
,
**
config
,
):
):
"""
A simple program that runs a command on every commit on a repo.
"""
"""
A simple program that runs a command on every commit on a repo.
"""
...
@@ -224,6 +268,18 @@ def regit(
...
@@ -224,6 +268,18 @@ def regit(
repo
=
git
.
Repo
.
clone_from
(
url
=
repo
,
to_path
=
output
,
no_local
=
True
)
repo
=
git
.
Repo
.
clone_from
(
url
=
repo
,
to_path
=
output
,
no_local
=
True
)
log
.
info
(
"
Cloned repo to %s
"
,
output
)
log
.
info
(
"
Cloned repo to %s
"
,
output
)
if
type
==
"
auto
"
:
if
"
{}
"
in
args
:
type
=
"
file
"
else
:
type
=
"
stream
"
transformer
=
{
"
file
"
:
make_file_transformer
,
"
stream
"
:
make_stream_transformer
,
"
batch
"
:
make_batch_transformer
,
}[
type
]
def
is_relevant
(
file
:
Path
):
def
is_relevant
(
file
:
Path
):
if
pattern
is
None
:
if
pattern
is
None
:
return
True
return
True
...
@@ -231,49 +287,27 @@ def regit(
...
@@ -231,49 +287,27 @@ def regit(
log
.
debug
(
"
Check if %s matched pattern %s
"
,
file
,
match
)
log
.
debug
(
"
Check if %s matched pattern %s
"
,
file
,
match
)
return
match
return
match
if
True
:
with
utils
.
timeit
(
"
counting files
"
,
logfn
=
log
.
info
):
options
=
fr
.
FilteringOptions
.
parse_args
(
cnt
=
countfiles
(
repo
,
is_relevant
)
[
"
--prune-empty
"
,
"
never
"
,
"
--quiet
"
],
error_on_empty
=
False
,
)
from
collections
import
Counter
with
(
transformer
(
program
,
args
,
config
)
as
transform
,
cnt
=
Counter
()
make_blobreader
(
repo
)
as
blobreader
,
tqdm
.
tqdm
(
total
=
cnt
.
total
(),
unit
=
"
files
"
,
desc
=
"
formatting files
"
)
as
bar
,
def
find_files
(
commit
,
metadata
):
):
for
change
in
commit
.
file_changes
:
filename
=
Path
(
change
.
filename
.
decode
(
"
utf-8
"
))
if
change
.
type
==
b
"
D
"
or
not
is_relevant
(
filename
):
continue
cnt
.
update
([
filename
])
filter
=
fr
.
RepoFilter
(
options
,
commit_callback
=
find_files
)
with
utils
.
timeit
(
"
prefilter
"
,
log
.
info
),
utils
.
chdir
(
repo
.
working_dir
):
filter
.
run
()
log
.
debug
(
f
"
Continue to format
{
cnt
.
total
()
}
files
"
)
def
update
(
filename
:
Path
,
filter
:
fr
.
RepoFilter
,
blob_id
:
bytes
)
->
bytes
:
for
f
,
c
in
cnt
.
most_common
():
bar
.
update
()
log
.
debug
(
f
"
{
f
}
:
{
c
}
"
)
content
=
blobreader
(
blob_id
)
blob
=
fr
.
Blob
(
transform
(
filename
,
content
))
filter
.
insert
(
blob
)
return
blob
.
id
# type: ignore
with
mktransformer
(
program
,
args
,
batch
,
on_error
)
as
transform
er
:
filter
=
run_filt
er
(
handler
=
BlobHandler
(
repo
=
repo
,
repo
,
update
=
update
,
is_relevant
=
is_relevant
,
is_relevant
=
is_relevant
,
transform
=
transformer
,
bar
=
tqdm
.
tqdm
(
total
=
cnt
.
total
()),
)
log
.
debug
(
"
Starting handler
"
)
with
handler
:
options
=
fr
.
FilteringOptions
.
parse_args
(
[
"
--prune-empty
"
,
"
never
"
],
error_on_empty
=
False
,
)
)
filter
=
fr
.
RepoFilter
(
options
,
commit_callback
=
handler
)
handler
.
filter
=
filter
with
utils
.
timeit
(
"
git filter
"
,
log
.
info
),
utils
.
chdir
(
repo
.
working_dir
):
filter
.
run
()
if
mapping
:
if
mapping
:
log
.
debug
(
"
Writing mapping...
"
)
log
.
debug
(
"
Writing mapping...
"
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment