Context Navigation

Back to Ticket #90

Ticket #90: patchfs.in

File patchfs.in, 9.8 KB (added by kdave, 16 years ago)
rewritten parser of unified diff files, copyin is disabled

Line
1	#! @PERL@ -w
2	#
3	# Written by Adam Byrtek <alpha@debian.org>, 2002
4	# Rewritten by David Sterba <dave@jikos.cz>, 2009
5	#
6	# Extfs to handle patches in context and unified diff format.
7	# Known issues: When name of file to patch is modified during editing,
8	# hunk is duplicated on copyin. It is unavoidable.
9
10	use bytes;
11	use strict;
12	use POSIX;
13	use File::Temp 'tempfile';
14
# standard binaries
my $lzma = 'lzma';
my $bzip = 'bzip2';
my $gzip = 'gzip';
my $fileutil = 'file';

# date parsing requires Date::Parse from TimeDate module
# (true when the module could be loaded, false/undef otherwise)
my $parsedates = eval { require Date::Parse; 1 };

# regular expressions

# unified diff: "--- old" / "+++ new" header pair (two lines joined)
my $unified_header=qr/^--- .*\n\+\+\+ .*\n$/;
# captures: source name, destination name, rest of the "+++" line (date)
my $unified_extract=qr/^--- ([^\s]+).*\n\+\+\+ ([^\s]+)\s*([^\t\n]*)/;
my $unified_contents=qr/^([+\-\\ \n]|@@ .* @@)/;
# captures: start1, ",count1", count1, start2, ",count2", count2
# both counts are optional, eg. "@@ -3 +3 @@"
my $unified_hunk=qr/@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@.*\n/;

# context diff: "*** old" / "--- new" header pair (two lines joined)
my $context_header=qr/^\*\*\* .*\n--- .*\n$/;
my $context_extract=qr/^\*\*\* ([^\s]+).*\n--- ([^\s]+)\s*([^\t\n]*)/;
my $context_contents=qr/^([!+\-\\ \n]|-{3} .* -{4}|\*{3} .* \*{4}|\*{15})/;

# third and fourth column of `ls -l' output
my $ls_extract_id=qr/^[^\s]+\s+[^\s]+\s+([^\s]+)\s+([^\s]+)/;
# $2 is the last path component
my $basename=qr|^(.*/)*([^/]+)$|;
36
# Normalize a path taken from a diff header:
#  - collapse runs of slashes into a single slash,
#  - drop the first run of "./" or "../" components found at the start
#    of the path or right after a slash.
# IN:  path
# OUT: normalized copy of the path (the argument itself is not changed)
sub patchfs_canonicalize_path ($) {
    my ($fname) = @_;
    $fname =~ s,/+,/,g;
    # no /g here: only the first such run is removed
    $fname =~ s,(^|/)(?:\.?\./)+,$1,;
    return $fname;
}
43
# Format a unix timestamp as "MM-DD-YYYY hh:mm" (local time),
# the date format mc can read.
sub timef
{
    my ($min, $hour, $mday, $mon, $year) = (localtime($_[0]))[1..5];
    return sprintf('%02d-%02d-%02d %02d:%02d',
                   $mon + 1, $mday, $year + 1900, $hour, $min);
}
51
# Parse the given string as a date and return it formatted by timef().
# When Date::Parse is not available, or the string cannot be parsed,
# the current time is used instead.
# note: str2time interprets some wrong values (eg. " ") as 'today'
sub datetime
{
    my ($datestr) = @_;
    if ($parsedates) {
        my $t = str2time($datestr);
        return timef($t) if defined $t;
    }
    return timef(time);
}
62
# Fatal error: write the message followed by a newline to STDERR
# and terminate the script with exit status 1.
sub error
{
    my ($message) = @_;
    print STDERR $message, "\n";
    exit 1;
}
69
# (compressed) input
# Build the shell command that writes the uncompressed contents of
# a file to stdout.  The compression is guessed from the output of
# the `file' utility.
# IN:  file name (shell-quoted here with quotemeta)
# OUT: command string, eg. "gzip -dc name" or "cat name"
sub myin
{
    my ($qfname)=(quotemeta $_[0]);

    # keep the probe result in a lexical, the caller's $_ stays untouched
    my $type=`$fileutil $qfname`;
    $type='' unless defined $type;

    # NOTE(review): the output of `file' normally includes the file name
    # itself, so a name containing eg. "gzip" may be misdetected -- confirm
    if ($type =~ /lzma/) {
        return "$lzma -dc $qfname";
    } elsif ($type =~ /bzip/) {
        return "$bzip -dc $qfname";
    } elsif ($type =~ /gzip/) {
        return "$gzip -dc $qfname";
    } else {
        return "cat $qfname";
    }
}
86
# (compressed) output
# Build the shell command that reads data from stdin and writes it,
# compressed the same way as the existing file, into that file.
# The compression is guessed from the output of the `file' utility.
# IN:  file name (shell-quoted here with quotemeta)
# IN:  append? true - use '>>', false - use '>' (overwrite)
# OUT: command string, eg. "gzip -c > name" or "cat >> name"
sub myout
{
    my ($qfname,$append)=(quotemeta $_[0],$_[1]);
    my ($sep) = $append ? '>>' : '>';

    # keep the probe result in a lexical, the caller's $_ stays untouched
    my $type=`$fileutil $qfname`;
    $type='' unless defined $type;

    # NOTE(review): the output of `file' normally includes the file name
    # itself, so a name containing eg. "gzip" may be misdetected -- confirm
    if ($type =~ /lzma/) {
        return "$lzma -c $sep $qfname";
    } elsif ($type =~ /bzip/) {
        return "$bzip -c $sep $qfname";
    } elsif ($type =~ /gzip/) {
        return "$gzip -c $sep $qfname";
    } else {
        return "cat $sep $qfname";
    }
}
104
# select diff filename conforming with rules found in diff.info
# IN:  source (old) file name from the diff header
# IN:  destination (new) file name from the diff header
# OUT: list (name, prefix); prefix is 'PATCH-CREATE/' when the source
#      is missing or /dev/null, 'PATCH-REMOVE/' when the destination is
#      missing or /dev/null, '' otherwise
# Calls error() (which exits) when no name can be chosen.
sub diff_filename
{
    my ($fsrc,$fdst)= @_;
    # TODO: can remove these two calls later
    $fsrc = patchfs_canonicalize_path ($fsrc);
    $fdst = patchfs_canonicalize_path ($fdst);

    if (!$fdst && !$fsrc) {
        error('Index: not yet implemented');
    }
    # has to be tested before the single /dev/null cases below,
    # otherwise this check can never be reached
    if ($fsrc && $fdst && $fsrc eq '/dev/null' && $fdst eq '/dev/null') {
        error('Malformed diff');
    }
    if (!$fsrc || $fsrc eq '/dev/null') {
        return ($fdst,'PATCH-CREATE/');
    }
    if (!$fdst || $fdst eq '/dev/null') {
        return ($fsrc,'PATCH-REMOVE/');
    }

    # fewest path name components (tr counts the slashes, changes nothing)
    my $nsrc = ($fsrc =~ tr|/||);
    my $ndst = ($fdst =~ tr|/||);
    return ($fdst,'') if ($ndst < $nsrc);
    return ($fsrc,'') if ($ndst > $nsrc);

    # shorter base name; a name without a base name counts as length 0
    my $bsrc = ($fsrc =~ /$basename/) ? length $2 : 0;
    my $bdst = ($fdst =~ /$basename/) ? length $2 : 0;
    return ($fdst,'') if ($bdst < $bsrc);
    return ($fsrc,'') if ($bdst > $bsrc);

    # shortest names, the source name wins a tie
    return ($fdst,'') if (length($fdst) < length($fsrc));
    return ($fsrc,'');
}
143
# Walk through the diff read from filehandle I (opened by the caller) and,
# depending on the arguments, list the files in it, print the diff of one
# file, or print everything except the diff of one file.
# IN: diff "archive" name
# IN: file handle for output; STDOUT for list, tempfile else
# IN: filename to watch (for: copyout, rm), '' for: list
# IN: remove the file?
#       true - ... and print out the rest
#       false - ie. copyout mode, print just the file
# OUT: in list context references to two arrays, the source and the
#      destination names found in the file headers; nothing otherwise
# The output handle is closed here unless we are in list mode.
sub parse($$$$)
{
    my $archive=quotemeta shift;
    my $fh=shift;
    my $file=shift;
    my $rmmod=shift;
    my $listing=($file eq '');          # list mode
    my $wantnames=wantarray;
    my ($state,$fsize,$time);
    my ($f,$fsrc,$fdst,$prefix);
    my ($unified,$context);
    my ($skipread, $filetoprint, $filefound);
    my ($h_add,$h_del,$h_ctx);          # hunk line counts
    my ($h_r1,$h_r2);                   # hunk ranges
    my @outsrc;                         # if desired ...
    my @outdst;
    local $_;                           # do not leak the current line

    # use uid and gid from file; they appear only in the listing,
    # so do not run ls in the other modes
    my ($uid,$gid);
    ($uid,$gid)=(`ls -l $archive`=~/$ls_extract_id/) if ($listing);
    $uid=0 unless defined $uid;
    $gid=0 unless defined $gid;

    Date::Parse->import if ($parsedates && $listing);

    # prints the listing line of the file collected so far
    my $print_entry=sub {
        printf {$fh} "-rw-r--r-- 1 %s %s %d %s %s%s\n",
            $uid, $gid, $fsize, datetime($time), $prefix, $f;
    };

    $state=0; $fsize=0; $f='';
    $filefound=0; $filetoprint=0; $skipread=0;
    # with $skipread set, the line already in $_ is processed again
    # in the new state instead of reading another one
    while ($skipread || defined($_=<I>)) {
        $skipread=0;
        if($state == 0) { # expecting comments
            $unified=$context=0;
            $unified=1 if (/^--- /);
            $context=1 if (/^\*\*\* /);
            if (!$unified && !$context) {
                $filefound=0 if(!$listing && $filetoprint);
                # shortcut for rmmod xor filefound
                # - in rmmod we print if not found
                # - in copyout (!rmmod) we print if found
                print {$fh} $_ if($rmmod != $filefound);
                next;
            }

            # the previous file is complete, list it
            $print_entry->() if($listing && $filetoprint);

            # start of new file
            my $second=<I>; # steal next line, both formats
            $_ .= $second if defined $second;
            if($unified) {
                error("Can't parse unified diff header")
                    unless /$unified_header/;
                ($fsrc,$fdst,$time) = /$unified_extract/;
            } else {
                error("Can't parse context diff header")
                    unless /$context_header/;
                ($fsrc,$fdst,$time) = /$context_extract/;
            }
            $fsrc=patchfs_canonicalize_path($fsrc);
            $fdst=patchfs_canonicalize_path($fdst);
            if($wantnames) {
                push @outsrc,$fsrc;
                push @outdst,$fdst;
            }
            ($f,$prefix)=diff_filename($fsrc,$fdst);
            $filefound=($fsrc eq $file || $fdst eq $file) ? 1 : 0;

            $f="$f.diff";
            $filetoprint=1;
            $fsize=length($_);
            print {$fh} $_ if($rmmod != $filefound);

            $state=1;
        } elsif($state == 1) { # expecting diff hunk headers, end of file or comments
            if($unified) {
                my ($start1,$has_cnt1,$cnt1,$start2,$has_cnt2,$cnt2)=
                    /$unified_hunk/;
                if(!defined($start1) || !defined($start2)) {
                    # hunk header does not come, a comment inside
                    # or maybe a new file, state 0 will decide
                    $skipread=1;
                    $state=0;
                    next;
                }
                $fsize+=length($_);
                print {$fh} $_ if($rmmod != $filefound);
                # an omitted line count means one line
                $h_r1=defined($has_cnt1) ? $cnt1 : 1;
                $h_r2=defined($has_cnt2) ? $cnt2 : 1;
                $h_add=$h_del=$h_ctx=0;
                $state=2;
            } elsif($context) {
                if(!/$context_contents/) {
                    $skipread=1;
                    $state=0;
                    next;
                }
                print {$fh} $_ if($rmmod != $filefound);
                $fsize+=length($_);
            }
        } elsif($state == 2) { # expecting hunk contents
            my $first=substr($_,0,1);
            # a "\ No newline at end of file" marker following the last
            # line of the hunk still belongs to that hunk
            if($h_del + $h_ctx == $h_r1 && $h_add + $h_ctx == $h_r2
               && $first ne '\\') {
                # hooray, end of hunk
                # we optimistically ended with a hunk before but
                # the line has been read already
                $skipread=1;
                $state=1;
                next;
            }
            print {$fh} $_ if($rmmod != $filefound);
            $fsize+=length($_);
            if($first eq '+') { $h_add++; }
            elsif($first eq '-') { $h_del++; }
            elsif($first eq ' ') { $h_ctx++; }
            elsif($first eq '\\') { } # marker line, not counted
            elsif($first eq '@') { error("Malformed hunk, header came too early"); }
            else { error("Unrecognized character in hunk"); }
        }
    }
    # the last file in the diff
    $print_entry->() if($listing && $filetoprint);

    close($fh) if(!$listing);
    return (\@outsrc, \@outdst) if $wantnames;
    return;
}
279
# list files affected by patch
# IN: diff archive name
# The diff itself is read from filehandle I, which is closed afterwards.
sub list($) {
    my ($archive)=@_;
    parse($archive, *STDOUT, '', 0);
    close(I);
}
285
# extract diff from patch
# IN: diff file to find
# IN: output file name
# The diff is read from filehandle I; the output file is closed by parse().
sub copyout($$) {
    my ($file,$out)=@_;

    # turn the listed name back into the name used inside the diff
    $file=~s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/;
    $file = patchfs_canonicalize_path ($file);

    # 3-arg open: the output name is never interpreted as a mode
    open(my $fh, '>', $out) or error("Cannot open output file");
    parse('', $fh, $file, 0);
}
298
# remove diff(s) from patch
# IN: archive
# IN: file to delete
# The diff is read from filehandle I; everything except the diff of the
# given file goes to a temporary file which then replaces the archive.
sub rm($$) {
    my $archive=shift;
    my ($tmp,$tmpname)=tempfile();

    # just the first file for now
    my $file=shift;
    # turn the listed name back into the name used inside the diff
    $file=~s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/;
    $file = patchfs_canonicalize_path ($file);

    parse($archive, $tmp, $file, 1);
    close I;

    # replace archive
    my $status=system("cat \Q$tmpname\E | " . myout($archive,0));
    # remove the temporary file in any case, error() does not return
    unlink $tmpname;
    $status==0
        or error("Can't write to archive");
    return;
}
317
# append diff to archive
# IN: diff archive name
# IN: newly created file name in archive
# IN: the real source file
# Currently disabled: always reports an error (error() exits).
sub copyin($$$) {
    # TODO: seems to be tricky. what to do?
    # copyin of file which is already there may:
    # * delete the original and copy only the new
    # * just append the new hunks to the same file
    #   problems: may not be a valid diff, unmerged hunks
    # * try to merge the two together
    #   ... but we do not want write patchutils again, right?
    error("Copying files into diff not supported");
    return;

    # NOTE: everything below is unreachable, it is the unfinished
    # draft of the implementation kept for reference

    my ($archive,$name,$src)=@_;

    # in case we are appending another diff, we have
    # to delete/merge all the files
    open(DEVNULL, ">/dev/null");
    open I, myin($src).'|';
    my ($srclist,$dstlist)=parse($archive, *DEVNULL, '', 0);
    close(I);
    close(DEVNULL);
    foreach(@$srclist) {
        print("SRC: del $_\n");
    }
    foreach(@$dstlist) {
        print("DST: del $_\n");
    }
    return;

    # remove overwritten file
    open I, myin($archive).'|';
    rm ($archive, $name);
    close I;

    my $cmd1=myin("$src.diff");
    my $cmd2=myout($archive,1);
    system("$cmd1 | $cmd2")==0
        or error("Can't write to archive");
}
360
361
# Command dispatch: $ARGV[0] is the command, $ARGV[1] the diff archive,
# the remaining arguments depend on the command.  For list, copyout and
# rm the (uncompressed) archive is made available on filehandle I.
# Exit status is 0 on success, 1 for an unknown or missing command.
my $command=defined $ARGV[0] ? $ARGV[0] : '';

if ($command eq 'list') {
    open(I, myin($ARGV[1]).'|') or error("Can't read archive");
    list($ARGV[1]);
    exit 0;
} elsif ($command eq 'copyout') {
    open(I, myin($ARGV[1]).'|') or error("Can't read archive");
    copyout($ARGV[2], $ARGV[3]);
    exit 0;
} elsif ($command eq 'rm') {
    open(I, myin($ARGV[1]).'|') or error("Can't read archive");
    rm($ARGV[1], $ARGV[2]);
    exit 0;
} elsif ($command eq 'rmdir') {
    # nothing to do
    exit 0;
} elsif ($command eq 'mkdir') {
    # nothing to do
    exit 0;
} elsif ($command eq 'copyin') {
    copyin($ARGV[1], $ARGV[2], $ARGV[3]);
    exit 0;
}
exit 1;

Download in other formats:

Original Format