Ticket #90: patchfs.in

File patchfs.in, 9.8 KB (added by kdave, 15 years ago)

rewritten parser of unified diff files, copyin is disabled

Line 
1#! @PERL@ -w
2#
3# Written by Adam Byrtek <alpha@debian.org>, 2002
4# Rewritten by David Sterba <dave@jikos.cz>, 2009
5#
6# Extfs to handle patches in context and unified diff format.
7# Known issues: When name of file to patch is modified during editing,
8# hunk is duplicated on copyin. It is unavoidable.
9
10use bytes;
11use strict;
12use POSIX;
13use File::Temp 'tempfile';
14
# standard binaries
my $lzma = 'lzma';
my $bzip = 'bzip2';
my $gzip = 'gzip';
my $fileutil = 'file';

# date parsing requires Date::Parse from TimeDate module
# (true only when the module could be loaded)
my $parsedates = eval 'require Date::Parse';

# regular expressions

# unified diff: the two header lines ("--- old" followed by "+++ new")
my $unified_header=qr/^--- .*\n\+\+\+ .*\n$/;
# captures: old name, new name, text following the new name (timestamp)
my $unified_extract=qr/^--- ([^\s]+).*\n\+\+\+ ([^\s]+)\s*([^\t\n]*)/;
# any line that may appear in the body of a unified diff
my $unified_contents=qr/^([+\-\\ \n]|@@ .* @@)/;
# hunk header "@@ -start[,count] +start[,count] @@"; captures are
# (start, ",count", count) for the old and then for the new range.
# Both counts are optional: diff omits ",count" when it equals 1.
my $unified_hunk=qr/@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@.*\n/;

# context diff: the two header lines ("*** old" followed by "--- new")
my $context_header=qr/^\*\*\* .*\n--- .*\n$/;
# captures: old name, new name, text following the new name (timestamp)
my $context_extract=qr/^\*\*\* ([^\s]+).*\n--- ([^\s]+)\s*([^\t\n]*)/;
# any line that may appear in the body of a context diff
my $context_contents=qr/^([!+\-\\ \n]|-{3} .* -{4}|\*{3} .* \*{4}|\*{15})/;

# captures the third and fourth whitespace-separated fields of an
# "ls -l" line (used as owner and group)
my $ls_extract_id=qr/^[^\s]+\s+[^\s]+\s+([^\s]+)\s+([^\s]+)/;
# splits a path into leading directories ($1) and last component ($2)
my $basename=qr|^(.*/)*([^/]+)$|;
36
# Normalize a path taken from a diff header: runs of slashes collapse to
# a single slash, then the first run of "./" or "../" components (at the
# start of the path or right after a slash) is dropped.
sub patchfs_canonicalize_path ($) {
    my $path = shift;

    # squeeze repeated slashes
    $path =~ s{/+}{/}g;
    # strip one run of "./" and "../" components, keeping what preceded it
    $path =~ s{(^|/)(?:\.?\./)+}{$1};

    return $path;
}
43
# Format a unix timestamp, in local time, as "MM-DD-YYYY hh:mm"
# (the date layout used for the listing lines printed by parse).
sub timef
{
    # localtime fields 1..5: minute, hour, day of month, month (0-based),
    # years since 1900
    my ($min, $hour, $mday, $mon, $year) = (localtime($_[0]))[1 .. 5];

    return sprintf('%02d-%02d-%02d %02d:%02d',
                   $mon + 1, $mday, $year + 1900, $hour, $min);
}
51
# Turn the date string found in a diff header into the listing date
# format produced by timef.
sub datetime
{
    my $stamp;

    # str2time comes from Date::Parse and is only available when that
    # module could be loaded
    # note: str2time interprets some wrong values (eg. " ") as 'today'
    $stamp = str2time($_[0]) if ($parsedates);

    # without a usable result fall back to the current time
    return timef(defined($stamp) ? $stamp : time);
}
62
# Fatal error: write the message plus a newline to STDERR and leave the
# program with exit status 1.
sub error
{
    my ($message) = @_;

    print STDERR $message, "\n";
    exit 1;
}
69
# (compressed) input
# IN: archive file name
# OUT: shell command that writes the uncompressed archive to stdout
sub myin
{
    my ($qfname)=(quotemeta $_[0]);

    # -b makes file(1) print only the description.  Without it the
    # output starts with the file name, and a plain patch called e.g.
    # "gzip-fix.patch" would be taken for a gzip archive.
    my $type=`$fileutil -b $qfname`;
    $type='' if(!defined($type));

    if ($type=~/^lzma/i) {
        return "$lzma -dc $qfname";
    } elsif ($type=~/^bzip/i) {
        return "$bzip -dc $qfname";
    } elsif ($type=~/^gzip/i) {
        return "$gzip -dc $qfname";
    } elsif ($type=~/^data\b/ && $_[0]=~/\.lzma$/i) {
        # file(1) did not recognize the contents; keep honouring the
        # .lzma suffix, which the old name-inclusive match accepted
        # NOTE(review): presumably needed because some file(1) versions
        # report lzma streams as plain "data" - confirm
        return "$lzma -dc $qfname";
    } else {
        return "cat $qfname";
    }
}
86
# (compressed) output
# IN: archive file name
# IN: append to the archive (true) or overwrite it (false)
# OUT: shell command that reads data on stdin and stores it in the
#      archive, compressed the same way the existing archive is
sub myout
{
    my ($qfname,$append)=(quotemeta $_[0],$_[1]);
    my ($sep) = $append ? '>>' : '>';

    # -b makes file(1) print only the description.  Without it the
    # output starts with the file name, and a plain patch called e.g.
    # "gzip-fix.patch" would be taken for a gzip archive.
    my $type=`$fileutil -b $qfname`;
    $type='' if(!defined($type));

    if ($type=~/^lzma/i) {
        return "$lzma -c $sep $qfname";
    } elsif ($type=~/^bzip/i) {
        return "$bzip -c $sep $qfname";
    } elsif ($type=~/^gzip/i) {
        return "$gzip -c $sep $qfname";
    } elsif ($type=~/^data\b/ && $_[0]=~/\.lzma$/i) {
        # file(1) did not recognize the contents; keep honouring the
        # .lzma suffix, which the old name-inclusive match accepted
        # NOTE(review): presumably needed because some file(1) versions
        # report lzma streams as plain "data" - confirm
        return "$lzma -c $sep $qfname";
    } else {
        return "cat $sep $qfname";
    }
}
104
# select diff filename conforming with rules found in diff.info
# IN: source (old) file name from the diff header
# IN: destination (new) file name from the diff header
# OUT: (name to show, directory prefix); the prefix is 'PATCH-CREATE/'
#      when only the destination names a file, 'PATCH-REMOVE/' when only
#      the source does, '' otherwise
# Exits through error() when neither side names a file.
sub diff_filename
{
    my ($fsrc,$fdst)= @_;
    # TODO: can remove these two calls later
    $fsrc = patchfs_canonicalize_path ($fsrc);
    $fdst = patchfs_canonicalize_path ($fdst);

    my $nosrc = (!$fsrc || $fsrc eq '/dev/null');
    my $nodst = (!$fdst || $fdst eq '/dev/null');

    if (!$fdst && !$fsrc) {
        error('Index: not yet implemented');
    } elsif ($nosrc && $nodst) {
        # must be tested before the create/remove cases, otherwise a
        # diff of /dev/null against /dev/null is reported as a creation
        error('Malformed diff');
    } elsif ($nosrc) {
        return ($fdst,'PATCH-CREATE/');
    } elsif ($nodst) {
        return ($fsrc,'PATCH-REMOVE/');
    }

    # fewest path name components (tr counts the slashes)
    my $ndst = ($fdst =~ tr|/||);
    my $nsrc = ($fsrc =~ tr|/||);
    return ($fdst,'') if ($ndst < $nsrc);
    return ($fsrc,'') if ($ndst > $nsrc);

    # shorter base name
    my $bdst = ($fdst =~ /$basename/) ? length($2) : 0;
    my $bsrc = ($fsrc =~ /$basename/) ? length($2) : 0;
    return ($fdst,'') if ($bdst < $bsrc);
    return ($fsrc,'') if ($bdst > $bsrc);

    # shortest names; the source name wins a tie
    return ($fdst,'') if (length($fdst) < length($fsrc));
    return ($fsrc,'');
}
143
# Walk through the diff read from the global handle I (opened by the
# caller) and, depending on the arguments, list the files it touches,
# print the diff of one file, or print everything but the diff of one file.
# IN: diff "archive" name (only used to look up owner and group for list)
# IN: file handle for output; STDOUT for list, tempfile else
# IN: filename to watch (for: copyout, rm), '' for: list
# IN: remove the file?
#     true  - ... and print out the rest
#     false - ie. copyout mode, print just the file
# OUT: in list context, references to the arrays of source and
#      destination names found in the file headers
# Exits through error() on a header or hunk that cannot be parsed.
sub parse($$$$)
{
    my $archive=quotemeta shift;
    my $fh=shift;
    my $file=shift;
    my $rmmod=shift;
    my ($state,$fsize,$time);
    my ($f,$fsrc,$fdst,$prefix);
    my ($unified,$context);
    my ($skipread, $filetoprint, $filefound);
    my ($h_add,$h_del,$h_ctx);  # hunk line counts
    my ($h_r1,$h_r2);           # hunk ranges
    my @outsrc;         # if desired ...
    my @outdst;
    my ($uid,$gid);

    # use uid and gid from file; they only appear in listing lines, so
    # do not run ls in the other modes (copyout passes no archive name)
    ($uid,$gid)=(`ls -l $archive`=~/$ls_extract_id/o) if($file eq '');

    # make str2time available to datetime(), needed for listings only
    Date::Parse->import() if ($parsedates && $file eq '');

    # states: 0 - between files (comments), 1 - after a file header or a
    # finished hunk, 2 - inside a unified hunk
    $state=0; $fsize=0; $f='';
    $filefound=0;
    $skipread=0;
    while (1) {
        if($skipread) {
            # look at the current line again, now in the new state
            $skipread=0;
        } else {
            $_=<I>;
            # test for definedness, a last line "0" must not end the loop
            last if(!defined($_));
        }
        if($state == 0) {       # expecting comments
            $unified=$context=0;
            $unified=1 if (/^--- /);
            $context=1 if (/^\*\*\* /);
            if (!$unified && !$context) {
                $filefound=0 if($file ne '' && $filetoprint);
                # shortcut for rmmod xor filefound
                # - in rmmod we print if not found
                # - in copyout (!rmmod) we print if found
                print {$fh} $_ if($rmmod != $filefound);
                next;
            }

            # a new file starts, so the previous one is complete
            if($file eq '' && $filetoprint) {
                printf {$fh} "-rw-r--r-- 1 %s %s %d %s %s%s\n", $uid, $gid, $fsize, datetime($time), $prefix, $f;
            }

            # start of new file
            # steal next line, both formats; at end of input there is
            # none and the header checks below report the error
            my $second=<I>;
            $_ .= $second if(defined($second));
            if($unified) {
                if(/$unified_header/o) {
                    ($fsrc,$fdst,$time) = /$unified_extract/o;
                } else {
                    error("Can't parse unified diff header");
                }
            } elsif($context) {
                if(/$context_header/o) {
                    ($fsrc,$fdst,$time) = /$context_extract/o;
                } else {
                    error("Can't parse context diff header");
                }
            } else {
                error("Unrecognized diff header");
            }
            $fsrc=patchfs_canonicalize_path($fsrc);
            $fdst=patchfs_canonicalize_path($fdst);
            if(wantarray) {
                push @outsrc,$fsrc;
                push @outdst,$fdst;
            }
            ($f,$prefix)=diff_filename($fsrc,$fdst);
            $filefound=($fsrc eq $file || $fdst eq $file);

            $f="$f.diff";
            $filetoprint=1;
            # reported size: header plus all following hunk lines
            $fsize=length($_);
            print {$fh} $_ if($rmmod != $filefound);

            $state=1;
        } elsif($state == 1) { # expecting diff hunk headers, end of file or comments
            if($unified) {
                my ($a,$b,$c,$d);
                ($a,$b,$h_r1,$c,$d,$h_r2)=/$unified_hunk/o;
                if(!defined($a) || !defined($c)) {
                    # hunk header does not come, a comment inside
                    # or maybe a new file, state 0 will decide
                    $skipread=1;
                    $state=0;
                    next;
                }
                $fsize+=length($_);
                print {$fh} $_ if($rmmod != $filefound);
                # a range given without ",count" covers one line
                $h_r1=1 if(!defined($b));
                $h_r2=1 if(!defined($d));
                $h_add=$h_del=$h_ctx=0;
                $state=2;
            } elsif($context) {
                # context diffs are not counted line by line, anything
                # that looks like diff contents belongs to the file
                if(!/$context_contents/o) {
                    $skipread=1;
                    $state=0;
                    next;
                }
                print {$fh} $_ if($rmmod != $filefound);
                $fsize+=length($_);
            }
        } elsif($state == 2) { # expecting hunk contents
            if($h_del + $h_ctx == $h_r1 && $h_add + $h_ctx == $h_r2) {
                # hooray, end of hunk
                # we optimistically ended with a hunk before but
                # the line has been read already
                $skipread=1;
                $state=1;
                next;
            }
            print {$fh} $_ if($rmmod != $filefound);
            $fsize+=length($_);
            my ($first)= /^(.)/;
            if(!defined($first)) {
                # empty line: a context line that lost its leading blank
                $h_ctx++;
            }
            elsif($first eq '+') { $h_add++; }
            elsif($first eq '-') { $h_del++; }
            elsif($first eq ' ') { $h_ctx++; }
            elsif($first eq '\\') { 0; }   # "\ No newline ..." is not counted
            elsif($first eq '@') { error("Malformed hunk, header came too early"); }
            else { error("Unrecognized character in hunk"); }
        }
    }
    # the last file of the diff has not been listed yet
    if($file eq '' && $filetoprint) {
        printf {$fh} "-rw-r--r-- 1 %s %s %d %s %s%s\n", $uid, $gid, $fsize, datetime($time), $prefix, $f;
    }

    close($fh) if($file ne '');
    return (\@outsrc, \@outdst) if wantarray;
    return;
}
279
# list files affected by patch
# IN: diff archive name
# The listing goes to STDOUT; the input handle I (opened by the caller)
# is closed afterwards.
sub list($) {
    my ($archive) = @_;

    parse($archive, *STDOUT, '', 0);
    close(I);
}
285
# extract diff from patch
# IN: diff file to find
# IN: output file name
# The diff is read from the global handle I, which the caller has opened.
sub copyout($$) {
    my ($file,$out)=@_;

    # turn the listed name ([PATCH-CREATE/|PATCH-REMOVE/]name.diff) back
    # into the name used in the diff header
    $file=~s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/;
    $file = patchfs_canonicalize_path ($file);

    # three-argument open: the output name is never parsed as a mode
    open(my $outfh, '>', $out) or error("Cannot open output file");
    parse('', $outfh, $file, 0);
}
298
# remove diff(s) from patch
# IN: archive
# IN: file to delete
# The archive contents are read from the global handle I (opened by the
# caller), filtered into a temporary file and written back over the
# archive.  Exits through error() when the archive cannot be rewritten.
sub rm($$) {
    my $archive=shift;
    # UNLINK: the temporary file also goes away when error() exits
    my ($tmp,$tmpname)=tempfile(UNLINK => 1);

    # work on copies, the caller's arguments stay untouched
    my @files=@_;
    foreach my $name (@files) {
        # turn the listed name back into the name used in the diff header
        $name=~s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/;
        $name=patchfs_canonicalize_path($name);
    }

    # just the first file for now
    parse($archive, $tmp, $files[0], 1);
    close I;

    # replace archive
    system("cat \Q$tmpname\E | " . myout($archive,0))==0
      or error("Can't write to archive");
    unlink($tmpname);
}
317
# append diff to archive - not supported, always exits through error()
# IN: diff archive name
# IN: newly created file name in archive
# IN: the real source file
sub copyin($$$) {
    # TODO: seems to be tricky. what to do?
    # copyin of file which is already there may:
    #  * delete the original and copy only the new
    #  * just append the new hunks to the same file
    #    problems: may not be a valid diff, unmerged hunks
    #  * try to merge the two together
    #    ... but we do not want write patchutils again, right?
    #
    # Approach sketched by the removed, never reached draft:
    #  1. parse the source diff to collect the names of the files it
    #     touches
    #  2. remove the overwritten file from the archive with rm
    #  3. append the source diff to the archive, piping the command from
    #     myin into the command from myout in append mode
    error("Copying files into diff not supported");
    return;
}
360
361
# Command dispatch: the first argument names the operation, the
# following ones are the archive and the file names.  list, copyout and
# rm read the (decompressed) archive through the global handle I.
# Unknown or missing commands end with exit status 1.
my $command = defined($ARGV[0]) ? $ARGV[0] : '';

if ($command eq 'list') {
    open(I, '-|', myin($ARGV[1])) or error("Can't read archive");
    list($ARGV[1]);
    exit 0;
} elsif ($command eq 'copyout') {
    open(I, '-|', myin($ARGV[1])) or error("Can't read archive");
    copyout($ARGV[2], $ARGV[3]);
    exit 0;
} elsif ($command eq 'rm') {
    open(I, '-|', myin($ARGV[1])) or error("Can't read archive");
    rm($ARGV[1], $ARGV[2]);
    exit 0;
} elsif ($command eq 'rmdir') {
    # directories exist only in the listing, nothing to do
    exit 0;
} elsif ($command eq 'mkdir') {
    # directories exist only in the listing, nothing to do
    exit 0;
} elsif ($command eq 'copyin') {
    copyin($ARGV[1], $ARGV[2], $ARGV[3]);
    exit 0;
}
exit 1;