Ticket #1952: untar.txt

File untar.txt, 5.7 KB (added by zaytsev, 4 years ago)
Line 
1#! /bin/sh
2#
3# by dw 2013-11-12
4#
5# relies on tar to autodetect the used compression method (gz, bz2, xz, ...)
6# supported formats (posix, ...) depend on the found version of tar
7# due to proprietary options works only with GNU tar (1.26-1.29)
8# list and copyout working
9# rm, mkdir, rmdir partly working (only uncompressed tar archives)
10# copyin works partly (only uncompressed tar archives, archive rootdir)
11# besides, modifying the tar file can cause problems (duplicate files, ...)
12# and should be avoided
13#
14# by dw 2017-04-09
15# experimental support for bsdtar added, problems
16#         - timestamps require LANG=C for english month names
17#         - with LANG=C can't handle filenames containing utf-8
18# experimental support for star added
19#
20# by dw 2018-06-03
21# star 1.5.3 from openSUSE Leap 15.0 supports auto detection of xz
22# date format conversion added, list and copyout working with star
23# error messages with GNU tar created archives containing
24# special character filenames
25# => GNU tar is the default, star is first fallback
26
# Locate a usable tar implementation in PATH.
# Preference order: tar (expected to be GNU tar), then star, then bsdtar.
# Outputs: the path of the first implementation found, on stdout.
# Returns: 0 when one was found, 1 (and no output) otherwise.
mctarfs_find_tar ()
{
  for mctarfs_candidate in tar star bsdtar; do
    # command -v is the POSIX way to resolve a command; which(1) is not
    # standardized and may be missing or print errors on stdout.
    if mctarfs_found=$( command -v "$mctarfs_candidate" 2>/dev/null ) \
       && [ -n "$mctarfs_found" ]; then
      printf '%s\n' "$mctarfs_found"
      return 0
    fi
  done
  return 1
}

# Do not exit here when nothing is found: TAR is left empty so that the
# check before command dispatch can print a proper error message.
TAR=$( mctarfs_find_tar )
34
# List the contents of the archive "$1" on stdout, one entry per line, in the
# format the mc extfs parser expects.
# Globals:   TAR (read) - path of the tar implementation; its basename
#            selects the output parser (tar, bsdtar or star).
# Arguments: $1 - archive file
# Returns:   status of the listing pipeline; exits 1 with a message on
#            stderr when TAR is not a supported implementation.
mctarfs_list ()
{
  # link count is hardcoded to 1
  # can't use $6 for the filename in case of blanks or other separators
  # mc expects: AAAAAAA NNN OOOOOOOO GGGGGGGG SSSSSSSS DATETIME [PATH/]FILENAME [-> [PATH/]FILENAME[/]]]
  # gawk strips blanks => trailing blanks of filenames are lost
  # filenames may contain blanks - field splitting does not work for them,
  # so the name is cut out of the raw line with substr() at a computed offset

  case "$TAR" in
    */tar)
      # GNU tar
      # output of GNU tar 1.26 for tar tvf:
      # drwxr-xr-x user/group      0 2013-08-22 20:27 directory/
      # GNU tar timestamps always contain ":" and can be used as anchor

      "$TAR" tvf "$1" | gawk '
      BEGIN { FS="" }
      {
        split($0,b," ")
        sub(/\//, " ", b[2])
        split(b[4], DATE, "-")
        name_offs = index($0, b[5])+6
        printf "%s 1 %s %s %s-%s-%s %s ./%s\n", b[1], b[2], b[3], DATE[2], DATE[3], DATE[1], b[5], substr($0, name_offs)
      }'
      ;;
    */bsdtar)
      # BSD tar
      # bsdtar uses LANG for the date format
      # setting LANG to en_US.UTF-8 to get a specific date format breaks handling of non-utf8 archives
      # output of bsdtar-3.1.2 tar tvf:
      # drwxr-xr-x  0 user   group       0 Oct  5  2016 directory/
      # -rw-r--r--  0 user   group   68981 Oct  1  2016 directory/file
      # drwxr-xr-x  0 user   group       0 Dec 13 20:59 directory/
      # -rw-r--r--  0 user   group    2013 Sep 28  2013 dateformat.tst
      # the date/timestamp format is supported by mc BUT do not always contain ":", "YYYY" or "hh:mm"
      # if b[8] =~ ":" this should always be the first match on the line, use as anchor
      # if b[8] =~ "YYYY" this could match the filesize => name_offs has to be bigger than parts

      # The subshell keeps the LANG override local to this pipeline, so the
      # caller's LANG is neither changed nor created.
      (
        LANG=en_US.UTF-8
        export LANG

        "$TAR" tvf "$1" | gawk '
        BEGIN { FS="" }
        {
          split($0,b," ")
          parts = length(b[1] b[2] b[3] b[4] b[5] b[6] b[7]) + 7
          name_offs = index($0, " "b[8]" ") + 6
          if (index(b[8], ":") != 0)
            name_offs += 1
          else if (name_offs < parts)
          {
            new_part = substr($0, parts)
            name_offs = index(new_part, " "b[8]" ") + 5 + parts
          }
          printf "%s 1 %s %s %s %s %s %s ./%s\n", b[1], b[3], b[4], b[5], b[6], b[7], b[8], substr($0, name_offs)
        }'
      )
      ;;
    */star)
      # output of star 1.5.3 tar tvf:
      #      5 -rw-r--r--  user/group Sep 28 13:16 2013 dateformat.tst
      #      0 drwxr-xr-x  user/group Aug 22 20:36 2013 tartest_dir_level_1/
      # date conversion to MM-DD-YYYY hh:mm[:ss]
      # b[1] size
      # b[2] permissions
      # b[3] user group
      # b[4] MMM
      # b[5] DD
      # b[6] hh:mm
      # b[7] YYYY

      "$TAR" tvf "$1" 2> /dev/null | gawk '
      BEGIN {
         FS=""
         # Copied from uarc/uzoo
         split("Jan:Feb:Mar:Apr:May:Jun:Jul:Aug:Sep:Oct:Nov:Dec", month_list, ":")
         for (i=1; i<=12; i++) {
            month[month_list[i]] = i
         }
      }
      {
        split($0,b," ")
        sub(/\//, " ", b[3])
        # skip "hh:mm YYYY " (5 + 1 + 4 + 1 characters) to reach the name
        name_offs = index($0, b[6]) + 11
        printf "%s 1 %s %s %02d-%02d-%04d %s ./%s\n", b[2], b[3], b[1], month[b[4]], b[5], b[7], b[6], substr($0, name_offs)
      }'
      ;;
    *)
      # stderr, so the message does not end up in the listing that is
      # written to stdout
      echo "unsupported tar implementation" >&2
      exit 1
      ;;
  esac
}
128
# Append a file to the archive "$1".
# Globals:   TAR (read), MC_TMPDIR (read, optional temp dir base)
# Arguments: $1 - archive file
#            $2 - name the file gets inside the archive
#            $3 - local file holding the content (mc extfs calls copyin as
#                 "archive storedname sourcefile")
# Returns:   0 on success, non-zero when staging or tar fails.
# Without $3 the previous behaviour is kept: "$2" is appended relative to
# the current directory.
mctarfs_copyin ()
{
  if [ -z "$3" ]; then
    "$TAR" rf "$1" "$2" >/dev/null
    return $?
  fi

  # tar is run from inside a staging directory, so a relative archive path
  # has to be made absolute first.
  case "$1" in
    /*) mctarfs_archive=$1 ;;
    *)  mctarfs_archive=$( pwd )/$1 ;;
  esac

  mctarfs_stage=$( mktemp -d "${MC_TMPDIR:-/tmp}/mctmpdir-utar.XXXXXX" ) || return 1

  # Recreate the stored path below the staging directory so tar records the
  # member under the name "$2" with the content of "$3".
  mctarfs_status=0
  case "$2" in
    */*) mkdir -p "$mctarfs_stage/${2%/*}" || mctarfs_status=$? ;;
  esac
  if [ "$mctarfs_status" -eq 0 ]; then
    cp -fp "$3" "$mctarfs_stage/$2" \
      && ( cd "$mctarfs_stage" && "$TAR" rf "$mctarfs_archive" "$2" >/dev/null )
    mctarfs_status=$?
  fi

  rm -rf "$mctarfs_stage"
  return "$mctarfs_status"
}
133
# Extract one member of the archive "$1" into a local file.
# Globals:   TAR (read)
# Arguments: $1 - archive file
#            $2 - member name inside the archive
#            $3 - local file that receives the content
# Returns:   status of the last tar invocation.
# The member is first requested under the given name; if that fails it is
# requested again as "./$2", because archives built from a find-generated
# file list store their members with a leading "./".
mctarfs_copyout ()
{
  case "$TAR" in
    */star)
      "$TAR" x -to-stdout -f "$1" "$2" > "$3" 2> /dev/null \
        || "$TAR" x -to-stdout -f "$1" "./$2" > "$3"
      ;;
    *)
      # GNU tar, bsdtar
      "$TAR" xOf "$1" "$2" > "$3" 2> /dev/null \
        || "$TAR" xOf "$1" "./$2" > "$3"
      ;;
  esac
}
148
# Add an empty directory entry "$2" to the archive "$1".
# Globals:   TAR (read), MC_TMPDIR (read, optional temp dir base)
# Arguments: $1 - archive file
#            $2 - directory name inside the archive
# Returns:   0 on success, non-zero when staging or tar fails; exits 1 when
#            no temporary directory can be created.
mctarfs_mkdir ()
{
  # tar is run from inside a scratch directory, so a relative archive path
  # has to be made absolute first.
  case "$1" in
    /*) mctarfs_archive=$1 ;;
    *)  mctarfs_archive=$( pwd )/$1 ;;
  esac

  mctarfs_stage=$( mktemp -d "${MC_TMPDIR:-/tmp}/mctmpdir-utar.XXXXXX" ) || exit 1

  # The subshell confines the directory change; the scratch directory is
  # removed afterwards no matter which step failed.
  (
    cd "$mctarfs_stage" || exit 1
    mkdir -p "$2" || exit 1
    "$TAR" rf "$mctarfs_archive" "$2" >/dev/null
  )
  mctarfs_status=$?

  rm -rf "$mctarfs_stage"
  return "$mctarfs_status"
}
159
# Delete the member "$2" from the archive "$1" (also used for rmdir).
# Globals:   TAR (read)
# Arguments: $1 - archive file
#            $2 - member name inside the archive
# Returns:   status of the last tar invocation.
# Like mctarfs_copyout, the member is retried as "./$2" when the plain name
# fails, to cover archives whose members are stored with a leading "./".
mctarfs_rm ()
{
  "$TAR" --delete -f "$1" "$2" >/dev/null 2>&1 \
    || "$TAR" --delete -f "$1" "./$2" >/dev/null
}
164
# override any locale for dates
# LC_TIME is the locale category for date/time formatting; LC_DATE is not a
# locale variable and has no effect.
LC_TIME=C
export LC_TIME

umask 077

if [ -z "$TAR" ]; then
  echo "Error: could not find tar." >&2
  exit 1
fi

# First argument is the extfs command, the rest are its operands.
cmd="$1"
# Only shift when there is something to shift; an empty command falls
# through to the default branch below.
[ "$#" -gt 0 ] && shift

case "$cmd" in
  # Workaround for a bug in mc - directories must precede files to
  # avoid duplicate entries. The listing is sorted from field 8 on, so
  # "./dir/" is emitted before "./dir/file".
  list)    mctarfs_list    "$@" | sort -k 8 ;;
  rm)      mctarfs_rm      "$@" ;;
  rmdir)   mctarfs_rm      "$@" ;;
  mkdir)   mctarfs_mkdir   "$@" ;;
  copyin)  mctarfs_copyin  "$@" ;;
  copyout) mctarfs_copyout "$@" ;;
  *) exit 1 ;;
esac
# Hand the status of the executed command back to the caller instead of
# always reporting success.
exit $?