Discussion:
[PATCH 1/1] add pathchk
(too old to reply)
Ilya Kuzmich
2017-05-26 16:21:28 UTC
Permalink
Raw Message
Signed-off-by: Ilya Kuzmich <***@gmail.com>
---
tests/pathchk.test | 85 ++++++++++++++++++++++++++++++++++++++++++++++
toys/posix/pathchk.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 180 insertions(+)
create mode 100644 tests/pathchk.test
create mode 100644 toys/posix/pathchk.c

diff --git a/tests/pathchk.test b/tests/pathchk.test
new file mode 100644
index 0000000..4632399
--- /dev/null
+++ b/tests/pathchk.test
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+[ -f testing.sh ] && . testing.sh
+
+#testing "name" "command" "result" "infile" "stdin"
+
+testing "zero arguments" "pathchk 2>/dev/null || echo ok" "ok\n" "" ""
+
+mkdir dir_a-rx
+chmod a-rx dir_a-rx
+testing "dir_a-rx/a" "pathchk dir_a-rx/a 2>/dev/null || echo ok" "ok\n" "" ""
+rmdir dir_a-rx
+
+touch file
+testing "file/a" "pathchk file/a 2>/dev/null || echo ok" "ok\n" "" ""
+rm file
+
+mkcomponent() {
+ head -c "$1" /dev/zero | tr '\0' 'a'
+}
+
+mkpath() {
+ to_fill=$1
+ component_len=$2
+
+ while [ "$to_fill" -gt 0 ]; do
+ if [ $component_len -ge $to_fill ]; then
+ component_len=$((to_fill))
+ fi
+ mkcomponent $component_len
+ to_fill=$((to_fill-component_len))
+ if [ "$to_fill" -gt 0 ]; then
+ printf "/"
+ to_fill=$((to_fill-1))
+ fi
+ done
+}
+
+NAME_MAX="$(getconf NAME_MAX .)"
+testing "NAME_MAX" "pathchk '$(mkcomponent $NAME_MAX )' \
+ && echo ok" "ok\n" "" ""
+testing "NAME_MAX+1" "pathchk '$(mkcomponent $((NAME_MAX+1)) )' \
+ 2>/dev/null || echo ok" "ok\n" "" ""
+testing "NAME_MAX/1" "pathchk '$(mkcomponent $NAME_MAX)/a' \
+ 2>/dev/null && echo ok" "ok\n" "" ""
+testing "NAME_MAX+1/1" "pathchk '$(mkcomponent $((NAME_MAX+1)))/a' \
+ 2>/dev/null || echo ok" "ok\n" "" ""
+
+PATH_MAX="$(getconf PATH_MAX .)"
+testing "PATH_MAX-1" "pathchk '$(mkpath $((PATH_MAX-1)) $NAME_MAX)' \
+ && echo ok" "ok\n" "" ""
+testing "PATH_MAX" "pathchk '$(mkpath $PATH_MAX $NAME_MAX)' \
+ 2>/dev/null || echo ok" "ok\n" "" ""
+
+POSIX_NAME_MAX="14"
+testing "-p _POSIX_NAME_MAX" "pathchk -p \
+ '$(mkcomponent $POSIX_NAME_MAX)' && echo ok" "ok\n" "" ""
+testing "-p _POSIX_NAME_MAX+1" "pathchk -p\
+ '$(mkcomponent $((POSIX_NAME_MAX+1)))' 2>/dev/null || echo ok" \
+ "ok\n" "" ""
+testing "-p _POSIX_NAME_MAX/1" "pathchk -p \
+ '$(mkcomponent $POSIX_NAME_MAX)/a' && echo ok" "ok\n" "" ""
+testing "-p _POSIX_NAME_MAX+1/1" "pathchk -p\
+ '$(mkcomponent $((POSIX_NAME_MAX+1)))/1' 2>/dev/null || echo ok" \
+ "ok\n" "" ""
+
+POSIX_PATH_MAX="256"
+testing "-p _POSIX_PATH_MAX-1" "pathchk -p \
+ '$(mkpath $((POSIX_PATH_MAX-1)) $POSIX_NAME_MAX)' && echo ok" \
+ "ok\n" "" ""
+testing "-p _POSIX_PATH_MAX" "pathchk -p \
+ '$(mkpath $POSIX_PATH_MAX $POSIX_NAME_MAX)' 2>/dev/null || \
+ echo ok" "ok\n" "" ""
+
+# portable filename character set
+testing "-p a=b" "pathchk -p a=b 2>/dev/null || echo ok" "ok\n" "" ""
+testing "-p 'a$'" "pathchk -p 'a$' 2>/dev/null || echo ok" "ok\n" "" ""
+testing "-p '+a'" "pathchk -p '+a' 2>/dev/null || echo ok" "ok\n" "" ""
+
+# empty path and leading '-'
+testing "-P ''" "pathchk -P '' 2>/dev/null || echo ok" "ok\n" "" ""
+testing "-P a/-b" "pathchk -P 'a/-b' 2>/dev/null || echo ok" "ok\n" "" ""
+
+# one error message per path
+testing "-P '' '-'" "pathchk -P '' '-' 2>&1 | wc -l" "2\n" "" ""
diff --git a/toys/posix/pathchk.c b/toys/posix/pathchk.c
new file mode 100644
index 0000000..25d76a7
--- /dev/null
+++ b/toys/posix/pathchk.c
@@ -0,0 +1,95 @@
+/* pathchk.c - check pathnames
+ *
+ * Copyright 2017 Ilya Kuzmich <***@gmail.com>
+ *
+ * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pathchk.html
+
+USE_PATHCHK(NEWTOY(pathchk, "<1pP", TOYFLAG_USR|TOYFLAG_BIN))
+
+config PATHCHK
+ bool "pathchk"
+ default y
+ help
+ usage: pathchk [-p] [-P] [NAME...]
+
+ Check that pathnames are portable and valid on the underlying file system
+
+ -p check instead for POSIX portability
+ -P check for empty names and components with leading "-"
+*/
+
+#define FOR_pathchk
+#include "toys.h"
+
+static long get_pathconf(char *path, int name, long fallback)
+{
+ long ret;
+
+ errno = 0;
+ ret = pathconf(path, name);
+ if (ret == -1 && errno)
+ ret = fallback;
+
+ return ret;
+}
+
+static void do_check_path(char *path)
+{
+ long pathmax, namemax;
+ size_t namelen;
+ char *p = path;
+
+ if (toys.optflags & FLAG_p) {
+ namemax = _POSIX_NAME_MAX;
+ pathmax = _POSIX_PATH_MAX;
+
+ p += strspn(p, "/"
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "0123456789._-");
+
+ if (*p) {
+ error_msg("%s: nonportable character %c in path", path, *p);
+ return;
+ }
+ } else {
+ struct stat st;
+
+ namemax = get_pathconf(*path == '/' ? "/" : ".", _PC_NAME_MAX, NAME_MAX);
+ pathmax = get_pathconf(path, _PC_PATH_MAX, PATH_MAX);
+
+ errno = 0;
+ if (stat(path, &st) && errno != ENOENT)
+ perror_msg("%s", path);
+ }
+
+ for (namelen = 0, p = path; *p; p += namelen) {
+ p += strspn(p, "/");
+ namelen = strcspn(p, "/");
+
+ if (namemax != -1 && namelen > namemax) {
+ error_msg("%s: component too long (limit %ld)", path, namemax);
+ return;
+ }
+
+ if (toys.optflags & FLAG_P && *p == '-') {
+ error_msg("%s: leading '-' in a path component", path);
+ return;
+ }
+ }
+
+ if (pathmax != -1 && strlen(path) >= (size_t)pathmax) {
+ error_msg("%s: path too long (limit %ld)", path, pathmax);
+ return;
+ }
+
+ if (toys.optflags & FLAG_P && !*path) {
+ error_msg("path is empty");
+ return;
+ }
+}
+
+void pathchk_main(void)
+{
+ while (*toys.optargs) do_check_path(*toys.optargs++);
+}
--
2.7.4
Rob Landley
2017-05-27 19:24:19 UTC
Permalink
Raw Message
Post by Ilya Kuzmich
---
tests/pathchk.test | 85 ++++++++++++++++++++++++++++++++++++++++++++++
toys/posix/pathchk.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 180 insertions(+)
create mode 100644 tests/pathchk.test
create mode 100644 toys/posix/pathchk.c
Do you actually have a use case for this, or are you implementing it
because nobody has yet and it's in posix?

This seems like a competent implementation of this command, and I see it
is in the roadmap, but I'd like to know _why_ you want it. I've mentally
been lumping it in with the posix "sum" and "compress" commands as a
thing that's still in posix yet clearly obsolete, which there _might_
still be users for, but I'd wait for one to show up before implementing
it. It's not the outright veto things like sccs and qselect get for
being zombies from the 1970's, but I've run an awful lot of scripts and
package builds that never used this command.

Busybox never implemented this command. (Yes ubuntu installs it, but
they install "sum" too. "compress" and "pax" aren't there for historical
reasons: patents on the first, and a Linux vs Solaris identity thing for
the second.)

UTF8 is a big deal these days. Filenames starting with - are why all
toybox commands support -- and aren't really stranger than filenames
with spaces in them. The value of POSIX_PATH_MAX is 256, a hilariously
low value left over from the PDP-11. (The PATH_MAX in linux/limits.h is
4096 but that's legacy value, glibc doesn't define one at all anymore.
You can always create a deeper path with "cd longpath; mv ~/dirname ."
behind the OS's back (unless the OS wants to traverse all of ~/dirname's
children every time it does a mv) and even posix says rm -rf has to just
cope with it.)

On Linux, the only invalid characters in the Linux VHS are NULL and
forward slash; anything else is explicitly allowed, and yes that
includes invalid utf8 sequences:

http://yarchive.net/comp/linux/utf8.html

If you're trying to figure out what a specific filesystem supports in
directory du jour, some of them are case sensitive so you get magic
aliasing and what's valid depends on what ELSE is in the directory. (You
can't mkdir "blah" if "Blah" exists and belongs to another user as chmod
700). Some of the older filesystems have mount-time selectable locale
encodings they translate behind your back.

I terms of having the OS do this work for you, "readlink -m" and
"readlink -f" exist, both of which have -q and the ability to return
error without writing anything to the filesystem. That won't catch the
last path component being longer than 255 chars (the Linux VFS limit on
path components) but I could add a check for it there.

I moved hostid to examples because it's _not_ in posix, but this one is
so _if_ it goes in that's the place for it. But if I do merge it I'm
strongly tempted to make it "default n"...

Anyway, what's your motivation for submitting this command?

Rob
Ilya Kuzmich
2017-05-27 19:39:26 UTC
Permalink
Raw Message
No, i do not have use case for this.

I've implemented pathchk because it was on roadmap, had a decent spec
and was small enough to do in a couple of evenings.

I'm happy to contribute, but most of the stuff on the roadmap is either
huge or in pending.

I'm not sure i have enough time to do something big, and contributing to
pending is tricky: you have your reasons to keep stuff in pending but
they are not oblivious to me.

Because of that i choose to do pathck.
Post by Rob Landley
Post by Ilya Kuzmich
---
tests/pathchk.test | 85 ++++++++++++++++++++++++++++++++++++++++++++++
toys/posix/pathchk.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 180 insertions(+)
create mode 100644 tests/pathchk.test
create mode 100644 toys/posix/pathchk.c
Do you actually have a use case for this, or are you implementing it
because nobody has yet and it's in posix?
This seems like a competent implementation of this command, and I see it
is in the roadmap, but I'd like to know _why_ you want it. I've mentally
been lumping it in with the posix "sum" and "compress" commands as a
thing that's still in posix yet clearly obsolete, which there _might_
still be users for, but I'd wait for one to show up before implementing
it. It's not the outright veto things like sccs and qselect get for
being zombies from the 1970's, but I've run an awful lot of scripts and
package builds that never used this command.
Busybox never implemented this command. (Yes ubuntu installs it, but
they install "sum" too. "compress" and "pax" aren't there for historical
reasons: patents on the first, and a Linux vs Solaris identity thing for
the second.)
UTF8 is a big deal these days. Filenames starting with - are why all
toybox commands support -- and aren't really stranger than filenames
with spaces in them. The value of POSIX_PATH_MAX is 256, a hilariously
low value left over from the PDP-11. (The PATH_MAX in linux/limits.h is
4096 but that's legacy value, glibc doesn't define one at all anymore.
You can always create a deeper path with "cd longpath; mv ~/dirname ."
behind the OS's back (unless the OS wants to traverse all of ~/dirname's
children every time it does a mv) and even posix says rm -rf has to just
cope with it.)
On Linux, the only invalid characters in the Linux VHS are NULL and
forward slash; anything else is explicitly allowed, and yes that
http://yarchive.net/comp/linux/utf8.html
If you're trying to figure out what a specific filesystem supports in
directory du jour, some of them are case sensitive so you get magic
aliasing and what's valid depends on what ELSE is in the directory. (You
can't mkdir "blah" if "Blah" exists and belongs to another user as chmod
700). Some of the older filesystems have mount-time selectable locale
encodings they translate behind your back.
I terms of having the OS do this work for you, "readlink -m" and
"readlink -f" exist, both of which have -q and the ability to return
error without writing anything to the filesystem. That won't catch the
last path component being longer than 255 chars (the Linux VFS limit on
path components) but I could add a check for it there.
I moved hostid to examples because it's _not_ in posix, but this one is
so _if_ it goes in that's the place for it. But if I do merge it I'm
strongly tempted to make it "default n"...
Anyway, what's your motivation for submitting this command?
Rob
Rob Landley
2017-05-28 22:24:17 UTC
Permalink
Raw Message
Post by Ilya Kuzmich
No, i do not have use case for this.
I've implemented pathchk because it was on roadmap, had a decent spec
and was small enough to do in a couple of evenings.
Yeah, that's my fault. I should update the roadmap.
Post by Ilya Kuzmich
I'm happy to contribute, but most of the stuff on the roadmap is either
huge or in pending.
I'm not sure i have enough time to do something big, and contributing to
pending is tricky: you have your reasons to keep stuff in pending but
they are not oblivious to me.
Because of that i choose to do pathck.
Unfortunately people asking how they can help are usually asking "is
there something easy to do that you haven't already done". Hard question
for me to answer.

I have eight gazillion todo items, such as "figure out why top eats so
much cpu" and "now that zcat's in I should implement zgrep" with a note
asking if I should just make the normal grep autodetect gzipped input
from the header signature, or add a -z flag to it", and "should I add
mtd-utils or ubi-utils stuff", "add df -i"... those seem the easiest for
somebody else to tackle? (Although the zgrep one has a design question
in it.)

Rob
Ilya Kuzmich
2017-05-28 23:15:11 UTC
Permalink
Raw Message
Could you publish this todo somehow?
Would be glad to help.
Post by Rob Landley
Post by Ilya Kuzmich
No, i do not have use case for this.
I've implemented pathchk because it was on roadmap, had a decent spec
and was small enough to do in a couple of evenings.
Yeah, that's my fault. I should update the roadmap.
Post by Ilya Kuzmich
I'm happy to contribute, but most of the stuff on the roadmap is either
huge or in pending.
I'm not sure i have enough time to do something big, and contributing to
pending is tricky: you have your reasons to keep stuff in pending but
they are not oblivious to me.
Because of that i choose to do pathck.
Unfortunately people asking how they can help are usually asking "is
there something easy to do that you haven't already done". Hard question
for me to answer.
I have eight gazillion todo items, such as "figure out why top eats so
much cpu" and "now that zcat's in I should implement zgrep" with a note
asking if I should just make the normal grep autodetect gzipped input
from the header signature, or add a -z flag to it", and "should I add
mtd-utils or ubi-utils stuff", "add df -i"... those seem the easiest for
somebody else to tackle? (Although the zgrep one has a design question
in it.)
Rob
Loading...