Discussion:
[PATCH 1/1] teach head -c
(too old to reply)
Ilya Kuzmich
2017-05-28 15:29:54 UTC
Permalink
Raw Message
Not POSIX, but implemented in coreutils, busybox and freebsd.

Signed-off-by: Ilya Kuzmich <***@gmail.com>
---
tests/head.test | 4 ++++
toys/posix/head.c | 16 +++++++++++-----
2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/tests/head.test b/tests/head.test
index 6ed027c..4e4c01b 100755
--- a/tests/head.test
+++ b/tests/head.test
@@ -27,3 +27,7 @@ testing "-q, multiple files" "head -q -n 2 input file1" "one\ntwo\nfoo\nbar\n" \
"one\ntwo\nthree\n" ""
rm file1

+testing "-c 3" "head -c 3" "one" "" "one\ntwo"
+testing "-c bigger than input" "head -c 3" "a" "" "a"
+testing "-c 3 -n 1" "head -c 3 -n 1" "one\n" "" "one\ntwo"
+testing "-n 1 -c 3" "head -n 1 -c 3" "one" "" "one\ntwo"
diff --git a/toys/posix/head.c b/toys/posix/head.c
index f5fd281..41b7913 100644
--- a/toys/posix/head.c
+++ b/toys/posix/head.c
@@ -3,8 +3,9 @@
* Copyright 2006 Timothy Elliott <***@holymonkey.com>
*
* See http://opengroup.org/onlinepubs/9699919799/utilities/head.html
+ * See http://man7.org/linux/man-pages/man1/head.1.html

-USE_HEAD(NEWTOY(head, "?n#<0=10qv", TOYFLAG_USR|TOYFLAG_BIN))
+USE_HEAD(NEWTOY(head, "?n#<0=10c#qv[-nc]", TOYFLAG_USR|TOYFLAG_BIN))

config HEAD
bool "head"
@@ -24,13 +25,14 @@ config HEAD
#include "toys.h"

GLOBALS(
+ long bytes;
long lines;
int file_no;
)

static void do_head(int fd, char *name)
{
- int i, len, lines=TT.lines, size=sizeof(toybuf);
+ int i, len, lines=TT.lines, bytes=TT.bytes;

if ((toys.optc > 1 && !(toys.optflags & FLAG_q)) || toys.optflags & FLAG_v) {
// Print an extra newline for all but the first file
@@ -39,12 +41,16 @@ static void do_head(int fd, char *name)
xflush();
}

- while (lines) {
- len = read(fd, toybuf, size);
+ while (toys.optflags & FLAG_c ? bytes : lines) {
+ len = read(fd, toybuf, sizeof(toybuf));
if (len<0) perror_msg_raw(name);
if (len<1) break;

- for(i=0; i<len;) if (toybuf[i++] == '\n' && !--lines) break;
+ if (bytes) {
+ i = bytes >= len ? len : bytes;
+ bytes -= i;
+ } else
+ for(i=0; i<len;) if (toybuf[i++] == '\n' && !--lines) break;

xwrite(1, toybuf, i);
}
--
2.7.4
Ilya Kuzmich
2017-05-28 16:07:22 UTC
Permalink
Raw Message
Not POSIX, but implemented in coreutils, busybox and freebsd.

Signed-off-by: Ilya Kuzmich <***@gmail.com>
---
tests/head.test | 4 ++++
toys/posix/head.c | 17 ++++++++++++-----
2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/tests/head.test b/tests/head.test
index 6ed027c..4e4c01b 100755
--- a/tests/head.test
+++ b/tests/head.test
@@ -27,3 +27,7 @@ testing "-q, multiple files" "head -q -n 2 input file1" "one\ntwo\nfoo\nbar\n" \
"one\ntwo\nthree\n" ""
rm file1

+testing "-c 3" "head -c 3" "one" "" "one\ntwo"
+testing "-c bigger than input" "head -c 3" "a" "" "a"
+testing "-c 3 -n 1" "head -c 3 -n 1" "one\n" "" "one\ntwo"
+testing "-n 1 -c 3" "head -n 1 -c 3" "one" "" "one\ntwo"
diff --git a/toys/posix/head.c b/toys/posix/head.c
index f5fd281..43ac28a 100644
--- a/toys/posix/head.c
+++ b/toys/posix/head.c
@@ -3,8 +3,9 @@
* Copyright 2006 Timothy Elliott <***@holymonkey.com>
*
* See http://opengroup.org/onlinepubs/9699919799/utilities/head.html
+ * See http://man7.org/linux/man-pages/man1/head.1.html

-USE_HEAD(NEWTOY(head, "?n#<0=10qv", TOYFLAG_USR|TOYFLAG_BIN))
+USE_HEAD(NEWTOY(head, "?n#<0=10c#qv[-nc]", TOYFLAG_USR|TOYFLAG_BIN))

config HEAD
bool "head"
@@ -16,6 +17,7 @@ config HEAD
stdin. Filename "-" is a synonym for stdin.

-n Number of lines to copy
+ -c Number of bytes to copy
-q Never print headers
-v Always print headers
*/
@@ -24,13 +26,14 @@ config HEAD
#include "toys.h"

GLOBALS(
+ long bytes;
long lines;
int file_no;
)

static void do_head(int fd, char *name)
{
- int i, len, lines=TT.lines, size=sizeof(toybuf);
+ int i, len, lines=TT.lines, bytes=TT.bytes;

if ((toys.optc > 1 && !(toys.optflags & FLAG_q)) || toys.optflags & FLAG_v) {
// Print an extra newline for all but the first file
@@ -39,12 +42,16 @@ static void do_head(int fd, char *name)
xflush();
}

- while (lines) {
- len = read(fd, toybuf, size);
+ while (toys.optflags & FLAG_c ? bytes : lines) {
+ len = read(fd, toybuf, sizeof(toybuf));
if (len<0) perror_msg_raw(name);
if (len<1) break;

- for(i=0; i<len;) if (toybuf[i++] == '\n' && !--lines) break;
+ if (bytes) {
+ i = bytes >= len ? len : bytes;
+ bytes -= i;
+ } else
+ for(i=0; i<len;) if (toybuf[i++] == '\n' && !--lines) break;

xwrite(1, toybuf, i);
}
--
2.7.4
Rob Landley
2017-05-28 22:26:12 UTC
Permalink
Raw Message
Post by Ilya Kuzmich
Not POSIX, but implemented in coreutils, busybox and freebsd.
Again v1/v2...?

Confused,

Rob
Ilya Kuzmich
2017-05-28 23:08:55 UTC
Permalink
Raw Message
v1 misses help text, v2 does not.
Sorry for the inconvenience.
Post by Rob Landley
Post by Ilya Kuzmich
Not POSIX, but implemented in coreutils, busybox and freebsd.
Again v1/v2...?
Confused,
Rob
_______________________________________________
Toybox mailing list
http://lists.landley.net/listinfo.cgi/toybox-landley.net
Ilya Kuzmich
2017-05-29 02:36:28 UTC
Permalink
Raw Message
Now I forgot the c#<0 constraint. Whoops.
This is, hopefully, the last version.
Post by Ilya Kuzmich
v1 misses help text, v2 does not.
Sorry for the inconvenience.
Post by Rob Landley
Post by Ilya Kuzmich
Not POSIX, but implemented in coreutils, busybox and freebsd.
Again v1/v2...?
Confused,
Rob
_______________________________________________
Toybox mailing list
http://lists.landley.net/listinfo.cgi/toybox-landley.net
Ilya Kuzmich
2017-06-01 06:36:38 UTC
Permalink
Raw Message
ping?
Post by Ilya Kuzmich
Now i forgot c#<0 constraint. Whops.
This is, hopefully, the last version.
Post by Ilya Kuzmich
v1 misses help text, v2 does not.
Sorry for the inconvenience.
Post by Rob Landley
Post by Ilya Kuzmich
Not POSIX, but implemented in coreutils, busybox and freebsd.
Again v1/v2...?
Confused,
Rob
_______________________________________________
Toybox mailing list
http://lists.landley.net/listinfo.cgi/toybox-landley.net
From 2dc24d2ad4507328c363a52291257af6b5d0b6f8 Mon Sep 17 00:00:00 2001
Date: Sun, 28 May 2017 18:29:19 +0300
Subject: [PATCH] teach head -c
Not POSIX, but implemented in coreutils, busybox and freebsd.
---
tests/head.test | 4 ++++
toys/posix/head.c | 17 ++++++++++++-----
2 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/tests/head.test b/tests/head.test
index 6ed027c..4e4c01b 100755
--- a/tests/head.test
+++ b/tests/head.test
@@ -27,3 +27,7 @@ testing "-q, multiple files" "head -q -n 2 input file1" "one\ntwo\nfoo\nbar\n" \
"one\ntwo\nthree\n" ""
rm file1
+testing "-c 3" "head -c 3" "one" "" "one\ntwo"
+testing "-c bigger than input" "head -c 3" "a" "" "a"
+testing "-c 3 -n 1" "head -c 3 -n 1" "one\n" "" "one\ntwo"
+testing "-n 1 -c 3" "head -n 1 -c 3" "one" "" "one\ntwo"
diff --git a/toys/posix/head.c b/toys/posix/head.c
index f5fd281..be6e109 100644
--- a/toys/posix/head.c
+++ b/toys/posix/head.c
@@ -3,8 +3,9 @@
*
* See http://opengroup.org/onlinepubs/9699919799/utilities/head.html
+ * See http://man7.org/linux/man-pages/man1/head.1.html
-USE_HEAD(NEWTOY(head, "?n#<0=10qv", TOYFLAG_USR|TOYFLAG_BIN))
+USE_HEAD(NEWTOY(head, "?n#<0=10c#<0qv[-nc]", TOYFLAG_USR|TOYFLAG_BIN))
config HEAD
bool "head"
@@ -16,6 +17,7 @@ config HEAD
stdin. Filename "-" is a synonym for stdin.
-n Number of lines to copy
+ -c Number of bytes to copy
-q Never print headers
-v Always print headers
*/
@@ -24,13 +26,14 @@ config HEAD
#include "toys.h"
GLOBALS(
+ long bytes;
long lines;
int file_no;
)
static void do_head(int fd, char *name)
{
- int i, len, lines=TT.lines, size=sizeof(toybuf);
+ int i, len, lines=TT.lines, bytes=TT.bytes;
if ((toys.optc > 1 && !(toys.optflags & FLAG_q)) || toys.optflags & FLAG_v) {
// Print an extra newline for all but the first file
@@ -39,12 +42,16 @@ static void do_head(int fd, char *name)
xflush();
}
- while (lines) {
- len = read(fd, toybuf, size);
+ while (toys.optflags & FLAG_c ? bytes : lines) {
+ len = read(fd, toybuf, sizeof(toybuf));
if (len<0) perror_msg_raw(name);
if (len<1) break;
- for(i=0; i<len;) if (toybuf[i++] == '\n' && !--lines) break;
+ if (bytes) {
+ i = bytes >= len ? len : bytes;
+ bytes -= i;
+ } else
+ for(i=0; i<len;) if (toybuf[i++] == '\n' && !--lines) break;
xwrite(1, toybuf, i);
}
--
2.7.4
Rob Landley
2017-06-26 19:27:28 UTC
Permalink
Raw Message
ping?
Let's see...
Post by Ilya Kuzmich
Not POSIX, but implemented in coreutils, busybox and freebsd.
1) Do you have a use case for this? Or did you implement this because
it's there? Denys added it to Busybox on February 25, 2013 but no
message titles in the busybox mailing list around then mention "head" (I
checked from February 2013 back to the previous November). I couldn't find a
bug report in https://bugs.busybox.net/buglist.cgi?quicksearch=head
either. So it looks like when busybox was ~15 years old Denys added this
because he could, not because anybody requested it or particularly
noticed it was missing...

2) On ubuntu "echo hello | head -c 0" produces no output. This one looks
like it falls back to line based behavior?

3) The ubuntu version has a more complicated -c behavior than you
implemented, "head -c -6600 README" currently prints the first 24 bytes
of that file. Why did you stop there? Why do we need this part but not
all of it? (Our tail already implements the -c +k behavior, but somebody
had an existing use case that needed it...)

4) I am really uncomfortable pointing to posix _and_ a man page as a
spec. (The old saying "A person with one watch always knows the time, a
person with two is never sure."*) There are two other files that use man
pages as their spec, and it's because neither posix nor lsb specify
nsenter/unshare/partprobe. I'd rather just add it to a "deviations from
posix" section if it's a posix command...

*shrug* I admit this is a more convenient syntax than doing it with dd
(which is the first thing that comes to mind for me), but could you
explain your reasoning for this patch a bit more? And/or any second
opinions out there?

Thanks,

Rob

* A person with three or more is going to have an endlessly expanding
todo list, you can just tell. I say this having spent months wrestling
with https://www.navcen.uscg.gov/pubs/gps/sigspec/gpssps1.pdf which
requires _5_ nanosecond-accurate clocks to accurately tell you where you
are. 4 in the satellites, one in the receiver.
Ilya Kuzmich
2017-06-27 04:58:19 UTC
Permalink
Raw Message
Post by Rob Landley
ping?
Let's see...
Post by Ilya Kuzmich
Not POSIX, but implemented in coreutils, busybox and freebsd.
1) Do you have a use case for this? Or did you implement this because
it's there? Denys added it to Busybox on February 25, 2013 but no
message titles in the busybox mailing list around then mention "head" (I
checked from February 2013 back to the previous November). I couldn't a
bug report in https://bugs.busybox.net/buglist.cgi?quicksearch=head
either. So it looks like when busybox was ~15 years old Denys added this
because he could, not because anybody requested it or particularly
noticed it was missing...
Convenience and compatibility.
It's widely used syntax, on github alone `"head -c" language:shell`
query returns 22,602 code results.
Personal perspective: my embedded linux $DAYJOB uses head -c a lot.
Post by Rob Landley
2) On ubuntu "echo hello | head -c 0" produces no output. This one looks
like it falls back to line based behavior?
No, it does not.
I've just tested my implementation and it produces no output either.
Post by Rob Landley
3) The ubuntu version has a more complicated -c behavior than you
implemented, "head -c -6600 README" currently prints the first 24 bytes
of that file. Why did you stop there? Why do we need this part but not
all of it? (Our tail already implements the -c +k behavior, but somebody
had an existing use case that needed it...)
It's just that I don't need negative values.
But hey - we could merge head.c and tail.c together.
Post by Rob Landley
4) I am really uncomfortable pointing to posix _and_ a man page as a
spec. (The old saying "A person with one watch always knows the time, a
person with two is never sure."*) There are two other files that use man
pages as their spec, and it's because neither posix nor lsb specify
nsenter/unshare/partprobe. I'd rather just add it to a "deviations from
posix" section if it's a posix command...
Reasonable.
Post by Rob Landley
*shrug* I admit this a more convenient syntax than doing it with dd
(which is the first thing that comes to mind for me), but could you
explain your reasoning for this patch a bit more? And/or any second
opinions out there?
Thanks,
Rob
* A person with three or more is going to have an endlessly expanding
todo list, you can just tell. I say this having spent months wrestling
with https://www.navcen.uscg.gov/pubs/gps/sigspec/gpssps1.pdf which
requires _5_ nanosecond-accurate clocks to accurately tell you where you
are. 4 in the satellites, one in the receiver.
Rob Landley
2017-06-27 20:30:17 UTC
Permalink
Raw Message
Post by Ilya Kuzmich
Post by Rob Landley
ping?
Let's see...
Post by Ilya Kuzmich
Not POSIX, but implemented in coreutils, busybox and freebsd.
1) Do you have a use case for this? Or did you implement this because
it's there? Denys added it to Busybox on February 25, 2013 but no
message titles in the busybox mailing list around then mention "head" (I
checked from February 2013 back to the previous November). I couldn't a
bug report in https://bugs.busybox.net/buglist.cgi?quicksearch=head
either. So it looks like when busybox was ~15 years old Denys added this
because he could, not because anybody requested it or particularly
noticed it was missing...
Convenience and compatibility.
It's widely used syntax, on github alone `"head -c" language:shell`
query returns 22,602 code results.
Personal perspective: my embedded linux $DAYJOB uses head -c alot.
You personally using it is good enough for me. :)

Applied. (And then I checked in a second nitpicking commit about
whitespace and replacing the man page link with a "deviations from
posix" comment instead, mostly because I was staring at it for so long.)
Post by Ilya Kuzmich
Post by Rob Landley
2) On ubuntu "echo hello | head -c 0" produces no output. This one looks
like it falls back to line based behavior?
No, it does not.
I've just tested my implementation and it produces no output either.
Ok.
Post by Ilya Kuzmich
Post by Rob Landley
3) The ubuntu version has a more complicated -c behavior than you
implemented, "head -c -6600 README" currently prints the first 24 bytes
of that file. Why did you stop there? Why do we need this part but not
all of it? (Our tail already implements the -c +k behavior, but somebody
had an existing use case that needed it...)
It's just that I don't need negative values.
Hmmm. I've added a todo item locally, but looking at it... it's a mess,
isn't it?
Post by Ilya Kuzmich
But hey - we could merge head.c and tail.c together.
I'd be all for that if I could figure out how to have a result simpler
than we started with.

Tail is kinda terrible: it has to remember data it's already seen and
then count backwards, and it needs two codepaths to do it remotely
efficiently (because reading through a multi-gigabyte file to display
the last 3 lines is unreasonably slow (sometimes minutes vs fraction of
a second), but you can't seek a pipe so "zcat | tail" has no choice but
to read). Plus you read data in blocks but parse it in lines so the
number of blocks you have to retain isn't fixed but can't start
outputting until you have all the data.

Head can do line at a time partial progress and forget what it's seen.
Until you get to -c with a negative value, where you have to retain all
the data you've seen because "zcat big.gz | head -c -999999999" doesn't
know when the file's over until it hits the end...

Eh, but it only has to buffer up to the negative value. Anything that
overflows from that it can print immediately. So it's not THAT bad, the
value you type is the limit on the memory allocation. (Still an out of
memory error in a can though, "cat /dev/zero | head -c -999999999999"
isn't going to be friendly to _any_ system...)

Rob

Loading...