Discussion:
[PATCH] tail fixes
(too old to reply)
Felix Janda
2013-08-31 10:53:09 UTC
Permalink
This should now hopefully fix the earlier segfaults.

In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)


POSIX tail BTW only accepts one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.

For tail -f, how about putting the fds into an array and then using
select() to watch the fds?

GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.

# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes

- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list

diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)

struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
char *data;
- int len;
+ size_t len;
};

-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);

- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
line->len = readall(fd, line->data, len);
+ line->next = 0;

- if (line->len < 1) {
+ if (line->len + 1 < 2) {
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct line_list));
+
+ xwrite(1, list->data, len);
free(list);
}

@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);

// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;

// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@

bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;

// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
list = temp;

// Count newlines in this chunk.
offset = list->len;
while (offset--) {
// If the last line ends with a newline, that one doesn't count.
- if (!flag) {
- flag++;
-
- continue;
- }
+ if (!flag) flag++;

// Start outputting data right after newline
- if (list->data[offset] == '\n' && !++lines) {
+ else if (list->data[offset] == '\n' && !++lines) {
offset++;
list->data += offset;
- list->len -= offset;

- break;
+ goto done;
}
}
}

+done:
// Output stored data
llist_traverse(list, dump_chunk);

@@ -143,58 +142,54 @@
// Are we measuring from the end of the file?

if (bytes<0 || lines<0) {
- struct line_list *list = 0, *new;
+ struct line_list *head = 0, *tail, *new;
+ // circular buffer of lines
+ struct {
+ char *start;
+ struct line_list *inchunk;
+ } *l = xzalloc(2*-lines*sizeof(void*));
+ int i = 0, flag = 0;
+ size_t count, len = bytes;

// The slow codepath is always needed, and can handle all input,
// so make lseek support optional.
- if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines));
+ if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines)) return;

// Read data until we run out, keep a trailing buffer
- else for (;;) {
- int len, count;
+ for (;;) {
char *try;

if (!(new = get_chunk(fd, sizeof(toybuf)))) break;
// append in order
- dlist_add_nomalloc((void *)&list, (struct double_list *)new);
+ if (head) tail->next = new;
+ else head = new;
+ tail = new;

- // Measure new chunk, discarding extra data from buffer
- len = new->len;
try = new->data;
- for (count=0; count<len; count++) {
- if ((toys.optflags & FLAG_c) && bytes) {
- bytes++;
- continue;
+ if (lines) for (count = 0; count < new->len; count++, try++) {
+ if (flag) { // last char was a newline
+ while (l[i].inchunk && (l[i].inchunk!=head)) free(llist_pop(&head));
+ l[i].inchunk = tail;
+ l[i].start = try;
+ i = (i + 1) % -lines;
+ flag = 0;
}
-
- if (lines) {
- if(try[count] != '\n' && count != len-1) continue;
- if (lines<0) {
- if (!++lines) ++lines;
- continue;
- }
+ if (*try == '\n') flag = 1;
+ } else { // bytes
+ if (len + new->len < len) flag = 1; // overflow -> have now read enough
+ for (len += new->len; flag && (len - head->len < len);) {
+ len -= head->len;
+ free(llist_pop(&head));
}
-
- // Time to discard data; given that bytes and lines were
- // nonzero coming in, we can't discard too much if we're
- // measuring right.
- do {
- char c = *(list->data++);
- if (!(--list->len)) {
- struct line_list *next = list->next;
- list->prev->next = next;
- list->next->prev = list->prev;
- free(list);
- list = next;
- }
- if (c == '\n') break;
- } while (lines);
}
}
+ if (lines) head->data = l[i].start;
+ else head->data += len;

// Output/free the buffer.
- llist_traverse(list, dump_chunk);
+ llist_traverse(head, dump_chunk);

+ free(l);
// Measuring from the beginning of the file.
} else for (;;) {
int len, offset = 0;
Felix Janda
2013-08-31 10:53:09 UTC
Permalink
This should now hopefully fix the earlier segfaults.

In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)


POSIX tail BTW only accepts one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.

For tail -f, how about putting the fds into an array and then using
select() to watch the fds?

GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.

# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes

- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list

diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)

struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
char *data;
- int len;
+ size_t len;
};

-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);

- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
line->len = readall(fd, line->data, len);
+ line->next = 0;

- if (line->len < 1) {
+ if (line->len + 1 < 2) {
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct line_list));
+
+ xwrite(1, list->data, len);
free(list);
}

@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);

// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;

// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@

bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;

// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
list = temp;

// Count newlines in this chunk.
offset = list->len;
while (offset--) {
// If the last line ends with a newline, that one doesn't count.
- if (!flag) {
- flag++;
-
- continue;
- }
+ if (!flag) flag++;

// Start outputting data right after newline
- if (list->data[offset] == '\n' && !++lines) {
+ else if (list->data[offset] == '\n' && !++lines) {
offset++;
list->data += offset;
- list->len -= offset;

- break;
+ goto done;
}
}
}

+done:
// Output stored data
llist_traverse(list, dump_chunk);

@@ -143,58 +142,54 @@
// Are we measuring from the end of the file?

if (bytes<0 || lines<0) {
- struct line_list *list = 0, *new;
+ struct line_list *head = 0, *tail, *new;
+ // circular buffer of lines
+ struct {
+ char *start;
+ struct line_list *inchunk;
+ } *l = xzalloc(2*-lines*sizeof(void*));
+ int i = 0, flag = 0;
+ size_t count, len = bytes;

// The slow codepath is always needed, and can handle all input,
// so make lseek support optional.
- if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines));
+ if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines)) return;

// Read data until we run out, keep a trailing buffer
- else for (;;) {
- int len, count;
+ for (;;) {
char *try;

if (!(new = get_chunk(fd, sizeof(toybuf)))) break;
// append in order
- dlist_add_nomalloc((void *)&list, (struct double_list *)new);
+ if (head) tail->next = new;
+ else head = new;
+ tail = new;

- // Measure new chunk, discarding extra data from buffer
- len = new->len;
try = new->data;
- for (count=0; count<len; count++) {
- if ((toys.optflags & FLAG_c) && bytes) {
- bytes++;
- continue;
+ if (lines) for (count = 0; count < new->len; count++, try++) {
+ if (flag) { // last char was a newline
+ while (l[i].inchunk && (l[i].inchunk!=head)) free(llist_pop(&head));
+ l[i].inchunk = tail;
+ l[i].start = try;
+ i = (i + 1) % -lines;
+ flag = 0;
}
-
- if (lines) {
- if(try[count] != '\n' && count != len-1) continue;
- if (lines<0) {
- if (!++lines) ++lines;
- continue;
- }
+ if (*try == '\n') flag = 1;
+ } else { // bytes
+ if (len + new->len < len) flag = 1; // overflow -> have now read enough
+ for (len += new->len; flag && (len - head->len < len);) {
+ len -= head->len;
+ free(llist_pop(&head));
}
-
- // Time to discard data; given that bytes and lines were
- // nonzero coming in, we can't discard too much if we're
- // measuring right.
- do {
- char c = *(list->data++);
- if (!(--list->len)) {
- struct line_list *next = list->next;
- list->prev->next = next;
- list->next->prev = list->prev;
- free(list);
- list = next;
- }
- if (c == '\n') break;
- } while (lines);
}
}
+ if (lines) head->data = l[i].start;
+ else head->data += len;

// Output/free the buffer.
- llist_traverse(list, dump_chunk);
+ llist_traverse(head, dump_chunk);

+ free(l);
// Measuring from the beginning of the file.
} else for (;;) {
int len, offset = 0;
Rob Landley
2013-09-04 23:46:44 UTC
Permalink
Replying with gmail, sorry if this winds up a bit garbled...
Post by Felix Janda
This should now hopefully fix the earlier segfaults.
Yay!

(Applied on the theory that what's there segfaults.)
Post by Felix Janda
In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)
POSIX tail BTW only accepts one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.
For tail -f, how about putting the fds into an array and then using
select() to watch the fds?
I've been thinking about that. It's a toss up between not wanting to
complicate the code and that being the obvious way to do that. (It
boils down to "is this worth doing".)

Right now netcat has either poll or select logic in it (I forget
which), and there are a couple other things like tee and less that
probably should have it. I have a vague note that if I can work out a
good library function abstracting this away into something simple,
using it in more places costs less. But haven't gone back to try to
work out a design. (Netcat's next big todo item is udp support, then
ipv6.)
Post by Felix Janda
GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.
Sheesh, I'd missed that last one. (Behave differently how?)
Post by Felix Janda
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes
- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list
diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)
struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
char *data;
- int len;
+ size_t len;
};
-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);
- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
Adding a space between char and *. Pet peeve of mine: people doing
"char* a, b;" and then avoiding declaring more than one pointer at a
time after getting bit by that because they blame the wrong thing for
their mistake.
Post by Felix Janda
line->len = readall(fd, line->data, len);
Which is returning signed, and -1 for error...
Post by Felix Janda
+ line->next = 0;
- if (line->len < 1) {
+ if (line->len + 1 < 2) {
And this trick is assuming -1 will wrap to 0 in unsigned, and is
uncomfortably subtle. Which is why len was an int in the first place.

Possibly len should be ssize_t?
Post by Felix Janda
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct line_list));
*blink* *blink*

Ok, data minus the start of the structure minus the size of the
structure... you're moving data forward _without_ adjusting len? And
assuming things about the layout of the structure...

Would it make more sense to add an "int used;" to the structure?
Post by Felix Janda
+
+ xwrite(1, list->data, len);
free(list);
}
@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);
Are single lines bigger than 2 gigabytes really a huge problem?
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
Negative one is less than zero...
Post by Felix Janda
// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@
bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;
// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
Ok, _that_ is a largeish behavior change. Was that the bug?

I tend to use double lists as an easy way to assemble them in-order,
and also because I have existing dlist functions to manipulate them.
(I didn't bother to write the single linked functions in the library
because they're about as easy to do inline... and then the lazy way
was just to use the double ones. Six of one...)

I need to read through the rest of this more closely, but this message
has already sat half finished on my desktop for a couple days now...

Rob
Rob Landley
2013-09-06 09:34:43 UTC
Permalink
Post by Felix Janda
This should now hopefully fix the earlier segfaults.
In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)
And made it back here with my email client rather than snooping in
gmail's web interface. Time for a closer look...
Post by Felix Janda
POSIX tail BTW only accepts one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.
I treat posix as a frame of reference to diverge from. The gnu/dammit
guys take credit for every third party contribution from 20 years of
Linux development (and some disgruntled solaris guys back in the 80's
when Ed Zander decided to unbundle the compiler from the OS and charge
extra for it, and they all installed gcc instead).

I care about "what's on my existing linux box", both the actual current
Ubuntu LTS, and the old 2008 version I have in a qemu image, and the
various gentoo/fedora/suse/decade old red hat 9" test images I have
lying around. Some of it's crazy and it's better not to do that, but
it's a point of reference. (So is Android, and busybox, and the linux
standard base...)

What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
Post by Felix Janda
For tail -f, how about putting the fds into an array and then using
select() to watch the fds?
GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes
- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list
diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)
struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
Double lists are easy to create in order, I usually do that when
sequencing is important.
Post by Felix Janda
char *data;
- int len;
+ size_t len;
};
You changed this from a signed to an unsigned value why? The largest
value fed into get_chunk is sizeof(toybuf), which easily fits in an
int. (If we're allocating a larger than 2 gigabyte block of data,
something is wrong.)
Post by Felix Janda
-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);
- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
line->len = readall(fd, line->data, len);
+ line->next = 0;
Hmmm... the caller was supposed to set this. I see the caller doesn't
do so reliably for the end of the list and that is a bug. Quite
possibly the rest of this patch is noise?
Post by Felix Janda
- if (line->len < 1) {
+ if (line->len + 1 < 2) {
It was signed for a reason; readall returns -1 on error, and depending
on wraparound like that is nonobvious and awkward.
Post by Felix Janda
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct
line_list));
+
+ xwrite(1, list->data, len);
free(list);
}
Actually the point of dump_chunk() was to output whole chunks. The
previous code should have either written up to the end of the previous
chunk or adjusted the start and length of the chunk for us.

(It's a llist_traverse() callback function to output all pending
buffered data.)

Sigh, it is _annoying_ to have two codepaths...
Post by Felix Janda
@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);
pos was a ssize_t because lseek() can return failure, now you've made
it a size_t which can't be negative. (Do you have something against
signed numbers?)
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
And yet you compare it with a negative. Maybe the bit patterns work
out, but I'm uncomfortable with that. I can see a compiler optimizing
this out simply because it can never be true. (That's actually what
happens when pos is unsigned char, due to type promotion.)
Post by Felix Janda
// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@
bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;
This offset is into the current chunk. Chunks are page size, and offset
is initialized to list->len which is at most sizeof(toybuf). So int is
plenty big for this.
Post by Felix Janda
// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
list = temp;
Actually you could unconditionally temp->next = list and that takes
care of null terminating the list if temp started null.

And, yes, I believe that fix is needed. :) (This instance is creating a
list in reverse order because we're reading the file in reverse order,
thus it doesn't need a doubly linked list.)
Post by Felix Janda
// Count newlines in this chunk.
offset = list->len;
while (offset--) {
// If the last line ends with a newline, that one doesn't
count.
- if (!flag) {
- flag++;
-
- continue;
- }
+ if (!flag) flag++;
// Start outputting data right after newline
- if (list->data[offset] == '\n' && !++lines) {
+ else if (list->data[offset] == '\n' && !++lines) {
Eh, ok.
Post by Felix Janda
offset++;
list->data += offset;
- list->len -= offset;
offset was initialized to list->len which can't be zero or get_chunk()
would have discarded it. Therefore, offset should never be > list->len
so this can't reduce it to a negative number.

Yet you decided to not adjust the length here, and instead complicate
dump_chunk with knowledge of the structure layout. Why?
Post by Felix Janda
- break;
+ goto done;
}
Which breaks us into a while(lines) loop, and we only got in here if
!++lines so lines has to be 0 _after_ the increment, so the outer loop
must also exit, so the goto is unnecessary.
Post by Felix Janda
}
}
// Output stored data
llist_traverse(list, dump_chunk);
@@ -143,58 +142,54 @@
// Are we measuring from the end of the file?
I thought the non-lseek tail case was already working? (Are there bugs
in both?)

The diffstat on this last hunk is:

onen--- | 64
++++++++++++++++++++++++++++++----------------------------------
1 file changed, 30 insertions(+), 34 deletions(-)

So a net savings of 4 lines, while adding a new intermediate structure.
Previously we had one data structure (a linked list of chunks), now
there's a second structure wrapping that other structure, an array
containing pointers to nodes of a linked list (that's still a linked
list).

Not sure that's actually simpler. Or faster. Or uses less memory.

Sigh. Large rewrite of how something works, including a lot of
unrelated changes, is not something I'm happy to see presented as a
bugfix. Especially when I'm trying to dig an answer to "so what was the
actual _bug_?" out of it, so I can confirm whether or not it was
actually fixed.

I am expending a lot of effort pondering what should be a simple
question.
Post by Felix Janda
if (bytes<0 || lines<0) {
- struct line_list *list = 0, *new;
+ struct line_list *head = 0, *tail, *new;
+ // circular buffer of lines
+ struct {
+ char *start;
+ struct line_list *inchunk;
+ } *l = xzalloc(2*-lines*sizeof(void*));
+ int i = 0, flag = 0;
+ size_t count, len = bytes;
malloc(0) is allowed to return NULL, which means xmalloc() will abort,
so in some libc implementations -c just stopped working.
Post by Felix Janda
// The slow codepath is always needed, and can handle all input,
// so make lseek support optional.
- if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines));
+ if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines)) return;
// Read data until we run out, keep a trailing buffer
- else for (;;) {
- int len, count;
+ for (;;) {
Eh, cosmetic, but ok.
Post by Felix Janda
char *try;
if (!(new = get_chunk(fd, sizeof(toybuf)))) break;
// append in order
- dlist_add_nomalloc((void *)&list, (struct double_list *)new);
+ if (head) tail->next = new;
+ else head = new;
+ tail = new;
Open code the function, adding more local variables? I can sort of see
it, but I tend to lean on the dlist functions because (as you can see
from above), manual linked list code is easy to screw up and hard to
read. (It's 8 extra bytes per 4k chunk to use common code, and you were
comfortable adding an entire array of structures here...)
Post by Felix Janda
- // Measure new chunk, discarding extra data from buffer
- len = new->len;
try = new->data;
- for (count=0; count<len; count++) {
- if ((toys.optflags & FLAG_c) && bytes) {
- bytes++;
- continue;
+ if (lines) for (count = 0; count < new->len; count++, try++) {
+ if (flag) { // last char was a newline
+ while (l[i].inchunk && (l[i].inchunk!=head))
free(llist_pop(&head));
+ l[i].inchunk = tail;
+ l[i].start = try;
+ i = (i + 1) % -lines;
+ flag = 0;
}
-
- if (lines) {
- if(try[count] != '\n' && count != len-1) continue;
- if (lines<0) {
- if (!++lines) ++lines;
- continue;
- }
+ if (*try == '\n') flag = 1;
+ } else { // bytes
Actually we don't have an exclusion in the option string for [!nc] and
I vaguely recall I was trying to make it work if you specified both at
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)

That's why -c ate its argument (counting up), and then -n ate its
argument (counting up) in sequence. However, other implementations
instead make "tail -c 7 -n 3" act like just -n 3, and "tail -n 3 -c 7"
act like -c 7. And the way to do _that_ is a [-cn] exclusion on the
string, then check the flag.

(Hmmm... ideally I'd have lib/args.c zero the save slot when it unsets
the flag, then we don't have to check the flags... Ok, I think I can
make that work without too much extra code.)
Post by Felix Janda
+ if (len + new->len < len) flag = 1; // overflow -> have now
read enough
+ for (len += new->len; flag && (len - head->len < len);) {
+ len -= head->len;
+ free(llist_pop(&head));
}
-
- // Time to discard data; given that bytes and lines were
- // nonzero coming in, we can't discard too much if we're
- // measuring right.
- do {
- char c = *(list->data++);
- if (!(--list->len)) {
- struct line_list *next = list->next;
- list->prev->next = next;
- list->next->prev = list->prev;
- free(list);
- list = next;
- }
- if (c == '\n') break;
- } while (lines);
}
}
+ if (lines) head->data = l[i].start;
+ else head->data += len;
// Output/free the buffer.
- llist_traverse(list, dump_chunk);
+ llist_traverse(head, dump_chunk);
+ free(l);
Sigh. The previous design was _just_ doing linked list traversal, and
it was pretty much the same sort of linked list traversal in both
codepaths.

This change has made the two codepaths have fundamentally different
designs, taking them further away from being unified. (Not that they
necessarily can be unified, but the more different the two
implementations are the harder the command as a whole is to understand.
This makes me uncomfortable.)
Post by Felix Janda
// Measuring from the beginning of the file.
} else for (;;) {
int len, offset = 0;
I thought the existing design was reasonably straightforward:

Loop reading chunks:
1) Count up requested bytes until full
2) Count up requested lines until full
3) Once we have enough data but not the _right_ data,
discard from head as we add to tail.
Print result.

The implementation might not have been, but we shouldn't need more
structures to track this. The old approach even had reasonably good
cache locality. I can see objecting to incrementing bytes++ each time
for large -c values (didn't want to special case it and L1 cache local
spinning on a register to count up to even a few million should be way
less than the read() overhead, but it still wastes battery...)

Let's see, assuming the actual bugs were the uninitialized next pointer
in the last list entry and the list entries being added in the wrong
order (so only the last list entry every showed), so if I separate the
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...

toys/*/tail.c | tail -n 87) | diffstat
one | 66
+++++++++++++++++++++++++++++++-----------------------------------
1 file changed, 31 insertions(+), 35 deletions(-)

Heh, same number of lines. And I added comments...

Now this is cheating slightly because I added a dlist_pop() function
that adjusts the prev and next pointers to lib/llist.c. (The two
different code paths each discard chunks, so a function is better than
inline.) I could instead do the two pointer thing you're doing to
create a singly linked list inline, but I'd want to make a generic
function to do that and add it to lib/llist.c instead of open coding it
here, and last I tried it that required having a magic list head
structure which I'd rather not do...

(Sorry, I've been vaguely meaning to come back to tail and see if I
could clean this up and unify the codepaths. The backwards traversal vs
forwards traversal thing is what doubly linked lists are _for_. But
every time I sit down to bang on toybox I've got five more messages
from people submitting more code for me to look at, and I never get to
any of my own todo list because I'm perpetually behind...)

Grrr, all right, let's confront the design issue you've raised: am I
ever using the doubly linked list for anything _except_ in-order singly
linked list creation? Let's see, the current users are ls, xargs, tail,
patch, and sed.

ls: is abusing a dirtree * to do in-order creation via
dlist_add_nomalloc() (with ->parent standing in for ->prev) and then
cleaning it up with dlist_to_dirtree() function that's only ever called
from one place... that could use some cleanup. Anyway, it's never using
prev, just writing to it or breaking the circular list with it.

patch: in order creation, using prev to terminate the circular list to
get a singly linked list, and at one point removing an item while
maintaining the circular list (so adjusting the pointers, but that's
write not read).

xargs: in order creation, never using prev except to break the chain.

sed: unfinished. I'd love to get time to work on it but tonight I'm
doing this instead.

tail: case in point.

So it _looks_ like the dlist stuff can go, and be replaced by a
function that does in order creation of singly linked lists. (If I
really need something that can be traversed both ways, so far it's
either an array or a tree.)

Right. What does said function look like? Because:

llist_add_tail(void *list, void *end, void *new);

Is... kinda sad. It only writes to list if it was null (initializing an
empty list), should that be the caller's job? Can it _return_ list?

list = end = 0;

while (new = get()) list = llist_add_tail(&end, new);

No, then it doesn't know what list was after the first call, I've got
to pass it in so it can check if it's not NULL or have the caller do
that. The inline version is something like:

struct listy *list = 0, *end = 0, *new;

while (new = get()) {
if (!list) list = new;
else end->next = new;
new->next = 0;
end = new;
}

And I wrap that in a function how?

Sigh:

struct listy *list = 0, *new;

while (new = get()) dlist_add_nomalloc(&list, new);

You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find the
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I get
the ability to traverse the list backwards, even if I never use it.)

On the bright side, if I do keep dlist_pop() I can use it in patch to
trim a couple lines there...

Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on tuesday
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)

Rob
Felix Janda
2013-09-06 21:48:10 UTC
Permalink
Post by Rob Landley
Post by Felix Janda
This should now hopefully fix the earlier segfaults.
In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)
And made it back here with my email client rather than snooping in
gmail's web interface. Time for a closer look...
Post by Felix Janda
POSIX tail BTW does only accept one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.
I treat posix as a frame of reference to diverge from. The gnu/dammit
guys take credit for every third party contribution from 20 years of
Linux development (and some disgruntled solaris guys back in the 80's
when Ed Zander decided to unbundle the compiler from the OS and charge
extra for it, and they all installed gcc instead).
I care about "what's on my existing linux box", both the actual current
Ubuntu LTS, and the old 2008 version I have in a qemu image, and the
various gentoo/fedora/suse/decade old red hat 9" test images I have
lying around. Some of it's crazy and it's better not to do that, but
it's a point of reference. (So is Android, and busybox, and the linux
standard base...)
What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
I just wanted to mention this. It means that we have some more freedom
in the implementation.
Post by Rob Landley
Post by Felix Janda
For tail -f, how about putting the fds into an array and then using
select() to watch the fds?
GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes
- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list
diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)
struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
Double lists are easy to create in order, I usually do that when
sequencing is important.
I was not aware of this. prev just seemed like another thing to
keep track of. do_tail previously unnecessarily kept track of it
when dropping no longer necessary chunks.
Post by Rob Landley
Post by Felix Janda
char *data;
- int len;
+ size_t len;
};
You changed this from a signed to an unsigned value why? The largest
value fed into get_chunk is sizeof(toybuf), which easily fits in an
int. (If we're allocating a larger than 2 gigabyte block of data,
something is wrong.)
I just feel more comfortable with modular arithmetic than possibly
undefined behavior. In this case it doesn't really matter.
Post by Rob Landley
Post by Felix Janda
-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);
- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
line->len = readall(fd, line->data, len);
+ line->next = 0;
Hmmm... the caller was supposed to set this. I see the caller doesn't
do so reliably for the end of the list and that is a bug. Quite
possibly the rest of this patch is noise?
Some large part. Sorry.
Post by Rob Landley
Post by Felix Janda
- if (line->len < 1) {
+ if (line->len + 1 < 2) {
It was signed for a reason; readall returns -1 on error, and depending
on wraparound like that is nonobvious and awkward.
Unless you want to do a signed comparison there is no problem of storing
-1 in an unsigned variable. -1 makes sense in any ring.

But just revert this, if it makes you feel uncomfortable.
Post by Rob Landley
Post by Felix Janda
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct line_list));
+
+ xwrite(1, list->data, len);
free(list);
}
Actually the point of dump_chunk() was to output whole chunks. The
previous code should have either written up to the end of the previous
chunk or adjusted the start and length of the chunk for us.
(It's a llist_traverse() callback function to output all pending
buffered data.)
That was just a horrible attempt to shave off a line. See my other mail
on how to fix it.
Post by Rob Landley
Sigh, it is _annoying_ to have two codepaths...
They are quite different after all. There even two code paths in
do_tail() and my patch introduces another one...
Post by Rob Landley
Post by Felix Janda
@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);
pos was a ssize_t because lseek() can return failure, now you've made
it a size_t which can't be negative. (Do you have something against
signed numbers?)
It can't be negative. But it can be -1. But actually we should use
an off_t for pos. One might be curious about the end of a 8GB file
in a 32bit system.
Post by Rob Landley
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
And yet you compare it with a negative. Maybe the bit patterns work
out, but I'm uncomfortable with that. I can see a compiler optimizing
this out simply because it can never be true. (That's actually what
happens when pos is unsigned char, due to type promotion.)
You are right. It should be -1u. (This can only be optimized out if
sizeof(size_t) < sizeof(int) (is that possible?). Otherwise there are
two cases: If pos can be represented as an int, it will be converted
to an int and we have a signed comparison. Otherwise -1 is promoted
to size_t and an unsigned comparison happens. The implementation of
signed numbers is irrelevant.)
Post by Rob Landley
Post by Felix Janda
// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@
bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;
This offset is into the current chunk. Chunks are page size, and offset
is initialized to list->len which is at most sizeof(toybuf). So int is
plenty big for this.
Yes.
Post by Rob Landley
Post by Felix Janda
// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
list = temp;
Actually you could unconditionally temp->next = list and that takes
care of null terminating the list if temp started null.
Yes. (currently your if clause is false.)
Post by Rob Landley
And, yes, I believe that fix is needed. :) (This instance is creating a
list in reverse order because we're reading the file in reverse order,
thus it doesn't need a doubly linked list.)
Right.
Post by Rob Landley
Post by Felix Janda
// Count newlines in this chunk.
offset = list->len;
while (offset--) {
// If the last line ends with a newline, that one doesn't count.
- if (!flag) {
- flag++;
-
- continue;
- }
+ if (!flag) flag++;
// Start outputting data right after newline
- if (list->data[offset] == '\n' && !++lines) {
+ else if (list->data[offset] == '\n' && !++lines) {
Eh, ok.
Post by Felix Janda
offset++;
list->data += offset;
- list->len -= offset;
offset was initialized to list->len which can't be zero or get_chunk()
would have discarded it. Therefore, offset should never be > list->len
so this can't reduce it to a negative number.
Yet you decided to not adjust the length here, and instead complicate
dump_chunk with knowledge of the structure layout. Why?
I gave a (very lame) explanation above.
Post by Rob Landley
Post by Felix Janda
- break;
+ goto done;
}
Which breaks us into a while(lines) loop, and we only got in here if
!++lines so lines has to be 0 _after_ the increment, so the outer loop
must also exit, so the goto is unnecessary.
Sorry, that's a remnant of a previous version. (I wasn't sure whether the
code was right in the case that the last line doesn't end with a newline.)
Post by Rob Landley
Post by Felix Janda
}
}
// Output stored data
llist_traverse(list, dump_chunk);
@@ -143,58 +142,54 @@
// Are we measuring from the end of the file?
I thought the non-lseek tail case was already working? (Are there bugs
in both?)
In some cases... On the contrary, I had produced a reproducible segfault
in the non-lseek logic and unexpectedly found a bug in the lseek logic.
Post by Rob Landley
onen--- | 64
++++++++++++++++++++++++++++++----------------------------------
1 file changed, 30 insertions(+), 34 deletions(-)
So a net savings of 4 lines, while adding a new intermediate structure.
Previously we had one data structure (a linked list of chunks), now
there's a second structure wrapping that other structure, an array
containing pointers to nodes of a linked list (that's still a linked
list).
Not sure that's actually simpler. Or faster. Or uses less memory.
Sigh. Large rewrite of how something works, including a lot of
unrelated changes, is not something I'm happy to see presented as a
bugfix. Especially when I'm trying to dig an answer to "so what was the
actual _bug_?" out of it, so I can confirm whether or not it was
actually fixed.
The previous code had the assumption that every chunk is terminated by
a newline (which is utterly wrong). This is the simplest implementation
I could come up. I'll try to explain further things below.

After spending quite some time with this reply I see the simple
(less efficient (in terms of number of instructions)) algorithm.
Post by Rob Landley
I am exerting a lot of pondering answering what should be a simple
question.
Post by Felix Janda
if (bytes<0 || lines<0) {
- struct line_list *list = 0, *new;
+ struct line_list *head = 0, *tail, *new;
+ // circular buffer of lines
+ struct {
+ char *start;
+ struct line_list *inchunk;
+ } *l = xzalloc(2*-lines*sizeof(void*));
+ int i = 0, flag = 0;
+ size_t count, len = bytes;
malloc(0) is allowed to return NULL, which means xmalloc() will abort,
so in some libc implementations -c just stopped working.
That's bad. Could one declare it as an array on the stack instead?
Post by Rob Landley
Post by Felix Janda
// The slow codepath is always needed, and can handle all input,
// so make lseek support optional.
- if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines));
+ if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines)) return;
// Read data until we run out, keep a trailing buffer
- else for (;;) {
- int len, count;
+ for (;;) {
Eh, cosmetic, but ok.
Post by Felix Janda
char *try;
if (!(new = get_chunk(fd, sizeof(toybuf)))) break;
// append in order
- dlist_add_nomalloc((void *)&list, (struct double_list *)new);
+ if (head) tail->next = new;
+ else head = new;
+ tail = new;
Open code the function, adding more local variables? I can sort of see
it, but I tend to lean on the dlist functions because (as you can see
from above), manual linked list code is easy to screw up and hard to
read. (It's 8 extra bytes per 4k chunk to use common code, and you were
comfortable adding an entire array of structures here...)
I just thought simple linked lists were easier to keep track of. I see
your point. (Again more about the structures below.)
Post by Rob Landley
Post by Felix Janda
- // Measure new chunk, discarding extra data from buffer
- len = new->len;
try = new->data;
- for (count=0; count<len; count++) {
- if ((toys.optflags & FLAG_c) && bytes) {
- bytes++;
- continue;
+ if (lines) for (count = 0; count < new->len; count++, try++) {
+ if (flag) { // last char was a newline
+ while (l[i].inchunk && (l[i].inchunk!=head))
free(llist_pop(&head));
+ l[i].inchunk = tail;
+ l[i].start = try;
+ i = (i + 1) % -lines;
+ flag = 0;
}
-
- if (lines) {
- if(try[count] != '\n' && count != len-1) continue;
- if (lines<0) {
- if (!++lines) ++lines;
- continue;
- }
+ if (*try == '\n') flag = 1;
+ } else { // bytes
Actually we don't have an exclusion in the option string for [!nc] and
I vaguely recall I was trying to make it work if you specified both at
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)
I didn't know that. Could you specify it in more detail (the order matters)?
Post by Rob Landley
That's why -c ate its argument (counting up), and then -n ate its
argument (counting up) in sequence. However, other implementations
instead make "tail -c 7 -n 3" act like just -n 3, and "tail -n 3 -c 7"
act like -c 7. And the way to do _that_ is a [-cn] exclusion on the
string, then check the flag.
(Hmmm... ideally I'd have lib/args.c zero the save slot when it unsets
the flag, then we don't have to check the flags... Ok, I think I can
make that work without too much extra code.)
Post by Felix Janda
+ if (len + new->len < len) flag = 1; // overflow -> have now read enough
+ for (len += new->len; flag && (len - head->len < len);) {
+ len -= head->len;
+ free(llist_pop(&head));
}
-
- // Time to discard data; given that bytes and lines were
- // nonzero coming in, we can't discard too much if we're
- // measuring right.
- do {
- char c = *(list->data++);
- if (!(--list->len)) {
- struct line_list *next = list->next;
- list->prev->next = next;
- list->next->prev = list->prev;
- free(list);
- list = next;
- }
- if (c == '\n') break;
- } while (lines);
}
}
+ if (lines) head->data = l[i].start;
+ else head->data += len;
// Output/free the buffer.
- llist_traverse(list, dump_chunk);
+ llist_traverse(head, dump_chunk);
+ free(l);
Sigh. The previous design was _just_ doing linked list traversal, and
it was pretty much the same sort of linked list traversal in both
codepaths.
This change has made the two codepaths have fundamentally different
designs, taking them further away from being unified. (Not that they
necessarily can be unified, but the more different the two
implementations are the harder the command as a whole is to understand.
This makes me uncomfortable.)
The lines variant just needs to keep track of much more data so I didn't
want to burden the bytes method with that. (And the count++ seemed quite
inefficient for the bytes.)
Post by Rob Landley
Post by Felix Janda
// Measuring from the beginning of the file.
} else for (;;) {
int len, offset = 0;
1) Count up requested bytes until full
2) Count up requested lines until full
3) Once we have enough data but not the _right_ data,
discard from head as we add to tail.
Print result.
Now it makes some sense to me. The "Time to discard..." section is
however wrong. If we have enough data, so that lines = 0, the loop
is only run once. It seems to me that in this case for each new byte
we add to the data at the end we drop one at the beginning. How should
that, once we reach the end of the file, tell us the right
point at which to start outputting the lines?
Post by Rob Landley
The implementation might not have been, but we shouldn't need more
structures to track this. The old approach even had reasonably good
cache locality. I can see objecting to incrementing bytes++ each time
for large -c values (didn't want to special case it and L1 cache local
spinning on a register to count up to even a few million should be way
less than the read() overhead, but it still wastes battery...)
The extra structure keeps track of the lines found so far. It remembers
the start of each line and, because of the existence of segmented
memory it also has to remember which chunk the line starts in. Using
the structure there is no need to reread the list to find out where the
lines were located.
Post by Rob Landley
Let's see, assuming the actual bugs were the uninitialized next pointer
in the last list entry and the list entries being added in the wrong
order (so only the last list entry every showed), so if I separate the
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...
There were more. (Just try the previous lseek logic with something big
containing no newlines.)
Post by Rob Landley
toys/*/tail.c | tail -n 87) | diffstat
one | 66
+++++++++++++++++++++++++++++++-----------------------------------
1 file changed, 31 insertions(+), 35 deletions(-)
Heh, same number of lines. And I added comments...
Now this is cheating slightly because I added a dlist_pop() function
that adjusts the prev and next pointers to lib/llist.c. (The two
different code paths each discard chunks, so a function is better than
inline.) I could instead do the two pointer thing you're doing to
create a singly linked list inline, but I'd want to make a generic
function to do that and add it to lib/llist.c instead of open coding it
here, and last I tried it that required having a magic list head
structure which I'd rather not do...
Do you really understand the "Time to discard data;..." logic? The
comment was not really helpful for me.
Post by Rob Landley
(Sorry, I've been vaguely meaning to come back to tail and see if I
could clean this up and unify the codepaths. The backwards traversal vs
forwards traversal thing is what doubly linked lists are _for_. But
every time I sit down to bang on toybox I've got five more messages
from people submitting more code for me to look at, and I never get to
any of my own todo list because I'm perpetually behind...)
Grrr, all right, let's confront the design issue you've raised: am I
ever using the doubly linked list for anything _except_ in-order singly
linked list creation? Let's see, the current users are ls, xargs, tail,
patch, and sed.
Just to sketch what I was thinking: I noticed that no doubly-linked list
was strictly necessary in the code. Then I found that there is no
llist_add_nomalloc. Finally I concluded that it must be so trivial that
it should be done inline...
Post by Rob Landley
ls: is abusing a dirtree * to do in-order creation via
dlist_add_nomalloc() (with ->parent standing in for ->prev) and then
cleaning it up with dlist_to_dirtree() function that's only ever called
from one place... that could use some cleanup. Anyway, it's never using
prev, just writing to it or breaking the circular list with it.
patch: in order creation, using prev to terminate teh circular list to
get a singly linked list, and at one point removing an item while
maintaining the circular list (so adjusting the pointers, but that's
write not read).
xargs: in order creation, never using prev except to break the chain.
sed: unfinished. I'd love to get time to work on it but tonight I'm
doing this instead.
tail: case in point.
So it _looks_ like the dlist stuff can go, and be replaced by a
function that does in order creation of singly linked lists. (If I
really need something that can be traversed both ways, so far it's
either an array or a tree.)
llist_add_tail(void *list, void *end, void *new);
Is... kinda sad. It only writes to list if it was null (initializing an
empty list), should that be the caller's job? Can it _return_ list?
list = end = 0;
while (new = get()) list = llist_add_tail(&end, new);
No, then it doesn't know what list was after the first call, I've got
to pass it in so it can check if it's not NULL or have the caller do
struct listy *list = 0, *end = 0, *new;
while (new = get()) {
if (!list) list = new;
else end->next = new;
new->next = 0;
end = new;
}
And I wrap that in a function how?
struct listy *list = 0, *new;
while (new = get()) dlist_add_nomalloc(&list, new);
You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find the
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I get
the ability to traverse the list backwards, even if I never use it.)
It's still a bit confusing to me. list always points to the head and
list->prev points to the tail? Could you maybe remark that somewhere?
So it's nearly the same as the three argument version. (But we get a
doubly linked list in the end.)
Post by Rob Landley
On the bright side, if I do keep dlist_pop() I can use it in patch to
trim a couple lines there...
Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on tuesday
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)
Sorry for keeping you up with this patch.

Felix
Felix Janda
2013-09-07 20:39:41 UTC
Permalink
Post by Felix Janda
Post by Rob Landley
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
And yet you compare it with a negative. Maybe the bit patterns work
out, but I'm uncomfortable with that. I can see a compiler optimizing
this out simply because it can never be true. (That's actually what
happens when pos is unsigned char, due to type promotion.)
You are right. It should be -1u. (This can only be optimized out if
Oh, it should rather be (size_t)-1.
Felix Janda
2013-09-07 20:39:41 UTC
Permalink
Post by Felix Janda
Post by Rob Landley
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
And yet you compare it with a negative. Maybe the bit patterns work
out, but I'm uncomfortable with that. I can see a compiler optimizing
this out simply because it can never be true. (That's actually what
happens when pos is unsigned char, due to type promotion.)
You are right. It should be -1u. (This can only be optimized out if
Oh, it should rather be (size_t)-1.
Felix Janda
2013-09-06 21:48:10 UTC
Permalink
Post by Rob Landley
Post by Felix Janda
This should now hopefully fix the earlier segfaults.
In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)
And made it back here with my email client rather than snooping in
gmail's web interface. Time for a closer look...
Post by Felix Janda
POSIX tail BTW does only accept one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.
I treat posix as a frame of reference to diverge from. The gnu/dammit
guys take credit for every third party contribution from 20 years of
Linux development (and some disgruntled solaris guys back in the 80's
when Ed Zander decided to unbundle the compiler from the OS and charge
extra for it, and they all installed gcc instead).
I care about "what's on my existing linux box", both the actual current
Ubuntu LTS, and the old 2008 version I have in a qemu image, and the
various gentoo/fedora/suse/decade old red hat 9" test images I have
lying around. Some of it's crazy and it's better not to do that, but
it's a point of reference. (So is Android, and busybox, and the linux
standard base...)
What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
I just wanted to mention this. It means that we have some more freedom
in the implementation.
Post by Rob Landley
Post by Felix Janda
For tail -f, how about putting the fds into an array and then using
select() to watch the fds?
GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes
- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list
diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)
struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
Double lists are easy to create in order, I usually do that when
sequencing is important.
I was not aware of this. prev just seemed like another thing to
keep track of. do_tail previously unnecessarily kept track of it
when dropping no longer necessary chunks.
Post by Rob Landley
Post by Felix Janda
char *data;
- int len;
+ size_t len;
};
You changed this from a signed to an unsigned value why? The largest
value fed into get_chunk is sizeof(toybuf), which easily fits in an
int. (If we're allocating a larger than 2 gigabyte block of data,
something is wrong.)
I just feel more comfortable with modular arithmetic than possibly
undefined behavior. In this case it doesn't really matter.
Post by Rob Landley
Post by Felix Janda
-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);
- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
line->len = readall(fd, line->data, len);
+ line->next = 0;
Hmmm... the caller was supposed to set this. I see the caller doesn't
do so reliably for the end of the list and that is a bug. Quite
possibly the rest of this patch is noise?
Some large part. Sorry.
Post by Rob Landley
Post by Felix Janda
- if (line->len < 1) {
+ if (line->len + 1 < 2) {
It was signed for a reason; readall returns -1 on error, and depending
on wraparound like that is nonobvious and awkward.
Unless you want to do a signed comparison there is no problem of storing
-1 in an unsigned variable. -1 makes sense in any ring.

But just revert this, if it makes you feel uncomfortable.
Post by Rob Landley
Post by Felix Janda
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct line_list));
+
+ xwrite(1, list->data, len);
free(list);
}
Actually the point of dump_chunk() was to output whole chunks. The
previous code should have either written up to the end of the previous
chunk or adjusted the start and length of the chunk for us.
(It's a llist_traverse() callback function to output all pending
buffered data.)
That was just a horrible attempt to shave off a line. See my other mail
on how to fix it.
Post by Rob Landley
Sigh, it is _annoying_ to have two codepaths...
They are quite different after all. There even two code paths in
do_tail() and my patch introduces another one...
Post by Rob Landley
Post by Felix Janda
@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);
pos was a ssize_t because lseek() can return failure, now you've made
it a size_t which can't be negative. (Do you have something against
signed numbers?)
It can't be negative. But it can be -1. But actually we should use
an off_t for pos. One might be curious about the end of a 8GB file
in a 32bit system.
Post by Rob Landley
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
And yet you compare it with a negative. Maybe the bit patterns work
out, but I'm uncomfortable with that. I can see a compiler optimizing
this out simply because it can never be true. (That's actually what
happens when pos is unsigned char, due to type promotion.)
You are right. It should be -1u. (This can only be optimized out if
sizeof(size_t) < sizeof(int) (is that possible?). Otherwise there are
two cases: If pos can be represented as an int, it will be converted
to an int and we have a signed comparison. Otherwise -1 is promoted
to size_t and an unsigned comparison happens. The implementation of
signed numbers is irrelevant.)
Post by Rob Landley
Post by Felix Janda
// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@
bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;
This offset is into the current chunk. Chunks are page size, and offset
is initialized to list->len which is at most sizeof(toybuf). So int is
plenty big for this.
Yes.
Post by Rob Landley
Post by Felix Janda
// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
list = temp;
Actually you could unconditionally temp->next = list and that takes
care of null terminating the list if temp started null.
Yes. (currently your if clause is false.)
Post by Rob Landley
And, yes, I believe that fix is needed. :) (This instance is creating a
list in reverse order because we're reading the file in reverse order,
thus it doesn't need a doubly linked list.)
Right.
Post by Rob Landley
Post by Felix Janda
// Count newlines in this chunk.
offset = list->len;
while (offset--) {
// If the last line ends with a newline, that one doesn't count.
- if (!flag) {
- flag++;
-
- continue;
- }
+ if (!flag) flag++;
// Start outputting data right after newline
- if (list->data[offset] == '\n' && !++lines) {
+ else if (list->data[offset] == '\n' && !++lines) {
Eh, ok.
Post by Felix Janda
offset++;
list->data += offset;
- list->len -= offset;
offset was initialized to list->len which can't be zero or get_chunk()
would have discarded it. Therefore, offset should never be > list->len
so this can't reduce it to a negative number.
Yet you decided to not adjust the length here, and instead complicate
dump_chunk with knowledge of the structure layout. Why?
I gave a (very lame) explanation above.
Post by Rob Landley
Post by Felix Janda
- break;
+ goto done;
}
Which breaks us into a while(lines) loop, and we only got in here if
!++lines so lines has to be 0 _after_ the increment, so the outer loop
must also exit, so the goto is unnecessary.
Sorry, that's a remnant of a previous version. (I wasn't sure whether the
code was right in the case that the last line doesn't end with a newline.)
Post by Rob Landley
Post by Felix Janda
}
}
// Output stored data
llist_traverse(list, dump_chunk);
@@ -143,58 +142,54 @@
// Are we measuring from the end of the file?
I thought the non-lseek tail case was already working? (Are there bugs
in both?)
In some cases... On the contrary, I had produced a reproducible segfault
in the non-lseek logic and unexpectedly found a bug in the lseek logic.
Post by Rob Landley
one | 64
++++++++++++++++++++++++++++++----------------------------------
1 file changed, 30 insertions(+), 34 deletions(-)
So a net savings of 4 lines, while adding a new intermediate structure.
Previously we had one data structure (a linked list of chunks), now
there's a second structure wrapping that other structure, an array
containing pointers to nodes of a linked list (that's still a linked
list).
Not sure that's actually simpler. Or faster. Or uses less memory.
Sigh. Large rewrite of how something works, including a lot of
unrelated changes, is not something I'm happy to see presented as a
bugfix. Especially when I'm trying to dig an answer to "so what was the
actual _bug_?" out of it, so I can confirm whether or not it was
actually fixed.
The previous code had the assumption that every chunk is terminated by
a newline (which is utterly wrong). This is the simplest implementation
I could come up with. I'll try to explain further things below.

After spending quite some time with this reply I see the simple
(less efficient (in terms of number of instructions)) algorithm.
Post by Rob Landley
I am exerting a lot of pondering answering what should be a simple
question.
Post by Felix Janda
if (bytes<0 || lines<0) {
- struct line_list *list = 0, *new;
+ struct line_list *head = 0, *tail, *new;
+ // circular buffer of lines
+ struct {
+ char *start;
+ struct line_list *inchunk;
+ } *l = xzalloc(2*-lines*sizeof(void*));
+ int i = 0, flag = 0;
+ size_t count, len = bytes;
malloc(0) is allowed to return NULL, which means xmalloc() will abort,
so in some libc implementations -c just stopped working.
That's bad. Could one declare it as an array on the stack instead?
Post by Rob Landley
Post by Felix Janda
// The slow codepath is always needed, and can handle all input,
// so make lseek support optional.
- if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines));
+ if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines)) return;
// Read data until we run out, keep a trailing buffer
- else for (;;) {
- int len, count;
+ for (;;) {
Eh, cosmetic, but ok.
Post by Felix Janda
char *try;
if (!(new = get_chunk(fd, sizeof(toybuf)))) break;
// append in order
- dlist_add_nomalloc((void *)&list, (struct double_list *)new);
+ if (head) tail->next = new;
+ else head = new;
+ tail = new;
Open code the function, adding more local variables? I can sort of see
it, but I tend to lean on the dlist functions because (as you can see
from above), manual linked list code is easy to screw up and hard to
read. (It's 8 extra bytes per 4k chunk to use common code, and you were
comfortable adding an entire array of structures here...)
I just thought simple linked lists were easier to keep track of. I see
your point. (Again more about the structures below.)
Post by Rob Landley
Post by Felix Janda
- // Measure new chunk, discarding extra data from buffer
- len = new->len;
try = new->data;
- for (count=0; count<len; count++) {
- if ((toys.optflags & FLAG_c) && bytes) {
- bytes++;
- continue;
+ if (lines) for (count = 0; count < new->len; count++, try++) {
+ if (flag) { // last char was a newline
+ while (l[i].inchunk && (l[i].inchunk!=head))
free(llist_pop(&head));
+ l[i].inchunk = tail;
+ l[i].start = try;
+ i = (i + 1) % -lines;
+ flag = 0;
}
-
- if (lines) {
- if(try[count] != '\n' && count != len-1) continue;
- if (lines<0) {
- if (!++lines) ++lines;
- continue;
- }
+ if (*try == '\n') flag = 1;
+ } else { // bytes
Actually we don't have an exclusion in the option string for [!nc] and
I vaguely recall I was trying to make it work if you specified both at
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)
I didn't know that. Could you specify it in more detail (the order matters)?
Post by Rob Landley
That's why -c ate its argument (counting up), and then -n ate its
argument (counting up) in sequence. However, other implementations
instead make "tail -c 7 -n 3" act like just -n 3, and "tail -n 3 -c 7"
act like -c 7. And the way to do _that_ is a [-cn] exclusion on the
string, then check the flag.
(Hmmm... ideally I'd have lib/args.c zero the save slot when it unsets
the flag, then we don't have to check the flags... Ok, I think I can
make that work without too much extra code.)
Post by Felix Janda
+ if (len + new->len < len) flag = 1; // overflow -> have now read enough
+ for (len += new->len; flag && (len - head->len < len);) {
+ len -= head->len;
+ free(llist_pop(&head));
}
-
- // Time to discard data; given that bytes and lines were
- // nonzero coming in, we can't discard too much if we're
- // measuring right.
- do {
- char c = *(list->data++);
- if (!(--list->len)) {
- struct line_list *next = list->next;
- list->prev->next = next;
- list->next->prev = list->prev;
- free(list);
- list = next;
- }
- if (c == '\n') break;
- } while (lines);
}
}
+ if (lines) head->data = l[i].start;
+ else head->data += len;
// Output/free the buffer.
- llist_traverse(list, dump_chunk);
+ llist_traverse(head, dump_chunk);
+ free(l);
Sigh. The previous design was _just_ doing linked list traversal, and
it was pretty much the same sort of linked list traversal in both
codepaths.
This change has made the two codepaths have fundamentally different
designs, taking them further away from being unified. (Not that they
necessarily can be unified, but the more different the two
implementations are the harder the command as a whole is to understand.
This makes me uncomfortable.)
The lines variant just needs to keep track of much more data so I didn't
want to burden the bytes method with that. (And the count++ seemed quite
inefficient for the bytes.)
Post by Rob Landley
Post by Felix Janda
// Measuring from the beginning of the file.
} else for (;;) {
int len, offset = 0;
1) Count up requested bytes until full
2) Count up requested lines until full
3) Once we have enough data but not the _right_ data,
discard from head as we add to tail.
Print result.
Now it makes some sense to me. The "Time to discard..." section is
however wrong. If we have enough data, so that lines = 0, the loop
is only run once. It seems to me that in this case for each new byte
we add to the data at the end, we drop one at the beginning. How is
this supposed, by the time we reach the end of the file, to tell us the right
point to output the lines?
Post by Rob Landley
The implementation might not have been, but we shouldn't need more
structures to track this. The old approach even had reasonably good
cache locality. I can see objecting to incrementing bytes++ each time
for large -c values (didn't want to special case it and L1 cache local
spinning on a register to count up to even a few million should be way
less than the read() overhead, but it still wastes battery...)
The extra structure keeps track of the lines found so far. It remembers
the start of each in line and because of the existence of segmented
memory it also has to remember which chunk the line starts in. Using
the structure there is no need to reread the list to find out where the
lines were located.
Post by Rob Landley
Let's see, assuming the actual bugs were the uninitialized next pointer
in the last list entry and the list entries being added in the wrong
order (so only the last list entry ever showed), so if I separate the
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...
There were more. (Just try the previous lseek logic with something big
containing no newlines.)
Post by Rob Landley
toys/*/tail.c | tail -n 87) | diffstat
one | 66
+++++++++++++++++++++++++++++++-----------------------------------
1 file changed, 31 insertions(+), 35 deletions(-)
Heh, same number of lines. And I added comments...
Now this is cheating slightly because I added a dlist_pop() function
that adjusts the prev and next pointers to lib/llist.c. (The two
different code paths each discard chunks, so a function is better than
inline.) I could instead do the two pointer thing you're doing to
create a singly linked list inline, but I'd want to make a generic
function to do that and add it to lib/llist.c instead of open coding it
here, and last I tried it that required having a magic list head
structure which I'd rather not do...
Do you really understand the "Time to discard data;..." logic? The
comment was not really helpful for me.
Post by Rob Landley
(Sorry, I've been vaguely meaning to come back to tail and see if I
could clean this up and unify the codepaths. The backwards traversal vs
forwards traversal thing is what doubly linked lists are _for_. But
every time I sit down to bang on toybox I've got five more messages
from people submitting more code for me to look at, and I never get to
any of my own todo list because I'm perpetually behind...)
Grrr, all right, let's confront the design issue you've raised: am I
ever using the doubly linked list for anything _except_ in-order singly
linked list creation? Let's see, the current users are ls, xargs, tail,
patch, and sed.
Just to sketch what I was thinking: I noticed that no doubly-linked list
was strictly necessary in the code. Then I found that there is no
llist_add_nomalloc. Finally I concluded that it must be so trivial that
it should be done inline...
Post by Rob Landley
ls: is abusing a dirtree * to do in-order creation via
dlist_add_nomalloc() (with ->parent standing in for ->prev) and then
cleaning it up with dlist_to_dirtree() function that's only ever called
from one place... that could use some cleanup. Anyway, it's never using
prev, just writing to it or breaking the circular list with it.
patch: in order creation, using prev to terminate the circular list to
get a singly linked list, and at one point removing an item while
maintaining the circular list (so adjusting the pointers, but that's
write not read).
xargs: in order creation, never using prev except to break the chain.
sed: unfinished. I'd love to get time to work on it but tonight I'm
doing this instead.
tail: case in point.
So it _looks_ like the dlist stuff can go, and be replaced by a
function that does in order creation of singly linked lists. (If I
really need something that can be traversed both ways, so far it's
either an array or a tree.)
llist_add_tail(void *list, void *end, void *new);
Is... kinda sad. It only writes to list if it was null (initializing an
empty list), should that be the caller's job? Can it _return_ list?
list = end = 0;
while (new = get()) list = llist_add_tail(&end, new);
No, then it doesn't know what list was after the first call, I've got
to pass it in so it can check if it's not NULL or have the caller do
struct listy *list = 0, *end = 0, *new;
while (new = get()) {
if (!list) list = new;
else end->next = new;
new->next = 0;
end = new;
}
And I wrap that in a function how?
struct listy *list = 0, *new;
while (new = get()) dlist_add_nomalloc(&list, new);
You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find the
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I get
the ability to traverse the list backwards, even if I never use it.)
It's still a bit confusing to me. list always points to the head and
list->prev points to the tail? Could you maybe remark that somewhere?
So it's nearly the same as the three argument version. (But we get a
doubly linked list in the end.)
Post by Rob Landley
On the bright side, if I do keep dlist_pop() I can use it in patch to
trim a couple lines there...
Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on tuesday
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)
Sorry for keeping you up with this patch.

Felix
Rob Landley
2013-09-12 18:02:29 UTC
Permalink
Catching up on email while waiting at the airport. (Ohio Linuxfest!)
Post by Felix Janda
Post by Rob Landley
What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
I just wanted to mention this. It means that we have some more freedom
in the implementation.
I've since done a checkin that obsoletes most of the analysis here, but
there were a few other topics...
Post by Felix Janda
Post by Rob Landley
pos was a ssize_t because lseek() can return failure, now you've
made
Post by Rob Landley
it a size_t which can't be negative. (Do you have something against
signed numbers?)
It can't be negative. But it can be -1.
Uh-huh.
Post by Felix Janda
But actually we should use
an off_t for pos. One might be curious about the end of a 8GB file
in a 32bit system.
Is ssize_t not sufficient? (That's what the lseek version is using for
pos. The non-lseek version isn't particularly keeping track of pos
after grabbing enough characters and lines.)

Also, the argument parsing logic is using "long" for the numbers we
input. This means a 32 bit system can't accept a -c larger than 2 gigs.
(Known limitation, didn't want to complicate the code to cope with 32
bit systems when everything and its dog has 64 bit variants these days,
and the year 2038 problem is going to flush out the remaining 32-bit
systems in 25 years. Support them, yes. Make them handle large numbers
in all cases, not necessarily.)

(The real hiccup with argument parsing is that
sizeof(long)==sizeof(pointer) so I allocate pointer sized slots at the
start of GLOBALS() to save arguments into. I can jump through hoops to
get around that, but the simple approach is to wait for the switch to
64 bit arm.)
Post by Felix Janda
The previous code had the assumption that every chunk is terminated by
a newline (which is utterly wrong). This is the simplest
implementation
I could come up with. I'll try to explain further things below.
After spending quite some time with this reply I see the simple
(less efficient (in terms of number of instructions)) algorithm.
I think I fixed that, but hopefully if there are still problems they'll
be mentioned in one of the other pending mails I'm reading towards...
Post by Felix Janda
Post by Rob Landley
Actually we don't have an exclusion in the option string for [!nc]
and
Post by Rob Landley
I vaguely recall I was trying to make it work if you specified both
at
Post by Rob Landley
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)
I didn't know that. Could you specify it in more detail (the order
matters)?
It was a bad idea, and the attempt at implementing it was broken in
several places. I made this implementation produce the same results
other ones do instead.
Post by Felix Janda
Post by Rob Landley
Let's see, assuming the actual bugs were the uninitialized next
pointer
Post by Rob Landley
in the last list entry and the list entries being added in the wrong
order (so only the last list entry ever showed), so if I separate
the
Post by Rob Landley
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...
There were more. (Just try the previous lseek logic with something big
containing no newlines.)
Hopefully the current version works. (I had the test suite do a larger
test file. Probably didn't hit everything.)
Post by Felix Janda
Post by Rob Landley
You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find
the
Post by Rob Landley
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I
get
Post by Rob Landley
the ability to traverse the list backwards, even if I never use it.)
It's still a bit confusing to me. list always points to the head and
list->prev points to the tail?
Yes, it's a circular doubly linked list. Fairly common data structure,
the kernel uses 'em all over the place. (You have a head pointer, from
there you can traverse in either direction, and when you hit it again
you're done.)

This sort of thing is actually why the kernel has magic PID 1 and
rootfs entries that never go away: the head pointer points to them and
if that entry is never removed, you never have to move the head pointer
which makes some nasty races go away (and RCU can handle the rest).
Post by Felix Janda
Could you maybe remark that somewhere?
Fluff out http://landley.net/toybox/code.html#lib_llist some more, you
mean?

I need to do another documentation pass, I expect...
Post by Felix Janda
So it's nearly the same as the three argument version. (But we get a
doubly linked list in the end.)
Post by Rob Landley
On the bright side, if I do keep dlist_pop() I can use it in patch
to
Post by Rob Landley
trim a couple lines there...
Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on
tuesday
Post by Rob Landley
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)
Sorry for keeping you up with this patch.
The point is getting the best code at the end we know how to do. I'm
often wrong about what's best, either finding local peaks or being
outright mistaken. Thanks for the kick out of this particular rut...

Rob
Felix Janda
2013-09-17 19:44:05 UTC
Permalink
Post by Rob Landley
Catching up on email while waiting at the airport. (Ohio Linuxfest!)
Post by Felix Janda
Post by Rob Landley
What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
I just wanted to mention this. It means that we have some more freedom
in the implementation.
I've since done a checkin that obsoletes most of the analysis here, but
there were a few other topics...
Yeah, it seems so.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
pos was a ssize_t because lseek() can return failure, now you've
made
Post by Rob Landley
it a size_t which can't be negative. (Do you have something against
signed numbers?)
It can't be negative. But it can be -1.
Uh-huh.
Just some magic value. (Ok, if sizeof(size_t) < sizeof(int), there is
a problem with this.)
Post by Rob Landley
Post by Felix Janda
But actually we should use
an off_t for pos. One might be curious about the end of a 8GB file
in a 32bit system.
Is ssize_t not sufficient? (That's what the lseek version is using for
pos. The non-lseek version isn't particularly keeping track of pos
after grabbing enough characters and lines.)
In my particular example obviously not. The return value of lseek will
be truncated and we will output stuff from somewhere in the file but
not at its end.
Post by Rob Landley
Also, the argument parsing logic is using "long" for the numbers we
input. This means a 32 bit system can't accept a -c larger than 2 gigs.
(Known limitation, didn't want to complicate the code to cope with 32
bit systems when everything and its dog has 64 bit variants these days,
and the year 2038 problem is going to flush out the remaining 32-bit
systems in 25 years. Support them, yes. Make them handle large numbers
in all cases, not necessarily.)
(The real hiccup with argument parsing is that
sizeof(long)==sizeof(pointer) so I allocate pointer sized slots at the
start of GLOBALS() to save arguments into. I can jump through hoops to
get around that, but the simple approach is to wait for the switch to
64 bit arm.)
Thanks for mentioning this known limitation. Someone commented on
fallocate on github. It should also use off_t but the argparsing logic can't
handle it anyway. There might be some reasons to create large files with
this toy on 32bit systems.

atolx() seems to have pretty undefined behavior with large numbers?
Post by Rob Landley
Post by Felix Janda
The previous code had the assumption that every chunk is terminated by
a newline (which is utterly wrong). This is the simplest
implementation
I could come up with. I'll try to explain further things below.
After spending quite some time with this reply I see the simple
(less efficient (in terms of number of instructions)) algorithm.
I think I fixed that, but hopefully if there are still problems they'll
be mentioned in one of the other pending mails I'm reading towards...
It looks good to me.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
Actually we don't have an exclusion in the option string for [!nc]
and
Post by Rob Landley
I vaguely recall I was trying to make it work if you specified both
at
Post by Rob Landley
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)
I didn't know that. Could you specify it in more detail (the order matters)?
It was a bad idea, and the attempt at implementing it was broken in
several places. I made this implementation produce the same results
other ones do instead.
Ok.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
Let's see, assuming the actual bugs were the uninitialized next
pointer
Post by Rob Landley
in the last list entry and the list entries being added in the wrong
order (so only the last list entry ever showed), so if I separate
the
Post by Rob Landley
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...
There were more. (Just try the previous lseek logic with something big
containing no newlines.)
Hopefully the current version works. (I had the test suite do a larger
test file. Probably didn't hit everything.)
Did a few tests. Seems good.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find
the
Post by Rob Landley
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I
get
Post by Rob Landley
the ability to traverse the list backwards, even if I never use it.)
It's still a bit confusing to me. list always points to the head and
list->prev points to the tail?
Yes, it's a circular doubly linked list. Fairly common data structure,
the kernel uses 'em all over the place. (You have a head pointer, from
there you can traverse in either direction, and when you hit it again
you're done.)
This sort of thing is actually why the kernel has magic PID 1 and
rootfs entries that never go away: the head pointer points to them and
if that entry is never removed, you never have to move the head pointer
which makes some nasty races go away (and RCU can handle the rest).
Post by Felix Janda
Could you maybe remark that somewhere?
Fluff out http://landley.net/toybox/code.html#lib_llist some more, you
mean?
I need to do another documentation pass, I expect...
How about calling them circular lists instead of double lists?
Post by Rob Landley
Post by Felix Janda
So it's nearly the same as the three argument version. (But we get a
doubly linked list in the end.)
Post by Rob Landley
On the bright side, if I do keep dlist_pop() I can use it in patch
to
Post by Rob Landley
trim a couple lines there...
Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on
tuesday
Post by Rob Landley
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)
Sorry for keeping you up with this patch.
The point is getting the best code at the end we know how to do. I'm
often wrong about what's best, either finding local peaks or being
outright mistaken. Thanks for the kick out of this particular rut...
It's nice that we now don't have a horribly broken tail anymore.

Felix
Felix Janda
2013-09-17 19:44:05 UTC
Permalink
Post by Rob Landley
Catching up on email while waiting at the airport. (Ohio Linuxfest!)
Post by Felix Janda
Post by Rob Landley
What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
I just wanted to mention this. It means that we have some more freedom
in the implementation.
I've since done a checkin that obsoletes most of the analysis here, but
there were a few other topics...
Yeah, it seems so.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
pos was a ssize_t because lseek() can return failure, now you've
made
Post by Rob Landley
it a size_t which can't be negative. (Do you have something against
signed numbers?)
It can't be negative. But it can be -1.
Uh-huh.
Just some magic value. (Ok, if sizeof(size_t) < sizeof(int), there is
a problem with this.)
Post by Rob Landley
Post by Felix Janda
But actually we should use
an off_t for pos. One might be curious about the end of a 8GB file
in a 32bit system.
Is ssize_t not sufficient? (That's what the lseek version is using for
pos. The non-lseek version isn't particularly keeping track of pos
after grabbing enough characters and lines.)
In my particular example obviously not. The return value of lseek will
be truncated and we will output stuff from somewhere in the file but
not at its end.
Post by Rob Landley
Also, the argument parsing logic is using "long" for the numbers we
input. This means a 32 bit system can't accept a -c larger than 2 gigs.
(Known limitation, didn't want to complicate the code to cope with 32
bit systems when everything and its dog has 64 bit variants these days,
and the year 2038 problem is going to flush out the remaining 32-bit
systems in 25 years. Support them, yes. Make them handle large numbers
in all cases, not necessarily.)
(The real hiccup with argument parsing is that
sizeof(long)==sizeof(pointer) so I allocate pointer sized slots at the
start of GLOBALS() to save arguments into. I can jump through hoops to
get around that, but the simple approach is to wait for the switch to
64 bit arm.)
Thanks for mentioning this known limitation. Someone commented on
fallocate on github. It should also use off_t but the argparsing logic can't
handle it anyway. There might be some reasons to create large files with
this toy on 32bit systems.

atolx() seems to have pretty undefined behavior with large numbers?
Post by Rob Landley
Post by Felix Janda
The previous code had the assumption that every chunk is terminated by
a newline (which is utterly wrong). This is the simplest
implementation
I could come up with. I'll try to explain further things below.
After spending quite some time with this reply I see the simple
(less efficient (in terms of number of instructions)) algorithm.
I think I fixed that, but hopefully if there are still problems they'll
be mentioned in one of the other pending mails I'm reading towards...
It looks good to me.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
Actually we don't have an exclusion in the option string for [!nc]
and
Post by Rob Landley
I vaguely recall I was trying to make it work if you specified both
at
Post by Rob Landley
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)
I didn't know that. Could you specify it in more detail (the order matters)?
It was a bad idea, and the attempt at implementing it was broken in
several places. I made this implementation produce the same results
other ones do instead.
Ok.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
Let's see, assuming the actual bugs were the uninitialized next
pointer
Post by Rob Landley
in the last list entry and the list entries being added in the wrong
order (so only the last list entry ever showed), so if I separate
the
Post by Rob Landley
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...
There were more. (Just try the previous lseek logic with something big
containing no newlines.)
Hopefully the current version works. (I had the test suite do a larger
test file. Probably didn't hit everything.)
Did a few tests. Seems good.
Post by Rob Landley
Post by Felix Janda
Post by Rob Landley
You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find
the
Post by Rob Landley
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I
get
Post by Rob Landley
the ability to traverse the list backwards, even if I never use it.)
It's still a bit confusing to me. list always points to the head and
list->prev points to the tail?
Yes, it's a circular doubly linked list. Fairly common data structure,
the kernel uses 'em all over the place. (You have a head pointer, from
there you can traverse in either direction, and when you hit it again
you're done.)
This sort of thing is actually why the kernel has magic PID 1 and
rootfs entries that never go away: the head pointer points to them and
if that entry is never removed, you never have to move the head pointer
which makes some nasty races go away (and RCU can handle the rest).
Post by Felix Janda
Could you maybe remark that somewhere?
Fluff out http://landley.net/toybox/code.html#lib_llist some more, you
mean?
I need to do another documentation pass, I expect...
How about calling them circular lists instead of double lists?
Post by Rob Landley
Post by Felix Janda
So it's nearly the same as the three argument version. (But we get a
doubly linked list in the end.)
Post by Rob Landley
On the bright side, if I do keep dlist_pop() I can use it in patch
to
Post by Rob Landley
trim a couple lines there...
Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on
tuesday
Post by Rob Landley
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)
Sorry for keeping you up with this patch.
The point is getting the best code at the end we know how to do. I'm
often wrong about what's best, either finding local peaks or being
outright mistaken. Thanks for the kick out of this particular rut...
It's nice that we now don't have a horribly broken tail anymore.

Felix

Rob Landley
2013-09-04 23:46:44 UTC
Permalink
Replying with gmail, sorry if this winds up a bit garbled...
Post by Felix Janda
This should now hopefully fix the earlier segfaults.
Yay!

(Applied on the theory that what's there segfaults.)
Post by Felix Janda
In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)
POSIX tail BTW does only accept one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.
For tail -f, how about putting the fds into an array and then using
select() to watch the fds?
I've been thinking about that. It's a toss up between not wanting to
complicate the code and that being the obvious way to do that. (It
boils down to "is this worth doing".)

Right now netcat has either poll or select logic in it (I forget
which), and there are a couple other things like tee and less that
probably should have it. I have a vague note that if I can work out a
good library function abstracting this away into something simple,
using it in more places costs less. But haven't gone back to try to
work out a design. (Netcat's next big todo item is udp support, then
ipv6.)
Post by Felix Janda
GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.
Sheesh, I'd missed that last one. (Behave differently how?)
Post by Felix Janda
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes
- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list
diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)
struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
char *data;
- int len;
+ size_t len;
};
-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);
- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
Adding a space between char and *. Pet peeve of mine: people doing
"char* a, b;" and then avoiding declaring more than one pointer at a
time after getting bit by that because they blame the wrong thing for
their mistake.
Post by Felix Janda
line->len = readall(fd, line->data, len);
Which is returning signed, and -1 for error...
Post by Felix Janda
+ line->next = 0;
- if (line->len < 1) {
+ if (line->len + 1 < 2) {
And this trick is assuming -1 will wrap to 0 in unsigned, and is
uncomfortably subtle. Which is why len was an int in the first place.

Possibly len should be ssize_t?
Post by Felix Janda
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct line_list));
*blink* *blink*

Ok, data minus the start of the structure minus the size of the
structure... you're moving data forward _without_ adjusting len? And
assuming things about the layout of the structure...

Would it make more sense to add an "int used;" to the structure?
Post by Felix Janda
+
+ xwrite(1, list->data, len);
free(list);
}
@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);
Are single lines bigger than 2 gigabytes really a huge problem?
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
Negative one is less than zero...
Post by Felix Janda
// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@
bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;
// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
Ok, _that_ is a largeish behavior change. Was that the bug?

I tend to use double lists as an easy way to assemble them in-order,
and also because I have existing dlist functions to manipulate them.
(I didn't bother to write the single linked functions in the library
because they're about as easy to do inline... and then the lazy way
was just to use the double ones. Six of one...)

I need to read through the rest of this more closely, but this message
has already sat half finished on my desktop for a couple days now...

Rob
Rob Landley
2013-09-06 09:34:43 UTC
Permalink
Post by Felix Janda
This should now hopefully fix the earlier segfaults.
In the case that lseek doesn't work and we count from the end
the algorithms for tail -n and tail -c are now more separate.
(tail -c is now more efficient since it adds lengths instead of
counting bytes.)
And made it back here with my email client rather than snooping in
gmail's web interface. Time for a closer look...
Post by Felix Janda
POSIX tail BTW does only accept one input file. tail is not specified
by LSB. I guess it is convenient when combined with "-f" to watch
multiple files.
I treat posix as a frame of reference to diverge from. The gnu/dammit
guys take credit for every third party contribution from 20 years of
Linux development (and some disgruntled solaris guys back in the 80's
when Ed Zander decided to unbundle the compiler from the OS and charge
extra for it, and they all installed gcc instead).

I care about "what's on my existing linux box", both the actual current
Ubuntu LTS, and the old 2008 version I have in a qemu image, and the
various gentoo/fedora/suse/decade old red hat 9" test images I have
lying around. Some of it's crazy and it's better not to do that, but
it's a point of reference. (So is Android, and busybox, and the linux
standard base...)

What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
Post by Felix Janda
For tail -f, how about putting the fds into an array and then using
select() to watch the fds?
GNU and busybox print the header again when the file last read from
changes. They behave differently when a file occurs on the command
line more than once.
# HG changeset patch
# User Felix Janda <felix.janda at posteo.de>
# Date 1377945041 -7200
# Node ID 3cd61d16aabec82505da96a142663cedbba8d12a
# Parent 9a059e9b3a17a372fc8b225f512af57c72f4eeaa
tail: Some fixes
- Rewrite most of the not lseek() logic
- Change meaning of len in line_list
- Use single instead of double linked list
diff -r 9a059e9b3a17 -r 3cd61d16aabe toys/posix/tail.c
--- a/toys/posix/tail.c Fri Aug 30 20:09:10 2013 +0200
+++ b/toys/posix/tail.c Sat Aug 31 12:30:41 2013 +0200
@@ -38,19 +38,20 @@
)
struct line_list {
- struct line_list *next, *prev;
+ struct line_list *next;
Double lists are easy to create in order, I usually do that when
sequencing is important.
Post by Felix Janda
char *data;
- int len;
+ size_t len;
};
You changed this from a signed to an unsigned value why? The largest
value fed into get_chunk is sizeof(toybuf), which easily fits in an
int. (If we're allocating a larger than 2 gigabyte block of data,
something is wrong.)
Post by Felix Janda
-static struct line_list *get_chunk(int fd, int len)
+static struct line_list *get_chunk(int fd, size_t len)
{
struct line_list *line = xmalloc(sizeof(struct line_list)+len);
- line->data = ((char *)line) + sizeof(struct line_list);
+ line->data = (char*)line + sizeof(struct line_list);
line->len = readall(fd, line->data, len);
+ line->next = 0;
Hmmm... the caller was supposed to set this. I see the caller doesn't
do so reliably for the end of the list and that is a bug. Quite
possibly the rest of this patch is noise?
Post by Felix Janda
- if (line->len < 1) {
+ if (line->len + 1 < 2) {
It was signed for a reason; readall returns -1 on error, and depending
on wraparound like that is nonobvious and awkward.
Post by Felix Janda
free(line);
return 0;
}
@@ -61,7 +62,9 @@
static void dump_chunk(void *ptr)
{
struct line_list *list = ptr;
- xwrite(1, list->data, list->len);
+ size_t len = list->len - (list->data - (char*)list - sizeof(struct
line_list));
+
+ xwrite(1, list->data, len);
free(list);
}
Actually the point of dump_chunk() was to output whole chunks. The
previous code should have either written up to the end of the previous
chunk or adjusted the start and length of the chunk for us.

(It's a llist_traverse() callback function to output all pending
buffered data.)

Sigh, it is _annoying_ to have two codepaths...
Post by Felix Janda
@@ -71,11 +74,11 @@
static int try_lseek(int fd, long bytes, long lines)
{
struct line_list *list = 0, *temp;
- int flag = 0, chunk = sizeof(toybuf);
- ssize_t pos = lseek(fd, 0, SEEK_END);
+ int flag = 0;
+ size_t chunk = sizeof(toybuf), pos = lseek(fd, 0, SEEK_END);
pos was a ssize_t because lseek() can return failure, now you've made
it a size_t which can't be negative. (Do you have something against
signed numbers?)
Post by Felix Janda
// If lseek() doesn't work on this stream, return now.
- if (pos<0) return 0;
+ if (pos == -1) return 0;
And yet you compare it with a negative. Maybe the bit patterns work
out, but I'm uncomfortable with that. I can see a compiler optimizing
this out simply because it can never be true. (That's actually what
happens when pos is unsigned char, due to type promotion.)
Post by Felix Janda
// Seek to the right spot, output data from there.
if (bytes) {
@@ -88,40 +91,36 @@
bytes = pos;
while (lines && pos) {
- int offset;
+ size_t offset;
This offset is into the current chunk. Chunks are page size, and offset
is initialized to list->len which is at most sizeof(toybuf). So int is
plenty big for this.
Post by Felix Janda
// Read in next chunk from end of file
- if (chunk>pos) chunk = pos;
+ if (chunk > pos) chunk = pos;
pos -= chunk;
if (pos != lseek(fd, pos, SEEK_SET)) {
perror_msg("seek failed");
break;
}
if (!(temp = get_chunk(fd, chunk))) break;
- if (list) list->next = temp;
+ if (list) temp->next = list;
list = temp;
Actually you could unconditionally temp->next = list and that takes
care of null terminating the list if temp started null.

And, yes, I believe that fix is needed. :) (This instance is creating a
list in reverse order because we're reading the file in reverse order,
thus it doesn't need a doubly linked list.)
Post by Felix Janda
// Count newlines in this chunk.
offset = list->len;
while (offset--) {
// If the last line ends with a newline, that one doesn't
count.
- if (!flag) {
- flag++;
-
- continue;
- }
+ if (!flag) flag++;
// Start outputting data right after newline
- if (list->data[offset] == '\n' && !++lines) {
+ else if (list->data[offset] == '\n' && !++lines) {
Eh, ok.
Post by Felix Janda
offset++;
list->data += offset;
- list->len -= offset;
offset was initialized to list->len which can't be zero or get_chunk()
would have discarded it. Therefore, offset should never be > list->len
so this can't reduce it to a negative number.

Yet you decided to not adjust the length here, and instead complicate
dump_chunk with knowledge of the structure layout. Why?
Post by Felix Janda
- break;
+ goto done;
}
Which breaks us into a while(lines) loop, and we only got in here if
!++lines so lines has to be 0 _after_ the increment, so the outer loop
must also exit, so the goto is unnecessary.
Post by Felix Janda
}
}
// Output stored data
llist_traverse(list, dump_chunk);
@@ -143,58 +142,54 @@
// Are we measuring from the end of the file?
I thought the non-lseek tail case was already working? (Are there bugs
in both?)

The diffstat on this last hunk is:

onen--- | 64
++++++++++++++++++++++++++++++----------------------------------
1 file changed, 30 insertions(+), 34 deletions(-)

So a net savings of 4 lines, while adding a new intermediate structure.
Previously we had one data structure (a linked list of chunks), now
there's a second structure wrapping that other structure, an array
containing pointers to nodes of a linked list (that's still a linked
list).

Not sure that's actually simpler. Or faster. Or uses less memory.

Sigh. Large rewrite of how something works, including a lot of
unrelated changes, is not something I'm happy to see presented as a
bugfix. Especially when I'm trying to dig an answer to "so what was the
actual _bug_?" out of it, so I can confirm whether or not it was
actually fixed.

I am exerting a lot of pondering answering what should be a simple
question.
Post by Felix Janda
if (bytes<0 || lines<0) {
- struct line_list *list = 0, *new;
+ struct line_list *head = 0, *tail, *new;
+ // circular buffer of lines
+ struct {
+ char *start;
+ struct line_list *inchunk;
+ } *l = xzalloc(2*-lines*sizeof(void*));
+ int i = 0, flag = 0;
+ size_t count, len = bytes;
malloc(0) is allowed to return NULL, which means xmalloc() will abort,
so in some libc implementations -c just stopped working.
Post by Felix Janda
// The slow codepath is always needed, and can handle all input,
// so make lseek support optional.
- if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines));
+ if (CFG_TAIL_SEEK && try_lseek(fd, bytes, lines)) return;
// Read data until we run out, keep a trailing buffer
- else for (;;) {
- int len, count;
+ for (;;) {
Eh, cosmetic, but ok.
Post by Felix Janda
char *try;
if (!(new = get_chunk(fd, sizeof(toybuf)))) break;
// append in order
- dlist_add_nomalloc((void *)&list, (struct double_list *)new);
+ if (head) tail->next = new;
+ else head = new;
+ tail = new;
Open code the function, adding more local variables? I can sort of see
it, but I tend to lean on the dlist functions because (as you can see
from above), manual linked list code is easy to screw up and hard to
read. (It's 8 extra bytes per 4k chunk to use common code, and you were
comfortable adding an entire array of structures here...)
Post by Felix Janda
- // Measure new chunk, discarding extra data from buffer
- len = new->len;
try = new->data;
- for (count=0; count<len; count++) {
- if ((toys.optflags & FLAG_c) && bytes) {
- bytes++;
- continue;
+ if (lines) for (count = 0; count < new->len; count++, try++) {
+ if (flag) { // last char was a newline
+ while (l[i].inchunk && (l[i].inchunk!=head))
free(llist_pop(&head));
+ l[i].inchunk = tail;
+ l[i].start = try;
+ i = (i + 1) % -lines;
+ flag = 0;
}
-
- if (lines) {
- if(try[count] != '\n' && count != len-1) continue;
- if (lines<0) {
- if (!++lines) ++lines;
- continue;
- }
+ if (*try == '\n') flag = 1;
+ } else { // bytes
Actually we don't have an exclusion in the option string for [!nc] and
I vaguely recall I was trying to make it work if you specified both at
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)

That's why -c ate its argument (counting up), and then -n ate its
argument (counting up) in sequence. However, other implementations
instead make "tail -c 7 -n 3" act like just -n 3, and "tail -n 3 -c 7"
act like -c 7. And the way to do _that_ is a [-cn] exclusion on the
string, then check the flag.

(Hmmm... ideally I'd have lib/args.c zero the save slot when it unsets
the flag, then we don't have to check the flags... Ok, I think I can
make that work without too much extra code.)
Post by Felix Janda
+ if (len + new->len < len) flag = 1; // overflow -> have now
read enough
+ for (len += new->len; flag && (len - head->len < len);) {
+ len -= head->len;
+ free(llist_pop(&head));
}
-
- // Time to discard data; given that bytes and lines were
- // nonzero coming in, we can't discard too much if we're
- // measuring right.
- do {
- char c = *(list->data++);
- if (!(--list->len)) {
- struct line_list *next = list->next;
- list->prev->next = next;
- list->next->prev = list->prev;
- free(list);
- list = next;
- }
- if (c == '\n') break;
- } while (lines);
}
}
+ if (lines) head->data = l[i].start;
+ else head->data += len;
// Output/free the buffer.
- llist_traverse(list, dump_chunk);
+ llist_traverse(head, dump_chunk);
+ free(l);
Sigh. The previous design was _just_ doing linked list traversal, and
it was pretty much the same sort of linked list traversal in both
codepaths.

This change has made the two codepaths have fundamentally different
designs, taking them further away from being unified. (Not that they
necessarily can be unified, but the more different the two
implementations are the harder the command as a whole is to understand.
This makes me uncomfortable.)
Post by Felix Janda
// Measuring from the beginning of the file.
} else for (;;) {
int len, offset = 0;
I thought the existing design was reasonably straightforward:

Loop reading chunks:
1) Count up requested bytes until full
2) Count up requested lines until full
3) Once we have enough data but not the _right_ data,
discard from head as we add to tail.
Print result.

The implementation might not have been, but we shouldn't need more
structures to track this. The old approach even had reasonably good
cache locality. I can see objecting to incrementing bytes++ each time
for large -c values (didn't want to special case it and L1 cache local
spinning on a register to count up to even a few million should be way
less than the read() overhead, but it still wastes battery...)

Let's see, assuming the actual bugs were the uninitialized next pointer
in the last list entry and the list entries being added in the wrong
order (so only the last list entry every showed), so if I separate the
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...

toys/*/tail.c | tail -n 87) | diffstat
one | 66
+++++++++++++++++++++++++++++++-----------------------------------
1 file changed, 31 insertions(+), 35 deletions(-)

Heh, same number of lines. And I added comments...

Now this is cheating slightly because I added a dlist_pop() function
that adjusts the prev and next pointers to lib/llist.c. (The two
different code paths each discard chunks, so a function is better than
inline.) I could instead do the two pointer thing you're doing to
create a singly linked list inline, but I'd want to make a generic
function to do that and add it to lib/llist.c instead of open coding it
here, and last I tried it that required having a magic list head
structure which I'd rather not do...

(Sorry, I've been vaguely meaning to come back to tail and see if I
could clean this up and unify the codepaths. The backwards traversal vs
forwards traversal thing is what doubly linked lists are _for_. But
every time I sit down to bang on toybox I've got five more messages
from people submitting more code for me to look at, and I never get to
any of my own todo list because I'm perpetually behind...)

Grrr, all right, let's confront the design issue you've raised: am I
ever using the doubly linked list for anything _except_ in-order singly
linked list creation? Let's see, the current users are ls, xargs, tail,
patch, and sed.

ls: is abusing a dirtree * to do in-order creation via
dlist_add_nomalloc() (with ->parent standing in for ->prev) and then
cleaning it up with dlist_to_dirtree() function that's only ever called
from one place... that could use some cleanup. Anyway, it's never using
prev, just writing to it or breaking the circular list with it.

patch: in order creation, using prev to terminate the circular list to
get a singly linked list, and at one point removing an item while
maintaining the circular list (so adjusting the pointers, but that's
write not read).

xargs: in order creation, never using prev except to break the chain.

sed: unfinished. I'd love to get time to work on it but tonight I'm
doing this instead.

tail: case in point.

So it _looks_ like the dlist stuff can go, and be replaced by a
function that does in order creation of singly linked lists. (If I
really need something that can be traversed both ways, so far it's
either an array or a tree.)

Right. What does said function look like? Because:

llist_add_tail(void *list, void *end, void *new);

Is... kinda sad. It only writes to list if it was null (initializing an
empty list), should that be the caller's job? Can it _return_ list?

list = end = 0;

while (new = get()) list = llist_add_tail(&end, new);

No, then it doesn't know what list was after the first call, I've got
to pass it in so it can check if it's not NULL or have the caller do
that. The inline version is something like:

struct listy *list = 0, *end = 0, *new;

while (new = get()) {
if (!list) list = new;
else end->next = new;
new->next = 0;
end = new;
}

And I wrap that in a function how?

Sigh:

struct listy *list = 0, *new;

while (new = get()) dlist_add_nomalloc(&list, new);

You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find the
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I get
the ability to traverse the list backwards, even if I never use it.)

On the bright side, if I do keep dlist_pop() I can use it in patch to
trim a couple lines there...

Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on tuesday
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)

Rob
Rob Landley
2013-09-12 18:02:29 UTC
Permalink
Catching up on email while waiting at the airport. (Ohio Linuxfest!)
Post by Felix Janda
Post by Rob Landley
What The Hurd implements is not. Those religious fanatics are simply
not relevant, except sometimes as a warning of what _not_ to do.
I just wanted to mention this. It means that we have some more freedom
in the implementation.
I've since done a checkin that obsoletes most of the analysis here, but
there were a few other topics...
Post by Felix Janda
Post by Rob Landley
pos was a ssize_t because lseek() can return failure, now you've
made
Post by Rob Landley
it a size_t which can't be negative. (Do you have something against
signed numbers?)
It can't be negative. But it can be -1.
Uh-huh.
Post by Felix Janda
But actually we should use
an off_t for pos. One might be curious about the end of a 8GB file
in a 32bit system.
Is ssize_t not sufficient? (That's what the lseek version is using for
pos. The non-lseek version isn't particularly keeping track of pos
after grabbing enough characters and lines.)

Also, the argument parsing logic is using "long" for the numbers we
input. This means a 32 bit system can't accept a -c larger than 2 gigs.
(Known limitation, didn't want to complicate the code to cope with 32
bit systems when everything and its dog has 64 bit variants these days,
and the year 2038 problem is going to flush out the remaining 32-bit
systems in 25 years. Support them, yes. Make them handle large numbers
in all cases, not necessarily.)

(The real hiccup with argument parsing is that
sizeof(long)==sizeof(pointer) so I allocate pointer sized slots at the
start of GLOBALS() to save arguments into. I can jump through hoops to
get around that, but the simple approach is to wait for the switch to
64 bit arm.)
Post by Felix Janda
The previous code had the assumption that every chunk is terminated by
a newline (which is utterly wrong). This is the simplest
implementation
I could come up with. I'll try to explain further things below.
After spending quite some time with this reply I see the simple
(less efficient (in terms of number of instructions)) algorithm.
I think I fixed that, but hopefully if there are still problems they'll
be mentioned in one of the other pending mails I'm reading towards...
Post by Felix Janda
Post by Rob Landley
Actually we don't have an exclusion in the option string for [!nc]
and
Post by Rob Landley
I vaguely recall I was trying to make it work if you specified both
at
Post by Rob Landley
the same time. (So it showed you that many characters from the end
_plus_ that many lines before that point.)
I didn't know that. Could you specify it in more detail (the order
matters)?
It was a bad idea, and the attempt at implementing it was broken in
several places. I made this implementation produce the same results
other ones do instead.
Post by Felix Janda
Post by Rob Landley
Let's see, assuming the actual bugs were the uninitialized next
pointer
Post by Rob Landley
in the last list entry and the list entries being added in the wrong
order (so only the last list entry every showed), so if I separate
the
Post by Rob Landley
count and line codepaths into an if/else but keep the head/tail
traversing design without this sideband line array...
There were more. (Just try the previous lseek logic with something big
containing no newlines.)
Hopefully the current version works. (I had the test suite do a larger
test file. Probably didn't hit everything.)
Post by Felix Janda
Post by Rob Landley
You see why I just did a doubly linked list? That way I had one list
pointer fed to the function and the function could use it to find
the
Post by Rob Landley
end of the list, but I _didn't_ have different list and list head
types. The downside is each node has an extra pointer, for which I
get
Post by Rob Landley
the ability to traverse the list backwards, even if I never use it.)
It's still a bit confusing to me. list always points to the head and
list->prev points to the tail?
Yes, it's a circular doubly linked list. Fairly common data structure,
the kernel uses 'em all over the place. (You have a head pointer, from
there you can traverse in either direction, and when you hit it again
you're done.)

This sort of thing is actually why the kernel has magic PID 1 and
rootfs entries that never go away: the head pointer points to them and
if that entry is never removed, you never have to move the head pointer
which makes some nasty races go away (and RCU can handle the rest).
Post by Felix Janda
Could you maybe remark that somewhere?
Fluff out http://landley.net/toybox/code.html#lib_llist some more, you
mean?

I need to do another documentation pass, I expect...
Post by Felix Janda
So it's nearly the same as the three argument version. (But we get a
doubly linked list in the end.)
Post by Rob Landley
On the bright side, if I do keep dlist_pop() I can use it in patch
to
Post by Rob Landley
trim a couple lines there...
Eh, I should worry about it in the morning. Bedtime, and still 5000
emails behind. (Mostly linux-kernel, but I need to read the
Documentation stuff in there. Plus the 3.11 kernel dropped on
tuesday
Post by Rob Landley
and I need to do an Aboriginal release, which means a toybox release
_and_ I wanted to get LFS 7.4 built instead of the old 6.8 control
image that's years stale at this point...)
Sorry for keeping you up with this patch.
The point is getting the best code at the end we know how to do. I'm
often wrong about what's best, either finding local peaks or being
outright mistaken. Thanks for the kick out of this particular rut...

Rob
Continue reading on narkive:
Loading...