aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Oberhollenzer <goliath@infraroot.at>2021-02-13 14:40:29 +0100
committerDavid Oberhollenzer <goliath@infraroot.at>2021-04-17 23:06:25 +0200
commitf6424e26d45de9c599f3b4ce860ca4ad812a1bee (patch)
tree8f093fc29b552cc6471dc510221b89888d4ea603
parenta1cf19f350ef67ade57e320752af47b44c11a3fa (diff)
Add basic userland writeupHEADmaster
Signed-off-by: David Oberhollenzer <goliath@infraroot.at>
-rw-r--r--03_basic_usr.md891
-rw-r--r--README.md8
-rwxr-xr-xdownload_usr1.sh103
-rw-r--r--list.txt41
-rw-r--r--util.sh24
5 files changed, 1063 insertions, 4 deletions
diff --git a/03_basic_usr.md b/03_basic_usr.md
new file mode 100644
index 0000000..6ae2b24
--- /dev/null
+++ b/03_basic_usr.md
@@ -0,0 +1,891 @@
+# Building a More Sophisticated Userspace
+
+## Helper Functions
+
+Because we are going to build a bunch of autotools based packages, I am going to
+use two simple helper functions to make things a lot easier.
+
+For simplicity, I added a script called `util.sh` that already contains those
+functions. You can simply source it in your shell using `. <path>/util.sh`.
+
+The first helper function runs the `configure` script for us with some
+default options:
+
+ run_configure() {
+ "$srcdir/configure" --host="$TARGET" --prefix="" --sbindir=/bin \
+ --includedir=/usr/include --datarootdir=/usr/share\
+ --libexecdir=/lib/libexec --disable-static \
+ --enable-shared $@
+ }
+
+The host-touple is set to our target machine touple, the prefix is left empty,
+which means install everything into `/` of our target filesystem.
+
+In case the package wants to install programs into `/sbin`, we explicitly tell
+it that sbin-programs go into `/bin`.
+
+However, despite having `/` as our prefix, we want headers to go
+into `/usr/include` and any possible data files into `/usr/share`.
+
+If a package wants to install helper programs that are regular executables, but
+not intended to be used by a user on the command line, those are usually
+installed into a `libexec` sub directory. We explicitly tell the configure
+script to install those in the historic `/lib/libexec` location instead of
+clobbering the filesystem root with an extra directory.
+
+The last two switches **--disable-static** and **--enable-shared** tell any
+libtool based packages to prefer building shared libraries over static ones.
+
+If a package doesn't use libtool and maybe doesn't even install libraries, it
+will simply issue a warning that it doesn't know those switches, but will
+otherwise ignore them and compile just fine.
+
+The `$@` at the end basically paste any arguments passed to this function, so we
+can still pass along package specific configure switches.
+
+The second function encapsulates the entire dance for building a package that
+we already did several times:
+
+ auto_build() {
+ local pkgname="$1"
+ shift
+
+ mkdir -p "$BUILDROOT/build/$pkgname"
+ cd "$BUILDROOT/build/$pkgname"
+ srcdir="$BUILDROOT/src/$pkgname"
+
+ run_configure $@
+ make -j `nproc`
+ make DESTDIR="$SYSROOT" install
+ cd "$BUILDROOT"
+ }
+
+The package name is specified as first argument, the remaining arguments are
+passed to `run_configure`, again using `$@`. The `shift` command removes the
+first argument, so we can do this without passing the package name along.
+
+Another noticable difference is the usage of `nproc` to determine the number of
+available CPU cores and pass it to `make -j` to speed up the build.
+
+## About pkg-config and libtool
+
+We will build a bunch of packages that either provide libraries, or depend on
+libraries provided by other packages. This also means that the programs that
+require libraries need a way to locate them, i.e. find out what compiler flags
+to add in order to find the headers and what linker flags to add in order to
+actually link against a library. Especially since a library may itself have
+dependencies that the program needs to link against.
+
+The [pkg-config program](https://en.wikipedia.org/wiki/Pkg-config) tries to
+provide a unified solution for this problem. Packages that provide a library
+can install a configuration file at a special location (in our
+case `$SYSROOT/lib/pkgconfig`) and packages that need a library can
+use `pkg-config` to query if the library is present, and what compiler/linker
+flags are required to use it.
+
+With most autotools based packages, this luckily isn't that much of an issue.
+Most of them use standard macros for querying `pkg-config` that automagically
+generate `configure` flags and variables that can be used to override the
+results from `pkg-config`.
+
+We are basically going to use the `pkg-config` version installed on our build
+system and just need to set a few environment variables to instruct it to look
+at the right place instead of the system defaults. The `util.sh` script also
+sets those:
+
+ export PKG_CONFIG_SYSROOT_DIR="$SYSROOT"
+ export PKG_CONFIG_LIBDIR="$SYSROOT/lib/pkgconfig"
+ export PKG_CONFIG_PATH="$SYSROOT/lib/pkgconfig"
+
+The later two are actually *both* paths where `pkg-config` looks for its input
+files, the LIBDIR has nothing to do with shared libraries. Setting
+the `PKG_CONFIG_SYSROOT_DIR` variable instructs `pkg-config` to re-target any
+paths it generates to point to the correct root filesystem directory.
+
+The GNU `libtool` is a wrapper script for building shared libraries in an
+autotools project and takes care of providing reasonable fallbacks on ancient
+Unix systems where this works either qurky or not at all.
+
+At the same time, it also tries to serve a similar purpose as `pkg-config` and
+install libtool archives (with an `.la` file extension) to the `/lib`
+directory. However, the way it does this is horribly broken and when using
+`make install` with a `DESTDIR` argument, it will *always* store full path in
+the `.la` file, which breaks the linking step of any other `libtool` based
+package that happens to pick it up.
+
+We will go with the easy solution and simply delete those files when they are
+installed. Since we have `pkg-config` and a flat `/lib` directory, most
+packages will find their libraries easily (except for `ncurses`, where,
+historical reason, some programs stubbornly try their own way for).
+
+
+## Building GNU Bash
+
+In order to build Bash, we first build two support libraries: `ncurses`
+and `readline`.
+
+### Ncurses
+
+The `ncurses` library is itself an extension/reimplementation of the `pcurses`
+and `curses` libraries dating back to the System V and BSD era. At the time
+that they were around, the landscape of terminals was much more diverse,
+compared to nowadays, where everyone uses a different variant of Frankensteins
+DEC VTxxx emulator.
+
+As a result `curses` internally used the `termcap` library for handling terminal
+escape codes, which was later replaced with `terminfo`. Both of those libraries
+have their own configuration formats for storing terminal capabilities.
+
+The `ncurses` library can work with both `termcap` and `terminfo` files and
+thankfully provides the later for a huge number of terminals.
+
+We can build ncurses as follows:
+
+ auto_build "ncurses-6.2" --disable-stripping --with-shared --without-debug \
+ --enable-pc-files --with-pkg-config-libdir=/lib/pkgconfig \
+ --enable-widec --with-termlib
+
+A few configure clutches are required, because it is one of those packages where
+the maintainer tried to be clever and work around autotools semantics, so we
+have to explicitly tell it to *not strip debug symbols* when installing the
+binaries and explicitly tell it to generate shared libraries. We also need to
+tell it to generate pkg-config files and where to put them.
+
+If the **--disable-stripping** flag wasn't set, the `make install` would fail
+later, because it would try to strip the debug information using the host
+systems `strip` program, which will choke on the ARM binaries.
+
+The **--enable-widec** flag instructs the build system to generate an `ncurses`
+version with multi byte, wide character unicode support. The **--with-termlib**
+switch instructs it to generate build the `terminfo` library as a separate
+library instead of having it built into ncurses.
+
+In addition to a bunch of libraries and header files, `ncurses` also installs
+a few programs for handling `terminfo` files, such as the terminfo
+compiler `tic` or the `tset` program for querying the database
+(e.g. `tput reset` fully resets your terminal back into a sane state, in a way
+that is supported by your terminal).
+
+It also installs the terminal database files into `$SYSROOT/usr/share/terminfo`
+and `$SYSROOT/usr/share/tabset`, as well as a hand full of man pages
+to `$SYSROOT/usr/share/man`. For historical/backwards compatibillity reasons,
+a symlink to the `terminfo` directory is also added to `$SYSROOT/lib`.
+
+Because we installed the version with wide charater and unicode support, the
+libraries that `ncurses` installs all have a `w` suffix at the end (as well as
+the `pkg-config` files it installs), so it's particularly hard for other
+programs to find the libraries.
+
+So we add a bunch of symlinks for the programs that don't bother to check both:
+
+ ln -s "$SYSROOT/lib/pkgconfig/formw.pc" "$SYSROOT/lib/pkgconfig/form.pc"
+ ln -s "$SYSROOT/lib/pkgconfig/menuw.pc" "$SYSROOT/lib/pkgconfig/menu.pc"
+ ln -s "$SYSROOT/lib/pkgconfig/ncursesw.pc" "$SYSROOT/lib/pkgconfig/ncurses.pc"
+ ln -s "$SYSROOT/lib/pkgconfig/panelw.pc" "$SYSROOT/lib/pkgconfig/panel.pc"
+ ln -s "$SYSROOT/lib/pkgconfig/tinfow.pc" "$SYSROOT/lib/pkgconfig/tinfo.pc"
+
+In addition to `pkg-config` scripts, the `ncurses` package also provides a
+shell script that tries to implement the same functionality.
+
+Some packages (like `util-linux`) try to run that first and may accidentally
+pick up a version you have installed on your host system.
+
+So we copy that over from the `$SYSROOT/bin` directory to `$TCDIR/bin`:
+
+ cp "$SYSROOT/bin/ncursesw6-config" "$TCDIR/bin/ncursesw6-config"
+ cp "$SYSROOT/bin/ncursesw6-config" "$TCDIR/bin/$TARGET-ncursesw6-config"
+
+### Readline
+
+Having built the ncurses library, we can now build the `readline` library,
+which implements a highly configurable command prompt with a search-able
+history, auto completion and Emacs style key bindings.
+
+Unlike `ncurses` that it is based on, it does not requires any special
+configure flags:
+
+ auto_build "readline-8.1"
+
+It only installs the library (`libreadline` and `libhistory`) and headers
+for the libraries, but no programs.
+
+A bunch of documentation files are installed to `$SYSROOT/usr/share`, including
+not only man pages, but info pages, plain text documentation in a `doc` sub
+directory.
+
+### Bash
+
+For Bash itself, I used only two extra configure flags:
+
+ auto_build "bash-5.1" --without-bash-malloc --with-installed-readline
+
+The **--without-bash-malloc** flag tells it to use the standard `libc` malloc
+instead of its own implementation and the **--with-installed-readline** flag
+tells it to use the readline library that we just installed, instead of an
+outdated, internal stub implementation.
+
+The `make install` step installs bash to `$SYSROOT/bin/bash`, but also adds an
+additional script `$SYSROOT/bin/bashbug` which is intended to report bugs you
+may encounter back to the bash developers.
+
+Bash has a plugin style support for builtin commands, which in can load as
+libraries from a special directory (in our case `$SYSROOT/lib/bash`). So it will
+install a ton of builtins there by default, as well as development headers
+in `$SYSROOT/usr/include` that for third party packages that implement their own
+builtins.
+
+Just like readline, Bash brings along a bunch of documentation that it installs
+to `$SYSROOT/usr/share`. Namely HTML documentation in `doc`, more info pages
+and man pages.
+
+Bash is also the first program we build that installs localized text messages
+in the `$SYSROOT/usr/share/locale` directory.
+
+Of course we are no where near done with Bash yet, as we still have some
+plumbing to do regarding the bash start-up script, but we will get back to that
+later when we put everything together.
+
+
+## Basic Command Line Programs
+
+### Basic GNU Packages
+
+The standard comamnd line programs, such as `ls`, `cat`, `rm` and many more are
+provided by the [GNU Core Utilities](https://www.gnu.org/software/coreutils/)
+package.
+
+It is fairly simple to build and install:
+
+ auto_build "coreutils-8.32" --enable-single-binary=symlinks
+
+The **--enable-single-binary** configure switch is a relatively recent feature
+that instructs coreutils to build a single, monolithic binary in the same style
+as BusyBox and install a bunch of symlinks to it, instead of installing dozens
+of separate programs.
+
+A few additional, very useful programs are provided by the [GNU diffutils](https://www.gnu.org/software/diffutils/)
+and [GNU findutils](https://www.gnu.org/software/findutils/).
+
+Namely, the diffutils provide `diff`, `patch` and `cmp`, while the findutils
+provide `find`, `locate`, `updatedb` and `xargs`.
+
+The GNU implementation of the `grep` program, as well as the `less` pager are
+packaged separately.
+
+All of those are relatively easy to build and install:
+
+ auto_build "diffutils-3.7"
+ auto_build "findutils-4.8.0"
+ auto_build "grep-3.6"
+ auto_build "less-563"
+
+Again those packages also install a bunch of documentation in the `/usr/share`
+directory in addition to the program binaries.
+
+Coreutils installs a helper library in `$SYSROOT/lib/libexec/coretuils` and
+findutils installs a libexec helper program as well (`frcode`).
+
+The findutils package also creates an empty `$SYSROOT/var` directory, because
+this is where `locate` expects to find a filesystem index database that can be
+generated using `updatedb`.
+
+### Sed and AWK
+
+We have already used `sed`, the *stream editor* for simple file substitution
+in the previous sections for building the kernel and BusyBox.
+
+AWK is a much more advanced stream editing program that actually implements a
+powerful scripting language for the purpose.
+
+For both programs, we will use the GNU implementations (the GNU AWK
+implementation is called `gawk`).
+
+Both are fairly straight forward to build for our system:
+
+ auto_build "sed-4.8"
+ auto_build "gawk-5.1.0"
+
+Again, we not only get the programs, but also a plethora of documentation and
+extra files.
+
+In fact, gawk will install an entire AWK library in `$SYSROOT/usr/share/awk` as
+well as a number of plugin libraries in `$SYSROOT/lib/gawk`, some helper
+programs in `$SYSROOT/lib/libexec/awk` and necessary development headers to
+build external plugins.
+
+
+### procps aka procps-ng
+
+As the name suggests, the `procps` package supplies a few handy command line
+programs for managing processes. More precisely, we install the following
+list of programs:
+
+ - `free`
+ - `watch`
+ - `w`
+ - `vmstat`
+ - `uptime`
+ - `top`
+ - `tload`
+ - `sysctl`
+ - `slabtop`
+ - `pwdx`
+ - `ps`
+ - `pmap`
+ - `pkill`
+ - `pidof`
+ - `pgrep`
+ - `vmstat`
+
+And their common helper library `libprocps.so` plus accompanying documentation.
+The package would also supply the `kill` program, but we don't install that
+here, sine it is also provided by the `util-linux` package and the later has a
+few extra features.
+
+Sadly, we don't get a propper release tarball for `procps`, but only a dump from
+the git repository. Because the `configure` script and Makefile templates are
+generated by autoconf and automake, they are not checked into the repository.
+
+Many autotools based projects have a `autogen.sh` scrip that checks for the
+required tools and takes care of generating the actual build system from the
+configuration.
+
+So the frist thing we do, is goto into the `procps` source tree and generate
+the build system ourselves:
+
+ cd "$BUILDROOT/src/procps-v3.3.16"
+ ./autogen.sh
+ cd "$BUILDROOT"
+
+Now, we can build `procps`:
+
+ export ac_cv_func_malloc_0_nonnull=yes
+ export ac_cv_func_realloc_0_nonnull=yes
+ auto_build "procps-v3.3.16" --enable-watch8bit --disable-kill --with-gnu-ld
+
+As you can see, some clutches are required here. First of all, the configure
+script attempts to find out if `malloc(0)` and `realloc(0)` return NULL or
+something else.
+
+Technically, when trying to allocate 0 bytes of memory, the C standard permits
+the standard library to return a `NULL` pointer instead of a valid pointer to
+some place in memory. Some libraries like the widely used `glibc` opted to
+instead return a valid point. Many programs pass a programatically generated
+size to `malloc` and assume that `NULL` means "out of memory" or similar
+dramatic failure, especially if `glibc` behaviour encourages this.
+
+Instead of fixing this, some programs instead decided to add a compile time
+check and add a wrappers for `malloc`, `realloc` and `free` if it
+returns `NULL`. Since we are cross compiling, this check cannot be run. So
+we set the result variables manually, the `configure` script "sees" that and
+skips the check.
+
+The `--enable-watch8bit` flag enables propper UTF-8 support for the `watch`
+program, for which it requires `ncursesw` instead of regular `ncurses` (but
+ironically this is one of the packages that fails without the symlink
+of `ncurses` to `ncursesw`).
+
+The other flag `--disable-kill` compiles the package without the `kill` program
+for the reasons stated above and the final flag `--with-gnu-ld` tells it that
+the linker is the GNU version of `ld` which it, by default, assumes to not be
+the case.
+
+
+### psmisc
+
+The `psmisc` package contains a hand full of extra programs for process
+management that aren't already in `procps`, namely it contains `fuser`,
+`killall`, `peekfd`, `prtstat` and `pstree`.
+
+Similar to `procps`, we need to generate the build system ourselves, but we will
+also take the opertunity to apply some changes using `sed`:
+
+ cd "$BUILDROOT/src/psmisc-v22.21"
+ sed -i 's/ncurses/ncursesw/g' configure.ac
+ sed -i 's/tinfo/tinfow/g' configure.ac
+ sed -i "s#./configure \"\$@\"##" autogen.sh
+ ./autogen.sh
+ cd "$BUILDROOT"
+
+The frist two `sed` lines patch the configure script to try `ncursesw`
+and `tinfow` instead of `ncruses` and `tinfo` respectively.
+
+The third `sed` line makes sure that the `autogen.sh` _does not_ run the
+configure script once it is done.
+
+With that done, we can now build `psmisc` with largely the similar fixes:
+
+ export ac_cv_func_malloc_0_nonnull=yes
+ export ac_cv_func_realloc_0_nonnull=yes
+ export CFLAGS="-O2 -include limits.h -include sys/sysmacros.h"
+ auto_build "psmisc-v22.21"
+
+The first two are explained in the `procps` build, the final `export CFLAGS`
+line passes some additional flags to the C compiler. The underlying problem
+is that this release of `psmisc` uses the `PATH_MAX` macro from `limits.h`
+in some places and the `makedev` macro from `sys/sysmacros.h`, but includes
+neither of those headers. This works for them, because `glibc` includes those
+from other headers that they include, but `musl` doesn't. Using the `-incldue`
+option, we force `gcc` to include those headers before processign any C file.
+
+When you are don with this, don't forget to
+
+ unset CFLAGS
+
+Lastly, we get rid of the `libtool` archive installed by `procps`:
+
+ rm "$SYSROOT/lib/libprocps.la"
+
+### The GNU nano text editor
+
+GNU nano is an `ncurses` based text editor that is fairly user friendly and
+installed by default on many Debian based distributions. Being a GNU program,
+it knows how to use autotools and is fairly simple to build and install:
+
+ auto_build "nano-5.6.1"
+
+Along with the program itself it also installs a lot of scripts for syntax
+highlighting. There is not much more to say here, other than maybe that it
+has an easter egg, that we could disable through a `configure` flag.
+
+
+## Archival and Compression Programs
+
+### GNU tar and friends
+
+On Unix-like systems `tar`, the **t**ape **ar**chive program, is
+basically *the standard* archival program.
+
+The tar format itself is dead simple. Files are simply glued together with
+a 512 byte header in front of every file and null bytes at the end to round
+it up to a multiple of 512 bytes. Directories and the like simply use a single
+header with no content following. This is also the reason why you can't create
+an empty tar file: it would simply be an empty file. Also, tarballs have no
+index. You cannot do random access and in order to unpack a single file,
+the `tar` program has to scan across the entire file.
+
+Tar itself doesn't do compression. When you see a compressed tarball, such
+as a `*.tar.gz` file, it has been fed through a compression program like `gzip`
+and must be uncompressed before the `tar` program can handle it. Programs
+like `gzip` only do compression of individual files and have no idea what they
+process. As a result, `gzip` will compress across tar headers and you really
+need to unpack and scan the entire thing when you want to unpack a single file.
+
+The `tar` program *can* actually work with compressed tar archives, but what
+it does (or at least what GNU tar does) internally is, checking at run time if
+the compressor program is available and starting it as a child process that it
+through which it feeds the data.
+
+Compression formats typically used together with `tar` are `xz`, `gzip`
+and `bzip2`. Nowadays `Zstd` is also slowly gaining adoption.
+
+The xz-uilities, GNU gzip and GNU tar are fairly simple to build, since they
+all use the GNU build system:
+
+ auto_build "xz-5.2.5"
+ auto_build "gzip-1.10"
+ auto_build "tar-1.34"
+
+Of course, they will also install development headers, libraries, a bunch of
+wrapper shell script that allow using `grep`, `less` or `diff` on compressed
+files, and a lot of documentation.
+
+The `xz` package installs a `libtool` archive that we simply remove:
+
+ rm "$SYSROOT/lib/liblzma.la"
+
+The GNU tar package also installs a `libexec` helper called `rmt`,
+the **r**e**m**ote **t**ape drive server.
+
+
+Of course, the `bzip2` program is a bit more involved, since it uses a custom
+Makefile.
+
+We first setup our build directory in the usual way:
+
+ mkdir -p "$BUILDROOT/build/bzip2-1.0.8"
+ cd "$BUILDROOT/build/bzip2-1.0.8"
+ srcdir="$BUILDROOT/src/bzip2-1.0.8"
+
+Then we copy over the source files:
+
+ cp "$srcdir"/*.c .
+ cp "$srcdir"/*.h .
+ cp "$srcdir"/words* .
+ cp "$srcdir/Makefile" .
+
+We manually compile the Makefile targets that we are interested in:
+
+ make CFLAGS="-Wall -Winline -O2 -D_FILE_OFFSET_BITS=64 -O2 -Os" \
+ CC=${TARGET}-gcc AR=${TARGET}-ar \
+ RANLIB=${TARGET}-ranlib libbz2.a bzip2 bzip2recover
+
+The compiler, archive tool `ar` for building static libraries, and `ranlib`
+(indexing tool for static libraries) are manually specified on the command
+line.
+
+We copy the programs, library and header over manually:
+
+ cp bzip2 "$SYSROOT/bin"
+ cp bzip2recover "$SYSROOT/bin"
+ cp libbz2.a "$SYSROOT/lib"
+ cp bzlib.h "$SYSROOT/usr/include"
+ ln -s bzip2 "$SYSROOT/bin/bunzip2"
+ ln -s bzip2 "$SYSROOT/bin/bzcat"
+
+The symlinks `bunzip2` and `bzcat` both point to the `bzip2` binary which
+when run deduces from the path that it should act as a decompression tool
+instead.
+
+Bzip2 also provides a bunch of wrapper scripts like gzip and xz:
+
+ cp "$srcdir/bzdiff" "$SYSROOT/bin"
+ cp "$srcdir/bzdiff" "$SYSROOT/bin"
+ cp "$srcdir/bzmore" "$SYSROOT/bin"
+ cp "$srcdir/bzgrep" "$SYSROOT/bin"
+ ln -s bzgrep "$SYSROOT/bin/bzegrep"
+ ln -s bzgrep "$SYSROOT/bin/bzfgrep"
+ ln -s bzmore "$SYSROOT/bin/bzless"
+ ln -s bzdiff "$SYSROOT/bin/bzcmp"
+ cd "$BUILDROOT"
+
+
+### Zlib
+
+Zlib is a library that implements the deflate compression algorithm that is used
+for data compression in formats like `gzip` or `zip` (and thanks to `zlib` also
+in a bunch of other formats).
+
+In case you are wondering why we install `zlib` this after `gzip` if it uses the
+same base compression algorithm: the later has it's own implmentation and won't
+benefit from an installed version of zlib.
+
+Because `zlib` also rolls it's own configure script, we do the same dance again
+with copying the required stuff over into our build directory:
+
+ mkdir -p "$BUILDROOT/build/zlib-1.2.11"
+ cd "$BUILDROOT/build/zlib-1.2.11"
+ srcdir="$BUILDROOT/src/zlib-1.2.11"
+
+ cp "$srcdir"/*.c "$srcdir"/*.h "$srcdir"/zlib.pc.in .
+ cp "$srcdir"/configure "$srcdir"/Makefile* .
+
+We can then proceed to cross compile the static `libz.a` library:
+
+ CROSS_PREFIX="${TARGET}-" prefix="/usr" ./configure
+ make libz.a
+
+The target is named explicitly here, because by default the `Makefile` would
+try to compile a couple test programs as well.
+
+With everything compiled, we can then copy the result over into our
+sysroot directory and go back to the build root:
+
+ cp libz.a "$SYSROOT/usr/lib/"
+ cp zlib.h "$SYSROOT/usr/include/"
+ cp zconf.h "$SYSROOT/usr/include/"
+ cp zlib.pc "$SYSROOT/usr/lib/pkgconfig/"
+ cd "$BUILDROOT"
+
+
+## Miscellaneous
+
+### The file program
+
+The `file` command line program can magically identify and describe tons of
+file types. The core functionallity is actually implemented in a library
+called `libmagic` that comes with a data base of magic numbers.
+
+There is one little quirk tough, in order to cross compile `file`, we need
+the same version of `file` already installed, so it can build the magic data
+base.
+
+So first, we manually compile `file` and install it in our toolchain directory:
+
+ srcdir="$BUILDROOT/src/file-5.40"
+ mkdir -p "$BUILDROOT/build/file-host"
+ cd "$BUILDROOT/build/file-host"
+
+ unset PKG_CONFIG_SYSROOT_DIR
+ unset PKG_CONFIG_LIBDIR
+ unset PKG_CONFIG_PATH
+
+ $srcdir/configure --prefix="$TCDIR" --build="$HOST" --host="$HOST"
+ make
+ make install
+
+At this point, it should be fairly straight forward to understand what this
+does. The 3 `unset` lines revert the `pkg-config` paths so that we can propperly
+link it against our host libraries.
+
+After that, we of course need to reset the `pkg-config` exports:
+
+ export PKG_CONFIG_SYSROOT_DIR="$SYSROOT"
+ export PKG_CONFIG_LIBDIR="$SYSROOT/lib/pkgconfig"
+ export PKG_CONFIG_PATH="$SYSROOT/lib/pkgconfig"
+
+After that, we can simply install `file`, `libmagic` and the data base:
+
+ auto_build "file-5.40"
+
+Of course, `libmagic` installs a `libtool` archive, so we delete that:
+
+ rm "$SYSROOT/lib/libmagic.la"
+
+The reason for building this after all the other tools is that `file` can make
+use of the compressor libraries we installed previously to peek into compressed
+files, so for instance, it can identify a gzip compressed tarball as such
+instead of telling you it's a gzip compressed whatever.
+
+### The util-linux collection
+
+The `util-linux` pacakge contains a large collection of command line tools that
+implement Linux specific functionallity that is missing from more generic
+collections like the GNU core utilities. Among them are essentials like
+the `mount` program, loop back device control or `rfkill`. I won't list all of
+them, since we individually enable them throught he `configure` script, so a you
+can see a detailed list below.
+
+The package can easily be built with our autotools helper:
+
+ auto_build "util-linux-2.36" --without-systemd --without-udev \
+ --disable-all-programs --disable-bash-completion --disable-pylibmount \
+ --disable-makeinstall-setuid --disable-makeinstall-chown \
+ --enable-mount --enable-losetup --enable-fsck --enable-mountpoint \
+ --enable-fallocate --enable-unshare --enable-nsenter \
+ --enable-hardlink --enable-eject --enable-agetty --enable-wdctl \
+ --enable-cal --enable-switch_root --enable-pivot_root \
+ --enable-lsmem --enable-ipcrm --enable-ipcs --enable-irqtop \
+ --enable-lsirq --enable-rfkill --enable-kill --enable-last \
+ --enable-mesg --enable-raw --enable-rename --enable-ul --enable-more \
+ --enable-setterm --enable-libmount --enable-libblkid --enable-libuuid \
+ --enable-libsmartcols --enable-libfdisk
+
+As mentioned before, the lengthy list of `--enable-<foo>` is only there because
+of the `--disable-all-programs` switch that turns everything off that we don't
+excplicitly enable.
+
+Among the things we skip are programs like `login` or `su` that handle user
+authentication. However, the `agetty` program which implements the typical
+terminal login promt is explicitly enabled. We will get back to that later
+on, when setting up an init system and `shadow-utils` for the user
+authentication.
+
+After the build is done, some cleanup steps need to be taken care of:
+
+ mv "$SYSROOT"/sbin/* "$SYSROOT/bin"
+ rm "$SYSROOT/lib"/*.la
+ rmdir "$SYSROOT/sbin"
+
+Even when configuring util-linux to install into `/bin` instead of `/sbin`,
+it will still stubbornly install `rfkill` into `/sbin`.
+
+The `*.la` files that are removed are from the helper libraries `libblkid.so`,
+`libsmartcols.so`, `libuuid.so` and`libfdisk.so`.
+
+
+## Cleaning up
+
+A signifficant portion of the compiled binaries consists of debugging symbols
+that a can easily be removed using a strip command.
+
+The following command line uses the `find` command to locate all regular
+files `-type f`, i.e. it won't print symlinks or directories and for each of
+them runs the `file` command to identify their file type. The list is fed
+through `grep ELF` to filter out the ones identified as ELF files (because
+there are also some shell scripts in there, which we obviously can't strip)
+and then fed through`'sed` to remove the `: ELF...` description.
+
+The resulting list of excutable files is then passed on to the `${TARGET}-strip`
+program from our toolchain using `xargs`:
+
+ find "$SYSROOT/bin" -type f -exec file '{}' \; | \
+ grep ELF | sed 's/: ELF.*$//' | xargs ${TARGET}-strip -xs
+
+On my Fedora system, this drastically cuts the size of the `/bin` directory
+from ~67.5 MiB (28,595,781 bytes) down to ~7 MiB (7,346,105 bytes).
+
+We do the same thing a second time for the `/lib` directory. Please note the
+extra argument `! -path '*/lib/modules/*'` for the `find` command to make sure
+we skip the kernel modules.
+
+ find "$SYSROOT/lib" -type f ! -path '*/lib/modules/*' -exec file '{}' \; |\
+ grep ELF | sed 's/: ELF.*$//' | xargs ${TARGET}-strip -xs
+
+On my system, this reduces the size of the `/lib` directory
+from ~67 MiB (70,574,158 bytes) to ~60 MiB (63,604,818 bytes).
+
+If you are the kind of person who loves to ramble about "those pesky shared
+libraries" and insist on statically linking everything, you should take a look
+of what uses up that much space:
+
+The largest chunk of the `/lib` directory, is kernel modules. On my system
+those make up ~54.5 MiB (57,114,381 bytes) of the two numbers above. So if you
+are bent on cutting the size down, you should start by tossing out modules you
+don't need.
+
+
+## Packing into a SquashFS Filesystem
+
+SquashFS is a highly compressed, read-only filesystem that is packed offline
+into an archive that can than be mounted by Linux.
+
+Besides the high compression, which drastically reduces the on-disk memory
+footprint, being immutable and read-only has a number of advantages in itself.
+Our system will stay in a well defined state and the lack of write operations
+reduces wear on the SD card. Writable directories (e.g. for temporary files
+in `/tmp` or for things like log files that you actually want to write) are
+typically achieved by mounting another filesystem to a directory on the
+SquashFS root (e.g. from another SD card partition, or simply mounting
+a `tmpfs`).
+
+This can also be combined with an `overlayfs`, where a directory on the
+SquashFS can be merged with a directory from a writable filesystem. Any changes
+to existing files are implemented by transparentyl copying the file to the
+writable filesystem first and editing it there. Erasing the writable directory
+essentially causes a "factory reset" to the initial content.
+
+We will revisit this topic later on, for now we are just interested in packing
+the filesystem and testing it out.
+
+For packing a SquashFS image, we use [squashfs-tools-ng](https://github.com/AgentD/squashfs-tools-ng).
+
+The reason for using `squashfs-tools-ng` is that it contains a handy tool
+called `gensquashfs` that takes an input listing similar to `gen_init_cpio`.
+
+On some systems, you can just install it from the package repository. But be
+aware that I'm going to use a few features that were introduced in version 1.1,
+which currently isn't packaged on some systems.
+
+If you are building the package yourself, you need the devlopment packages for
+at least one of the compressors that SquashFS supports (e.g. xz-utils or Zstd).
+
+Because we compile a host tool again, we need to unset the `pkg-config` path
+variables first:
+
+ unset PKG_CONFIG_SYSROOT_DIR
+ unset PKG_CONFIG_LIBDIR
+ unset PKG_CONFIG_PATH
+
+We build the package the same way as other host tools and install it into
+the toolchain directory:
+
+ srcdir="$BUILDROOT/src/squashfs-tools-ng-1.1.0"
+ mkdir -p "$BUILDROOT/build/squashfs-tools-ng-1.1.0"
+
+ cd "$BUILDROOT/build/squashfs-tools-ng-1.1.0"
+ $srcdir/configure --prefix=$TCDIR --host=$HOST --build=$HOST
+
+ make
+ make install
+ cd "$BUILDROOT"
+
+The listing file for the SquashFS archive is a little bit longer than the
+one for the initital ramfs. I included [a prepared version](list.txt).
+
+If you examine the list, you will find that many files of the sysroot aren't
+packed. Specifically the header files, man pages (plus other documentation)
+and static libraries. The development files are omitted, because without
+development tools (e.g. gcc, ...) they are useless on the target system.
+Likewise, we don't have any tools yet to actually view the documentation files.
+Omitting those safes us some space.
+
+Using the listing, we can pack the root filesystem using `gensquashfs`:
+
+ gensquashfs --pack-dir "$SYSROOT" --pack-file list.txt -f rootfs.sqfs
+
+
+On my system, the resulting archive is ~18.3 MiB in size (19,238,912 bytes).
+
+For comparison, I also tried the unstripped binaries, resuling in a SquashFS
+archive of ~26.7 MiB (27,971,584 bytes) and packing without any kernel modules,
+resulting in only ~4.4 MiB (4,583,424 bytes).
+
+
+## Testing it on Hardware
+
+First of, we will revise the `/init` script of our initial ram filesystem as
+follows:
+
+ cd "$BUILDROOT/build/initramfs"
+
+ cat > init <<_EOF
+ #!/bin/sh
+
+ PATH=/bin
+
+ /bin/busybox --install
+ /bin/busybox mount -t proc none /proc
+ /bin/busybox mount -t sysfs none /sys
+ /bin/busybox mount -t devtmpfs none /dev
+
+ boot_part="mmcblk0p1"
+ root_sfs="rootfs.sqfs"
+
+ while [ ! -e "/dev/$boot_part" ]; do
+ echo "Waiting for device $boot_part"
+ busybox sleep 1
+ done
+
+ mount "/dev/$boot_part" "/boot"
+
+ if [ ! -e "/boot/${root_sfs}" ]; then
+ echo "${root_sfs} not found!"
+ exec /bin/busybox sh
+ exit 1
+ fi
+
+ mount -t squashfs /boot/${root_sfs} /newroot
+ umount -l /boot
+
+ umount -l /dev
+ umount /sys
+ umount /proc
+
+ unset -v root_sfs boot_part
+
+ exec /bin/busybox switch_root /newroot /bin/bash
+ _EOF
+
+This new init script starts out pretty much the same way, but instead of
+dropping directly into a `busybox` shell, we first mount the primrary
+partition of the SD card (in my case `/dev/mmcblk0p1`) to `/boot`.
+
+As the device node may not be present yet in the `/dev` filesystem, we wait
+in a loop for it to pop up.
+
+From the SD card, we then mount the `rootfs.sqfs` that we just generated,
+to `/newroot`. There is a bit of trickery involved here, because traditional,
+Unix-like operating systems can only mount devices directly. The mount point
+has a filesystem driver associated with it, and an underlying device number.
+In order to mount an archive from a file, Linux has a loop back block device,
+which works like a regular block device, but reflects read/write access back
+to an existing file in the filesystem. The `mount` command transparently takes
+care of setting up the loop device for us, and then actually.
+
+After that comes a bit of a mind screw. We cleanup after ourselves, i.e. unset
+the environment variables, but also unmount everything, *including* the `/boot`
+directory, from which the SquashFS archive was mounted.
+
+Note the `-l` parameter for the mount, which means *lazy*. The kernel detaches
+the filesystem from the hierarchy, but keeps it open until the last reference
+is removed (in our case, held by the loop back block device).
+
+The final `switch_root` works somewhat similar to a `chroot`, except that it
+actually does change the underlying mountpoints and also gets rid of the
+initial ram filesystem for us.
+
+After extending the init script, we can rebuild the initramfs:
+
+ ./gen_init_cpio initramfs.files | xz --check=crc32 > initramfs.xz
+ cp initramfs.xz "$SYSROOT/boot"
+ cd "$BUILDROOT"
+
+We, again, copy everything over to the SD card (don't forget the rootfs.sqfs)
+and boot up the Raspberry Pi.
+
+This should now drop you directly into a `bash` shell on the SquashFS image.
+
+If you try to run certain commands like `mount`, keep in mind that `/proc`
+and `/sys` aren't mounted, causing the resulting error messages. But if you
+manually mount them again, everything should be fine again.
diff --git a/README.md b/README.md
index 2da222b..f068a82 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ This guide is divided into the following parts:
* [Cross compiling a statically linked BusyBox and the kernel](02_kernel.md).
The BusyBox is packaged into a small initramfs. We will make it boot on the
Rapsberry Pi and explore some parts of the Linux boot process.
-* [Building a more sophisticated userland](userland.md). Mostly a
- Linux-From-Scratch-Style "lets build some packages". The userland will be
- packed into a SquashFS image. The BusyBox based initrd is modified to mount
- it and switch into it.
+* [Building a simple userland](03_basic_usr.md). Mostly a
+ Linux-From-Scratch-Style walkthrough to building some packages for a simple
+ GNU userland. The userland is packed into a SquashFS image. The BusyBox
+ based initrd is modified to mount it and switch into it.
diff --git a/download_usr1.sh b/download_usr1.sh
new file mode 100755
index 0000000..c5371a3
--- /dev/null
+++ b/download_usr1.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+
+set -e
+
+NCURSES="ncurses-6.2.tar.gz"
+READLINE="readline-8.1.tar.gz"
+BASH="bash-5.1.tar.gz"
+COREUTILS="coreutils-8.32.tar.xz"
+DIFFUTILS="diffutils-3.7.tar.xz"
+FINDUTILS="findutils-4.8.0.tar.xz"
+UTILLINUX="util-linux-2.36.tar.xz"
+GAWK="gawk-5.1.0.tar.xz"
+GREP="grep-3.6.tar.xz"
+SED="sed-4.8.tar.xz"
+TAR="tar-1.34.tar.xz"
+NANO="nano-5.6.1.tar.xz"
+XZ="xz-5.2.5.tar.xz"
+GZIP="gzip-1.10.tar.xz"
+BZIP2="bzip2-1.0.8.tar.gz"
+ZLIB="zlib-1.2.11.tar.gz"
+LESS="less-563.tar.gz"
+FILE="file-5.40.tar.gz"
+PROCPS="procps-v3.3.16.tar.bz2"
+PSMISC="psmisc-v22.21.tar.bz2"
+SQFSNG="squashfs-tools-ng-1.1.0.tar.xz"
+
+mkdir -p "download" "src"
+
+download() {
+ if [ ! -f "download/$2" ]; then
+ curl -L "https://$1/$2" > "download/$2"
+ fi
+}
+
+download "ftp.gnu.org/gnu/readline" "$READLINE"
+download "ftp.gnu.org/gnu/bash" "$BASH"
+download "ftp.gnu.org/gnu/coreutils" "$COREUTILS"
+download "ftp.gnu.org/gnu/diffutils" "$DIFFUTILS"
+download "ftp.gnu.org/gnu/findutils" "$FINDUTILS"
+download "ftp.gnu.org/gnu/gawk" "$GAWK"
+download "ftp.gnu.org/gnu/grep" "$GREP"
+download "ftp.gnu.org/gnu/sed" "$SED"
+download "ftp.gnu.org/gnu/tar" "$TAR"
+download "ftp.gnu.org/gnu/nano" "$NANO"
+download "ftp.gnu.org/gnu/gzip" "$GZIP"
+download "sourceware.org/pub/bzip2" "$BZIP2"
+download "zlib.net" "$ZLIB"
+download "invisible-mirror.net/archives/ncurses" "$NCURSES"
+download "downloads.sourceforge.net/project/lzmautils" "$XZ"
+download "mirrors.edge.kernel.org/pub/linux/utils/util-linux/v2.36" "$UTILLINUX"
+download "www.greenwoodsoftware.com/less" "$LESS"
+download "astron.com/pub/file" "$FILE"
+download "gitlab.com/procps-ng/procps/-/archive/v3.3.16" "$PROCPS"
+download "gitlab.com/psmisc/psmisc/-/archive/v22.21" "$PSMISC"
+download "infraroot.at/pub/squashfs" "$SQFSNG"
+
+cat > download_usr1.sha256 <<_EOF
+30306e0c76e0f9f1f0de987cf1c82a5c21e1ce6568b9227f7da5b71cbea86c9d download/$NCURSES
+f8ceb4ee131e3232226a17f51b164afc46cd0b9e6cef344be87c65962cb82b02 download/$READLINE
+cc012bc860406dcf42f64431bcd3d2fa7560c02915a601aba9cd597a39329baa download/$BASH
+4458d8de7849df44ccab15e16b1548b285224dbba5f08fac070c1c0e0bcc4cfa download/$COREUTILS
+b3a7a6221c3dc916085f0d205abf6b8e1ba443d4dd965118da364a1dc1cb3a26 download/$DIFFUTILS
+57127b7e97d91282c6ace556378d5455a9509898297e46e10443016ea1387164 download/$FINDUTILS
+9e4b1c67eb13b9b67feb32ae1dc0d50e08ce9e5d82e1cccd0ee771ad2fa9e0b1 download/$UTILLINUX
+cf5fea4ac5665fd5171af4716baab2effc76306a9572988d5ba1078f196382bd download/$GAWK
+667e15e8afe189e93f9f21a7cd3a7b3f776202f417330b248c2ad4f997d9373e download/$GREP
+f79b0cfea71b37a8eeec8490db6c5f7ae7719c35587f21edb0617f370eeff633 download/$SED
+63bebd26879c5e1eea4352f0d03c991f966aeb3ddeb3c7445c902568d5411d28 download/$TAR
+760d7059e0881ca0ee7e2a33b09d999ec456ff7204df86bee58eb6f523ee8b09 download/$NANO
+ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269 download/$BZIP2
+8425ccac99872d544d4310305f915f5ea81e04d0f437ef1a230dc9d1c819d7c0 download/$GZIP
+3e1e518ffc912f86608a8cb35e4bd41ad1aec210df2a47aaa1f95e7f5576ef56 download/$XZ
+c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1 download/$ZLIB
+ce5b6d2b9fc4442d7a07c93ab128d2dff2ce09a1d4f2d055b95cf28dd0dc9a9a download/$LESS
+167321f43c148a553f68a0ea7f579821ef3b11c27b8cbe158e4df897e4a5dd57 download/$FILE
+720caf307ab5dfe6d1cf4fc3e6ce786d749c69baa088627dbe1b01828f2528b1 download/$PROCPS
+1ec96d8f88905f01c857c0bbef1694aa913c5de91bc7b6f51296699df4cef389 download/$PSMISC
+110794124b268e92e28e6a95f0781d1338f48c338434ef746f5de68c64e19aeb download/$SQFSNG
+_EOF
+
+sha256sum -c download_usr1.sha256
+
+tar -xf "download/$NCURSES" -C "src"
+tar -xf "download/$READLINE" -C "src"
+tar -xf "download/$BASH" -C "src"
+tar -xf "download/$COREUTILS" -C "src"
+tar -xf "download/$DIFFUTILS" -C "src"
+tar -xf "download/$FINDUTILS" -C "src"
+tar -xf "download/$GREP" -C "src"
+tar -xf "download/$GAWK" -C "src"
+tar -xf "download/$SED" -C "src"
+tar -xf "download/$TAR" -C "src"
+tar -xf "download/$UTILLINUX" -C "src"
+tar -xf "download/$NANO" -C "src"
+tar -xf "download/$XZ" -C "src"
+tar -xf "download/$GZIP" -C "src"
+tar -xf "download/$BZIP2" -C "src"
+tar -xf "download/$ZLIB" -C "src"
+tar -xf "download/$LESS" -C "src"
+tar -xf "download/$FILE" -C "src"
+tar -xf "download/$PROCPS" -C "src"
+tar -xf "download/$PSMISC" -C "src"
+tar -xf "download/$SQFSNG" -C "src"
diff --git a/list.txt b/list.txt
new file mode 100644
index 0000000..6b476f1
--- /dev/null
+++ b/list.txt
@@ -0,0 +1,41 @@
+dir /boot 0755 0 0
+dir /etc 0755 0 0
+dir /dev 0755 0 0
+dir /tmp 1777 0 0
+dir /mnt 0755 0 0
+dir /proc 0755 0 0
+dir /run 0755 0 0
+dir /sys 0755 0 0
+dir /var 0755 0 0
+
+dir /bin 0755 0 0
+glob /bin 0755 0 0 bin
+
+dir /lib 0755 0 0
+glob /lib 0755 0 0 -type d lib
+glob /lib/bash 0755 0 0 -type f lib/bash
+glob /lib/gawk 0755 0 0 -type f lib/gawk
+glob /lib/libexec 0755 0 0 -type f lib/libexec
+glob /lib/modules 0755 0 0 -type f lib/modules
+glob /lib/modules 0777 0 0 -type l lib/modules
+slink /lib/terminfo 0777 0 0 /usr/share/terminfo
+glob /lib 0755 0 0 -type f -name "*.so*" -nonrecursive lib
+glob /lib 0777 0 0 -type l -name "*.so.*" -nonrecursive lib
+
+dir /usr 0755 0 0
+dir /usr/share 0755 0 0
+slink /usr/bin 0777 0 0 /bin
+slink /usr/lib 0777 0 0 /lib
+dir /usr/share/awk 0755 0 0
+dir /usr/share/misc 0755 0 0
+dir /usr/share/nano 0755 0 0
+dir /usr/share/tabset 0755 0 0
+dir /usr/share/terminfo 0755 0 0
+glob /usr/share/awk 0644 0 0 -type f usr/share/awk
+glob /usr/share/misc 0644 0 0 -type f usr/share/misc
+glob /usr/share/nano 0755 0 0 -type d usr/share/nano
+glob /usr/share/nano 0644 0 0 -type f usr/share/nano
+glob /usr/share/tabset 0644 0 0 -type f usr/share/tabset
+glob /usr/share/terminfo 0755 0 0 -type d usr/share/terminfo
+glob /usr/share/terminfo 0644 0 0 -type f usr/share/terminfo
+glob /usr/share/terminfo 0777 0 0 -type l usr/share/terminfo
diff --git a/util.sh b/util.sh
new file mode 100644
index 0000000..96f06c2
--- /dev/null
+++ b/util.sh
@@ -0,0 +1,24 @@
+run_configure() {
+ "$srcdir/configure" --host="$TARGET" --prefix="" --sbindir=/bin \
+ --includedir=/usr/include --datarootdir=/usr/share\
+ --libexecdir=/lib/libexec --disable-static \
+ --enable-shared $@
+}
+
+auto_build() {
+ local pkgname="$1"
+ shift
+
+ mkdir -p "$BUILDROOT/build/$pkgname"
+ cd "$BUILDROOT/build/$pkgname"
+ srcdir="$BUILDROOT/src/$pkgname"
+
+ run_configure $@
+ make -j `nproc`
+ make DESTDIR="$SYSROOT" install
+ cd "$BUILDROOT"
+}
+
+export PKG_CONFIG_SYSROOT_DIR="$SYSROOT"
+export PKG_CONFIG_LIBDIR="$SYSROOT/lib/pkgconfig"
+export PKG_CONFIG_PATH="$SYSROOT/lib/pkgconfig"