Difference between revisions of "Deprecated: FreeBSD as a PV DomU"

From PrgmrWiki
m (Missed a <pre>)
(UFS via NetBSD)
Line 354: Line 354:
  x: Partition sizes ok
  x: Partition sizes ok
You can also use `disklabel` to look at the same thing:
# disklabel xbd1
# /dev/rxbd1d:
type: ESDI
disk: Xen Virtual ESDI
label: fictitious
bytes/sector: 512
sectors/track: 2048
tracks/cylinder: 1
sectors/cylinder: 2048
cylinders: 10240
total sectors: 20971520
rpm: 3600
interleave: 1
trackskew: 0
cylinderskew: 0
headswitch: 0          # microseconds
track-to-track seek: 0  # microseconds
drivedata: 0
5 partitions:
#        size    offset    fstype [fsize bsize cpg/sgs]
c:  20969472      2048    unused      0    0        # (Cyl.      1 -  10239)
d:  20971520        0    unused      0    0        # (Cyl.      0 -  10239)
e:  20969472      2048    4.2BSD      0    0    0  # (Cyl.      1 -  10239)
disklabel: boot block size 0
disklabel: super block size 0</pre>
Create a new FFSv2 filesystem on slice '''f'''.
Create a new FFSv2 filesystem on slice '''f'''.

Revision as of 06:37, 24 May 2013

Nicole's Snazzy FreeBSD 9.0-RELEASE domU guide

This wiki article is a conversion of a blog post.

FreeBSD has fairly complete Xen domU support, though it's not as well-tested as NetBSD's. Nor does the FreeBSD project distribute an easy domU distribution, like NetBSD's netbsd-INSTALL_XEN3_DOMU.gz that's runnable right from pvgrub, so the installation procedure is a tad involved.

You may either build your own copy of FreeBSD/Xen from an existing FreeBSD installation or download my prebuilt copy.

Some caveats:

  • Xen paravirtualization (PV), as used on prgmr, is supported only on FreeBSD i386, not amd64. FreeBSD amd64 supports only Xen HVM and is thus unusable in the prgmr environment.
  • Someone else's binaries. Do you trust me?

Build Your Own Distribution

The operating system is built using the standard procedure, cross-compiling for i386 when necessary.


FreeBSD 9.0-RELEASE has a few outstanding issues on Xen, such as the inability to function with more than 768 MB of provisioned memory and a panic when initializing SMP, even on vcpus=1 instances like mine. These two issues are fixed in 9-STABLE and will be rolled into 9.1-RELEASE later this year. I prefer to stick to releases when I can, though, so apply these two patches to your RELENG_9_0 source tree.

First, fix the memory limitation with this patch, taken from r228746.

--- sys/i386/xen/pmap.c.orig	2011-11-10 23:20:22.000000000 -0500
+++ sys/i386/xen/pmap.c	2012-03-27 12:36:06.553212765 -0400
@@ -184,9 +184,6 @@
 #define PV_STAT(x)	do { } while (0)
-#define	pa_index(pa)	((pa) >> PDRSHIFT)
-#define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
  * Get PDEs and PTEs for user/kernel address space
@@ -230,7 +227,6 @@
  * Data for the pv entry allocation mechanism
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
-static struct md_page *pv_table;
 static int shpgperproc = PMAP_SHPGPERPROC;
 struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
@@ -278,9 +274,6 @@
 static struct mtx PMAP2mutex;
 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-static int pg_ps_enabled;
-SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
-    "Are large page mappings enabled?");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
 	"Max number of PV entries");
@@ -636,24 +629,8 @@
-	vm_page_t mpte;
-	vm_size_t s;
-	int i, pv_npg;
-	 * Initialize the vm page array entries for the kernel pmap's
-	 * page table pages.
-	 */ 
-	for (i = 0; i < nkpt; i++) {
-		mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME));
-		KASSERT(mpte >= vm_page_array &&
-		    mpte < &vm_page_array[vm_page_array_size],
-		    ("pmap_init: page table page is out of range"));
-		mpte->pindex = i + KPTDI;
-		mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME);
-	}
-        /*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
@@ -664,26 +641,6 @@
 	pv_entry_max = roundup(pv_entry_max, _NPCPV);
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
-	/*
-	 * Are large page mappings enabled?
-	 */
-	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
-	/*
-	 * Calculate the size of the pv head table for superpages.
-	 */
-	for (i = 0; phys_avail[i + 1]; i += 2);
-	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;
-	/*
-	 * Allocate memory for the pv head table for superpages.
-	 */
-	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
-	s = round_page(s);
-	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
-	for (i = 0; i < pv_npg; i++)
-		TAILQ_INIT(&pv_table[i].pv_list);
 	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
 	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
 	    PAGE_SIZE * pv_maxchunks);
@@ -3452,21 +3409,15 @@
- * Returns TRUE if the given page is mapped individually or as part of
- * a 4mpage.  Otherwise, returns FALSE.
+ * Returns TRUE if the given page is mapped.  Otherwise, returns FALSE.
 pmap_page_is_mapped(vm_page_t m)
-	boolean_t rv;
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (FALSE);
-	vm_page_lock_queues();
-	rv = !TAILQ_EMPTY(&m->md.pv_list) ||
-	    !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list);
-	vm_page_unlock_queues();
-	return (rv);
+	return (!TAILQ_EMPTY(&m->md.pv_list));

Second, bypass the SMP panic by limiting MAXCPU to 1. If you require SMP support, it may be worth building 9-STABLE instead.

--- sys/i386/include/param.h.orig	2012-03-27 16:15:06.767507025 -0400
+++ sys/i386/include/param.h	2012-03-27 15:46:33.525462762 -0400
@@ -69,7 +69,7 @@
 #if defined(SMP) || defined(KLD_MODULE)
 #ifndef MAXCPU
-#define MAXCPU		32
+#define MAXCPU		1
 #define MAXCPU		1

Optionally, patch xen/clock.c to silence annoying repeated console-spamming clock nudge messages unless the system is booted verbosely. Seen in kern/155353.

--- sys/i386/xen/clock.c.orig	2012-03-15 12:33:13.887459073 -0400
+++ sys/i386/xen/clock.c	2012-03-15 12:35:03.189146788 -0400
@@ -349,7 +349,8 @@
 	if (shadow_tv_version != HYPERVISOR_shared_info->wc_version &&
 	    !independent_wallclock) {
-		printf("[XEN] hypervisor wallclock nudged; nudging TOD.\n");
+		if(bootverbose)
+			printf("[XEN] hypervisor wallclock nudged; nudging TOD.\n");

Kernel Configuration

The stock XEN kernel config file should boot with no trouble. You'll probably want to customize it a bit, though, to enable features like PF/ALTQ and to disable the system-slowing kernel debugging features, especially witness. Disable debugging with caution. You may want to build a XEN kernel first to ensure it boots and to diagnose why if it doesn't.

--- sys/i386/conf/XEN	2011-11-10 23:20:22.000000000 -0500
+++ sys/i386/conf/XEN-COOLTRAINER	2012-04-03 16:04:41.674814536 -0400
@@ -44,16 +44,6 @@
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 options 	AUDIT			# Security event auditing
-# Debugging for use in -current
-options 	KDB			# Enable kernel debugger support.
-options 	DDB			# Support DDB.
-options 	GDB			# Support remote GDB.
-options 	DEADLKRES		# Enable the deadlock resolver
-options 	INVARIANTS		# Enable calls of extra sanity checking
-options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
-options 	WITNESS			# Enable checks to detect deadlocks and cycles
-options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
 options 	PAE
 nooption	NATIVE
 option		XEN
@@ -88,3 +78,16 @@
 # Note that 'bpf' is required for DHCP.
 device		bpf		# Berkeley packet filter
+# Enable PF
+device		pf
+device		pflog
+device		pfsync
+# Enable ALTQ (QoS)
+options	ALTQ
+options	ALTQ_CBQ # Class Bases Queuing (CBQ)
+options	ALTQ_RED # Random Early Detection (RED)
+options	ALTQ_RIO # RED In/Out
+options	ALTQ_HFSC # Hierarchical Packet Scheduler (HFSC)
+options	ALTQ_PRIQ # Priority Queuing (PRIQ)
+options	ALTQ_NOPCC # Required for SMP build


You're ready to build your i386 world and kernel. Use the TARGET and TARGET_ARCH environment variables to cross-compile for i386 if necessary, such as on my amd64 build host. Specify either the stock XEN kernel configuration or the name of any custom configuration you create.

make buildworld TARGET=i386
make buildkernel TARGET=i386 KERNCONF=XEN


Create a directory to serve as the temporary home of your OS distribution, set DESTDIR to its path, and install.

mkdir /root/xenworld
export DESTDIR=/root/xenworld
make installworld TARGET=i386
make installkernel TARGET=i386 KERNCONF=XEN
make distribution TARGET=i386


Set the mount options in fstab for what will be our root filesystem.

echo "/dev/xbd0s2       /               ufs     rw              1       1" > $DESTDIR/etc/fstab

Remove the default virtual terminals from /etc/ttys. They don't exist in the Xen environment and will throw errors into your console at boot if included. Then, add a line for the Xen console device so we can access the system via our out-of-band console.

sed -i '' '/^ttyv/d' $DESTDIR/etc/ttys
echo 'xc0     "/usr/libexec/getty Pc"         vt100   on  secure' >> $DESTDIR/etc/ttys

Make any other changes you wish to include, create a tarball of the Xen world, and copy the kernel ELF out of DESTDIR for later use.

cp $DESTDIR/boot/kernel/kernel /root/kernel
cd $DESTDIR && tar zcvf /root/xenworld.tar.gz .


If you're converting an existing Prgmr instance to FreeBSD, double-check your backups and make sure it holds nothing you'd be mad at yourself for losing.

Reboot your instance to the CentOS rescue environment via the out-of-band console, then blow away your disk with fdisk. Create a 512MiB Linux partition, then dedicate the rest of the disk to FreeBSD, hexadecimal partition type a5.

My properly-partitioned 25GB disk on gigadelic looks like this:

[root@gigadelic ~]# fdisk /dev/xvda
Disk /dev/xvda: 25.7 GB, 25769803776 bytes
255 heads, 63 sectors/track, 3133 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

    Device Boot      Start         End      Blocks   Id  System
/dev/xvda1               1          63      506016   83  Linux
/dev/xvda2              64        3133    24659775   a5  FreeBSD

Format your Linux partition as ext2, a nice universally-mountable default, then mount it and create some needed directories.

mkfs.ext2 /dev/xvda1
mount /dev/xvda1 /mnt && cd /mnt
mkdir -p boot/grub

Transfer your kernel ELF and world tarball to the root of the ext2 partition. These two files will consume approximately 5MiB and 150MiB, respectively. I hosted the two files using www/woof from my build machine.


[root@emi#src] woof -i /root/xenworld/boot/kernel/kernel
Now serving on
gigadelic.cooltrainer.org - - [05/Apr/2012 10:45:06] "GET /kernel HTTP/1.0" 200 -


wget http://emi.aloe.cooltrainer.org:8080/kernel
wget http://emi.aloe.cooltrainer.org:8080/xenworld.tar.gz

Alternatively, download my pre-built world and kernel.

wget http://prgmr.com/~nb/images/beta/kernel-XEN-COOLTRAINER
wget http://prgmr.com/~nb/images/beta/xenworld.tar.gz

Move your kernel of choice to the /mnt/boot directory with filename kernel.

mv kernel-XEN-COOLTRAINER boot/kernel

UFS via NetBSD

Reboot your instance once more, this time to the included NetBSD installer. If your dom0 doesn't include NetBSD, check the directions for fetching the installer in the NetBSD how-to wiki article.

Drop to a shell when given the option. Double-check your partition layout with NetBSD's fdisk. Your disk device will be /dev/xbd0.

fdisk xbd0
Disk: /dev/rxbd0d
NetBSD disklabel disk geometry:
cylinders: 24576, heads: 1, sectors/track: 2048 (2048 sectors/cylinder)
total sectors: 50331648

BIOS disk geometry:
cylinders: 1024, heads: 255, sectors/track: 63 (16065 sectors/cylinder)
total sectors: 50331648

Partition table:
0: Linux native (sysid 131)
    start 63, size 1012032 (494 MB, Cyls 0-62)
        PBR is not bootable: All bytes are identical (0x00)
1: FreeBSD or 386BSD or old NetBSD (sysid 165)
    start 1012095, size 49319550 (24082 MB, Cyls 63-3132)
        PBR is not bootable: Bad magic number (0x5da0)
No active partition.
Drive serial number: 331975 (0x000510c7)

The FreeBSD partition you created in Linux's fdisk will show up as slice f and the ext partition as slice e of xbd0, as visualized here in the partitioning step of NetBSD's guided installer.

    Start  MB   End  MB  Size  MB FS type    Newfs Mount Mount point
    --------- --------- --------- ---------- ----- ----- -----------
 a:         0         0         0 unused
 b:         0         0         0 unused
 c:         0     24575     24576 NetBSD partition
 d:         0     24575     24576 Whole disk
 e:         0       493       494 Linux Ext2
>f:       494     24574     24081 FFSv1      Yes   Yes   /
 g:         0         0         0 unused
 h: Show all unused partitions
 i: Change input units (sectors/cylinders/MB)
 x: Partition sizes ok

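You can also use `disklabel` to look at the same thing. (The sample output below was captured from a different, smaller disk, xbd1; on the instance described here you would run it against xbd0, and the sizes and partition letters will differ.)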

# disklabel xbd1
# /dev/rxbd1d:
type: ESDI
disk: Xen Virtual ESDI
label: fictitious
bytes/sector: 512
sectors/track: 2048
tracks/cylinder: 1
sectors/cylinder: 2048
cylinders: 10240
total sectors: 20971520
rpm: 3600
interleave: 1
trackskew: 0
cylinderskew: 0
headswitch: 0           # microseconds
track-to-track seek: 0  # microseconds
drivedata: 0

5 partitions:
#        size    offset     fstype [fsize bsize cpg/sgs]
 c:  20969472      2048     unused      0     0        # (Cyl.      1 -  10239)
 d:  20971520         0     unused      0     0        # (Cyl.      0 -  10239)
 e:  20969472      2048     4.2BSD      0     0     0  # (Cyl.      1 -  10239)
disklabel: boot block size 0
disklabel: super block size 0

Create a new FFSv2 filesystem on slice f.

newfs -O 2 /dev/rxbd0f

Create mountpoints for both partitions, mount them, and extract your FreeBSD world tarball.

mkdir -p /mnt/{boot,world}
mount -t ext2fs /dev/xbd0e /mnt/boot
mount /dev/xbd0f /mnt/world
tar zxvf /mnt/boot/xenworld.tar.gz -C /mnt/world

Bootloader Configuration

While still in NetBSD, create a text file at /mnt/boot/boot/grub/menu.lst. pvgrub will expect the user bootloader configuration in (hd0,0)/boot/grub/menu.lst. (hd0,0) is your ext2 partition in this case.


timeout 5

title FreeBSD
root (hd0,0)
kernel /boot/kernel vfs.root.mountfrom=ufs:xbd0s2,machdep.idle_mwait=0,kern.hz=100

title FreeBSD Verbose
root (hd0,0)
kernel /boot/kernel vfs.root.mountfrom=ufs:xbd0s2,machdep.idle_mwait=0,kern.hz=100,bootverbose=1

The kernel lines' arguments instruct the kernel to mount its root filesystem from /dev/xbd0s2 (vfs.root.mountfrom, matching the fstab entry created earlier), to set machdep.idle_mwait=0 in order to avoid the panic described in kern/152228, and to tick the system clock at 100 Hz (kern.hz). kern.hz should be detected automatically, but specifying it here won't hurt. The second pvgrub entry is identical but boots verbosely.

Once that's in place, shut down the instance and make sure, through the administration console, that you're using the i386 pvgrub. amd64 pvgrub cannot load our i386 kernel ELF. Option #6 in the console allows you to swap bootloaders.

6. swap i386/amd64 bootloaders (pvgrub) currently i386

Finally, start your instance and watch through the out-of-band console as pvgrub loads your user configuration and eventually the FreeBSD kernel!

First Boot Maintenance

Booted successfully? Awesome. There are just a few things to take care of before you can settle in to FreeBSD.

First, use mtree to repair directory and file permissions and flags mangled by NetBSD's tar.

mtree -U -p / -f /etc/mtree/BSD.root.dist
mtree -U -p /usr -f /etc/mtree/BSD.usr.dist
mtree -U -p /usr/local -f /etc/mtree/BSD.local.dist
mtree -U -p /usr/include -f /etc/mtree/BSD.include.dist
mtree -U -p /var -f /etc/mtree/BSD.var.dist

Enable the Blowfish hash function in auth.conf instead of the default aging MD5 algorithm.

echo "crypt_default=blf" >> /etc/auth.conf

Enable blf hashing in login.conf as well, along with the UTF-8 character set.


--- login.conf.default	2012-01-02 17:08:05.804291477 -0500
+++ login.conf	2012-01-02 17:08:16.996213774 -0500
@@ -23,7 +23,7 @@
 # AND SEMANTICS'' section of getcap(3) for more escape sequences).
-	:passwd_format=md5:\
+	:passwd_format=blf:\
@@ -44,7 +44,9 @@
-	:umask=022:
+	:umask=022:\
+	:charset=UTF-8:\
+	:lang=en_US.UTF-8:

Rebuild the login database with cap_mkdb /etc/login.conf, then set your new blf-hashed root password with passwd.

Set your time zone information with tzsetup, then create the mail alias database for Sendmail.

cd /etc/mail && make aliases


Set your hostname and configure networking in rc.conf. IPv4 can be configured with DHCP, but IPv6 must be configured manually. See Setting up IPv6. Gigadelic resides in the Fremont datacenter, so this example uses the Fremont IPv6 gateway address.



You can test your IPv6 setup by ping6-ing a known-v6 host.

[root@gigadelic#nicole] ping6 -c 4 ipv6.google.com
PING6(56=40+8+8 bytes) 2001:470:1:41:a800:ff:fd3e:bc0c --> 2001:4860:4001:800::1014
16 bytes from 2001:4860:4001:800::1014, icmp_seq=0 hlim=58 time=2.219 ms
16 bytes from 2001:4860:4001:800::1014, icmp_seq=1 hlim=58 time=2.064 ms
16 bytes from 2001:4860:4001:800::1014, icmp_seq=2 hlim=58 time=2.141 ms
16 bytes from 2001:4860:4001:800::1014, icmp_seq=3 hlim=58 time=2.005 ms

--- ipv6.l.google.com ping6 statistics ---
4 packets transmitted, 4 packets received, 0.0% packet loss
round-trip min/avg/max/std-dev = 2.005/2.107/2.219/0.081 ms

Turn on OpenSSH if you desire remote access. Please realize your SSH daemon will be subject to near-constant automated break-in attempts. Use good passwords, leave root login disabled (PermitRootLogin no) in /etc/ssh/sshd_config, and consider blocking bad hosts with PF or another firewall.

echo 'sshd_enable="YES"' >> /etc/rc.conf 
service sshd start

With that, you're all done! Fetch a Ports tree with portsnap fetch extract, and install some software.


Spurious LORs under KERNCONF=XEN

The stock XEN kernel configuration (KERNCONF=XEN) has WITNESS enabled, a debugging feature for lock validation. If you're running with options WITNESS you may see some spurious lock order reversals (LORs) related to the filesystem. They're false positives, as noted in i386/153260, and your system will safely come right out the other end.

# portsnap fetch extract
Looking up portsnap.FreeBSD.org mirrors... 5 mirrors found.
Fetching public key from portsnap2.FreeBSD.org... done.
Fetching snapshot tag from portsnap2.FreeBSD.org... done.
Fetching snapshot metadata... done.
Fetching snapshot generated at Tue Mar 13 00:07:53 UTC 2012:
cbf91c2407f03283d90f5ded759a3af9833e067158f236100% of   66 MB 3469 kBps 00m00s
Extracting snapshot... lock order reversal:
 1st 0xd8459228 bufwait (bufwait) @ /usr/src/sys/kern/vfs_bio.c:2658
 2nd 0xc38db400 dirhash (dirhash) @ /usr/src/sys/ufs/ufs/ufs_dirhash.c:284
KDB: stack backtrace:
X_db_sym_numargs(c03dd42c,c04320d8,c3a19b80,c3a510ac,b395eb11,...) at X_db_sym_numargs+0x146
kdb_backtrace(c018627b,c03e0d7b,c360dae8,c3610a08,e58f2800,...) at kdb_backtrace+0x2a
witness_display_spinlock(c03e0d7b,c38db400,c04047aa,c3610a08,c040442f,...) at witness_display_spinlock+0x75
witness_checkorder(c38db400,9,c040442f,11c,0,...) at witness_checkorder+0x839
_sx_xlock(c38db400,0,c040442f,11c,c3a14e80,...) at _sx_xlock+0x85
ufsdirhash_enduseful(d84591c8,c3a14e80,e58f2930,dd12884c,e58f28d0,...) at ufsdirhash_enduseful+0x2f5
ufsdirhash_add(c3a14e80,e58f2930,84c,e58f28bc,e58f28c0,...) at ufsdirhash_add+0x13
ufs_direnter(c3a51000,c3b07aa0,e58f2930,e58f2bd0,0,...) at ufs_direnter+0x739
ufs_itimes(e58f2bd0,0,e58f2b14,e58f2a78,c03c1bc5,...) at ufs_itimes+0x14bc
ufs_itimes(e58f2b14,e58f2b2c,0,0,e58f2b90,...) at ufs_itimes+0x17d0
VOP_CREATE_APV(c0449ec0,e58f2b14,2,c04059c0,0,...) at VOP_CREATE_APV+0xa5
vn_open_cred(e58f2b90,e58f2c58,1a4,0,c38e5280,...) at vn_open_cred+0x1d3
vn_open(e58f2b90,e58f2c58,1a4,c38f0690,c0a64dd0,...) at vn_open+0x3b
kern_openat(c3a19b80,ffffff9c,2846f1f0,0,a02,...) at kern_openat+0x1ec
kern_open(c3a19b80,2846f1f0,0,a01,1a4,...) at kern_open+0x35
sys_open(c3a19b80,e58f2cfc,c0414576,c03e19be,206,...) at sys_open+0x30
syscall(e58f2d38) at syscall+0x284
Xint0x80_syscall() at Xint0x80_syscall+0x22
--- syscall (5, FreeBSD ELF32, sys_open), eip = 0x2837dd03, esp = 0xbf7fe83c, ebp = 0xbf7fe868 ---
Verifying snapshot integrity... 

Updating the kernel

If you want to update your FreeBSD kernel ELF from within FreeBSD itself, install sysutils/e2fsprogs, create a mount point, and mount your ext2 boot partition.

mkdir /root/boot
mount -t ext2fs /dev/xbd0s1 /root/boot


FreeBSD has powered my instance for a month now with no panics or other show-stoppers.