This modifies libvirt to use CoW images instead of raw images. This is much more efficient and allows us to use the snapshotting capabilities available for qcow2 images. It also changes local storage to be a separate drive instead of a separate partition.

I'm proposing this branch for review to get feedback.  I may have inadvertently broken a few things.  Comments and possible issues:

1. I haven't tested the other hypervisors.  I may have broken libvirt xen support and uml support with this patch.
2. Is it useful to have a use_cow_images param, or should it just be automatic for qemu/kvm and turned off for everything else.
3. create_image is a large annoying method.  I tried to clean it up a bit, but it could probably use a bit more refactoring.
4. disk.py seems to be only used by the hypervisors, so perhaps it should move into virt dir.
5. disk.py/partition() is unused now. Should we leave it in or throw it away?

Comments welcome
This commit is contained in:
Vishvananda Ishaya 2011-01-14 00:19:14 +00:00 committed by Tarmac
commit 14a42e45cf
5 changed files with 295 additions and 254 deletions

View File

@ -83,9 +83,10 @@ if [ "$CMD" == "install" ]; then
sudo /etc/init.d/iscsitarget restart
sudo modprobe kvm
sudo /etc/init.d/libvirt-bin restart
sudo modprobe nbd
sudo apt-get install -y python-twisted python-sqlalchemy python-mox python-greenlet python-carrot
sudo apt-get install -y python-daemon python-eventlet python-gflags python-tornado python-ipy
sudo apt-get install -y python-libvirt python-libxml2 python-routes
sudo apt-get install -y python-daemon python-eventlet python-gflags python-ipy
sudo apt-get install -y python-libvirt python-libxml2 python-routes python-cheetah
if [ "$USE_MYSQL" == 1 ]; then
cat <<MYSQL_PRESEED | debconf-set-selections
mysql-server-5.1 mysql-server/root_password password $MYSQL_PASS

View File

@ -1,205 +0,0 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Utility methods to resize, repartition, and modify disk images.
Includes injection of SSH PGP keys into authorized_keys file.
"""
import os
import tempfile
from nova import exception
from nova import flags
from nova import log as logging
LOG = logging.getLogger('nova.compute.disk')
FLAGS = flags.FLAGS
flags.DEFINE_integer('minimum_root_size', 1024 * 1024 * 1024 * 10,
'minimum size in bytes of root partition')
flags.DEFINE_integer('block_size', 1024 * 1024 * 256,
'block_size to use for dd')
def partition(infile, outfile, local_bytes=0, resize=True,
local_type='ext2', execute=None):
"""
Turns a partition (infile) into a bootable drive image (outfile).
The first 63 sectors (0-62) of the resulting image is a master boot record.
Infile becomes the first primary partition.
If local bytes is specified, a second primary partition is created and
formatted as ext2.
::
In the diagram below, dashes represent drive sectors.
+-----+------. . .-------+------. . .------+
| 0 a| b c|d e|
+-----+------. . .-------+------. . .------+
| mbr | primary partiton | local partition |
+-----+------. . .-------+------. . .------+
"""
sector_size = 512
file_size = os.path.getsize(infile)
if resize and file_size < FLAGS.minimum_root_size:
last_sector = FLAGS.minimum_root_size / sector_size - 1
execute('dd if=/dev/zero of=%s count=1 seek=%d bs=%d'
% (infile, last_sector, sector_size))
execute('e2fsck -fp %s' % infile, check_exit_code=False)
execute('resize2fs %s' % infile)
file_size = FLAGS.minimum_root_size
elif file_size % sector_size != 0:
LOG.warn(_("Input partition size not evenly divisible by"
" sector size: %d / %d"), file_size, sector_size)
primary_sectors = file_size / sector_size
if local_bytes % sector_size != 0:
LOG.warn(_("Bytes for local storage not evenly divisible"
" by sector size: %d / %d"), local_bytes, sector_size)
local_sectors = local_bytes / sector_size
mbr_last = 62 # a
primary_first = mbr_last + 1 # b
primary_last = primary_first + primary_sectors - 1 # c
local_first = primary_last + 1 # d
local_last = local_first + local_sectors - 1 # e
last_sector = local_last # e
# create an empty file
execute('dd if=/dev/zero of=%s count=1 seek=%d bs=%d'
% (outfile, mbr_last, sector_size))
# make mbr partition
execute('parted --script %s mklabel msdos' % outfile)
# append primary file
execute('dd if=%s of=%s bs=%s conv=notrunc,fsync oflag=append'
% (infile, outfile, FLAGS.block_size))
# make primary partition
execute('parted --script %s mkpart primary %ds %ds'
% (outfile, primary_first, primary_last))
if local_bytes > 0:
# make the file bigger
execute('dd if=/dev/zero of=%s count=1 seek=%d bs=%d'
% (outfile, last_sector, sector_size))
# make and format local partition
execute('parted --script %s mkpartfs primary %s %ds %ds'
% (outfile, local_type, local_first, local_last))
def extend(image, size, execute):
file_size = os.path.getsize(image)
if file_size >= size:
return
return execute('truncate -s size %s' % (image,))
def inject_data(image, key=None, net=None, partition=None, execute=None):
"""Injects a ssh key and optionally net data into a disk image.
it will mount the image as a fully partitioned disk and attempt to inject
into the specified partition number.
If partition is not specified it mounts the image as a single partition.
"""
out, err = execute('sudo losetup --find --show %s' % image)
if err:
raise exception.Error(_('Could not attach image to loopback: %s')
% err)
device = out.strip()
try:
if not partition is None:
# create partition
out, err = execute('sudo kpartx -a %s' % device)
if err:
raise exception.Error(_('Failed to load partition: %s') % err)
mapped_device = '/dev/mapper/%sp%s' % (device.split('/')[-1],
partition)
else:
mapped_device = device
# We can only loopback mount raw images. If the device isn't there,
# it's normally because it's a .vmdk or a .vdi etc
if not os.path.exists(mapped_device):
raise exception.Error('Mapped device was not found (we can'
' only inject raw disk images): %s' %
mapped_device)
# Configure ext2fs so that it doesn't auto-check every N boots
out, err = execute('sudo tune2fs -c 0 -i 0 %s' % mapped_device)
tmpdir = tempfile.mkdtemp()
try:
# mount loopback to dir
out, err = execute(
'sudo mount %s %s' % (mapped_device, tmpdir))
if err:
raise exception.Error(_('Failed to mount filesystem: %s')
% err)
try:
if key:
# inject key file
_inject_key_into_fs(key, tmpdir, execute=execute)
if net:
_inject_net_into_fs(net, tmpdir, execute=execute)
finally:
# unmount device
execute('sudo umount %s' % mapped_device)
finally:
# remove temporary directory
execute('rmdir %s' % tmpdir)
if not partition is None:
# remove partitions
execute('sudo kpartx -d %s' % device)
finally:
# remove loopback
execute('sudo losetup --detach %s' % device)
def _inject_key_into_fs(key, fs, execute=None):
"""Add the given public ssh key to root's authorized_keys.
key is an ssh key string.
fs is the path to the base of the filesystem into which to inject the key.
"""
sshdir = os.path.join(fs, 'root', '.ssh')
execute('sudo mkdir -p %s' % sshdir) # existing dir doesn't matter
execute('sudo chown root %s' % sshdir)
execute('sudo chmod 700 %s' % sshdir)
keyfile = os.path.join(sshdir, 'authorized_keys')
execute('sudo tee -a %s' % keyfile, '\n' + key.strip() + '\n')
def _inject_net_into_fs(net, fs, execute=None):
"""Inject /etc/network/interfaces into the filesystem rooted at fs.
net is the contents of /etc/network/interfaces.
"""
netdir = os.path.join(os.path.join(fs, 'etc'), 'network')
execute('sudo mkdir -p %s' % netdir) # existing dir doesn't matter
execute('sudo chown root:root %s' % netdir)
execute('sudo chmod 755 %s' % netdir)
netfile = os.path.join(netdir, 'interfaces')
execute('sudo tee %s' % netfile, net)

186
nova/virt/disk.py Normal file
View File

@ -0,0 +1,186 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Utility methods to resize, repartition, and modify disk images.
Includes injection of SSH PGP keys into authorized_keys file.
"""
import os
import tempfile
import time
from nova import exception
from nova import flags
from nova import log as logging
from nova import utils
LOG = logging.getLogger('nova.compute.disk')
FLAGS = flags.FLAGS
flags.DEFINE_integer('minimum_root_size', 1024 * 1024 * 1024 * 10,
'minimum size in bytes of root partition')
flags.DEFINE_integer('block_size', 1024 * 1024 * 256,
'block_size to use for dd')
def extend(image, size):
"""Increase image to size"""
file_size = os.path.getsize(image)
if file_size >= size:
return
utils.execute('truncate -s %s %s' % (size, image))
# NOTE(vish): attempts to resize filesystem
utils.execute('e2fsck -fp %s' % image, check_exit_code=False)
utils.execute('resize2fs %s' % image, check_exit_code=False)
def inject_data(image, key=None, net=None, partition=None, nbd=False):
"""Injects a ssh key and optionally net data into a disk image.
it will mount the image as a fully partitioned disk and attempt to inject
into the specified partition number.
If partition is not specified it mounts the image as a single partition.
"""
device = _link_device(image, nbd)
try:
if not partition is None:
# create partition
out, err = utils.execute('sudo kpartx -a %s' % device)
if err:
raise exception.Error(_('Failed to load partition: %s') % err)
mapped_device = '/dev/mapper/%sp%s' % (device.split('/')[-1],
partition)
else:
mapped_device = device
# We can only loopback mount raw images. If the device isn't there,
# it's normally because it's a .vmdk or a .vdi etc
if not os.path.exists(mapped_device):
raise exception.Error('Mapped device was not found (we can'
' only inject raw disk images): %s' %
mapped_device)
# Configure ext2fs so that it doesn't auto-check every N boots
out, err = utils.execute('sudo tune2fs -c 0 -i 0 %s' % mapped_device)
tmpdir = tempfile.mkdtemp()
try:
# mount loopback to dir
out, err = utils.execute(
'sudo mount %s %s' % (mapped_device, tmpdir))
if err:
raise exception.Error(_('Failed to mount filesystem: %s')
% err)
try:
if key:
# inject key file
_inject_key_into_fs(key, tmpdir)
if net:
_inject_net_into_fs(net, tmpdir)
finally:
# unmount device
utils.execute('sudo umount %s' % mapped_device)
finally:
# remove temporary directory
utils.execute('rmdir %s' % tmpdir)
if not partition is None:
# remove partitions
utils.execute('sudo kpartx -d %s' % device)
finally:
_unlink_device(device, nbd)
def _link_device(image, nbd):
"""Link image to device using loopback or nbd"""
if nbd:
device = _allocate_device()
utils.execute('sudo qemu-nbd -c %s %s' % (device, image))
# NOTE(vish): this forks into another process, so give it a chance
# to set up before continuuing
for i in xrange(10):
if os.path.exists("/sys/block/%s/pid" % os.path.basename(device)):
return device
time.sleep(1)
raise exception.Error(_('nbd device %s did not show up') % device)
else:
out, err = utils.execute('sudo losetup --find --show %s' % image)
if err:
raise exception.Error(_('Could not attach image to loopback: %s')
% err)
return out.strip()
def _unlink_device(device, nbd):
"""Unlink image from device using loopback or nbd"""
if nbd:
utils.execute('sudo qemu-nbd -d %s' % device)
_free_device(device)
else:
utils.execute('sudo losetup --detach %s' % device)
_DEVICES = ['/dev/nbd%s' % i for i in xrange(16)]
def _allocate_device():
# NOTE(vish): This assumes no other processes are allocating nbd devices.
# It may race cause a race condition if multiple
# workers are running on a given machine.
while True:
if not _DEVICES:
raise exception.Error(_('No free nbd devices'))
device = _DEVICES.pop()
if not os.path.exists("/sys/block/%s/pid" % os.path.basename(device)):
break
return device
def _free_device(device):
_DEVICES.append(device)
def _inject_key_into_fs(key, fs):
"""Add the given public ssh key to root's authorized_keys.
key is an ssh key string.
fs is the path to the base of the filesystem into which to inject the key.
"""
sshdir = os.path.join(fs, 'root', '.ssh')
utils.execute('sudo mkdir -p %s' % sshdir) # existing dir doesn't matter
utils.execute('sudo chown root %s' % sshdir)
utils.execute('sudo chmod 700 %s' % sshdir)
keyfile = os.path.join(sshdir, 'authorized_keys')
utils.execute('sudo tee -a %s' % keyfile, '\n' + key.strip() + '\n')
def _inject_net_into_fs(net, fs):
"""Inject /etc/network/interfaces into the filesystem rooted at fs.
net is the contents of /etc/network/interfaces.
"""
netdir = os.path.join(os.path.join(fs, 'etc'), 'network')
utils.execute('sudo mkdir -p %s' % netdir) # existing dir doesn't matter
utils.execute('sudo chown root:root %s' % netdir)
utils.execute('sudo chmod 755 %s' % netdir)
netfile = os.path.join(netdir, 'interfaces')
utils.execute('sudo tee %s' % netfile, net)

View File

@ -7,13 +7,13 @@
#set $disk_bus = 'uml'
<type>uml</type>
<kernel>/usr/bin/linux</kernel>
<root>/dev/ubda1</root>
<root>/dev/ubda</root>
#else
#if $type == 'xen'
#set $disk_prefix = 'sd'
#set $disk_bus = 'scsi'
<type>linux</type>
<root>/dev/xvda1</root>
<root>/dev/xvda</root>
#else
#set $disk_prefix = 'vd'
#set $disk_bus = 'virtio'
@ -28,7 +28,7 @@
#if $type == 'xen'
<cmdline>ro</cmdline>
#else
<cmdline>root=/dev/vda1 console=ttyS0</cmdline>
<cmdline>root=/dev/vda console=ttyS0</cmdline>
#end if
#if $getVar('ramdisk', None)
<initrd>${ramdisk}</initrd>
@ -46,18 +46,28 @@
<devices>
#if $getVar('rescue', False)
<disk type='file'>
<driver type='${driver_type}'/>
<source file='${basepath}/rescue-disk'/>
<target dev='${disk_prefix}a' bus='${disk_bus}'/>
</disk>
<disk type='file'>
<driver type='${driver_type}'/>
<source file='${basepath}/disk'/>
<target dev='${disk_prefix}b' bus='${disk_bus}'/>
</disk>
#else
<disk type='file'>
<driver type='${driver_type}'/>
<source file='${basepath}/disk'/>
<target dev='${disk_prefix}a' bus='${disk_bus}'/>
</disk>
#if $getVar('local', False)
<disk type='file'>
<driver type='${driver_type}'/>
<source file='${basepath}/local'/>
<target dev='${disk_prefix}b' bus='${disk_bus}'/>
</disk>
#end if
#end if
<interface type='bridge'>
<source bridge='${bridge_name}'/>

View File

@ -58,9 +58,9 @@ from nova import log as logging
from nova import utils
#from nova.api import context
from nova.auth import manager
from nova.compute import disk
from nova.compute import instance_types
from nova.compute import power_state
from nova.virt import disk
from nova.virt import images
libvirt = None
@ -91,6 +91,9 @@ flags.DEFINE_string('libvirt_uri',
flags.DEFINE_bool('allow_project_net_traffic',
True,
'Whether to allow in project network traffic')
flags.DEFINE_bool('use_cow_images',
True,
'Whether to use cow images')
flags.DEFINE_string('ajaxterm_portrange',
'10000-12000',
'Range of ports that ajaxterm should randomly try to bind')
@ -480,19 +483,57 @@ class LibvirtConnection(object):
subprocess.Popen(cmd, shell=True)
return {'token': token, 'host': host, 'port': port}
def _cache_image(self, fn, target, fname, cow=False, *args, **kwargs):
"""Wrapper for a method that creates an image that caches the image.
This wrapper will save the image into a common store and create a
copy for use by the hypervisor.
The underlying method should specify a kwarg of target representing
where the image will be saved.
fname is used as the filename of the base image. The filename needs
to be unique to a given image.
If cow is True, it will make a CoW image instead of a copy.
"""
if not os.path.exists(target):
base_dir = os.path.join(FLAGS.instances_path, '_base')
if not os.path.exists(base_dir):
os.mkdir(base_dir)
os.chmod(base_dir, 0777)
base = os.path.join(base_dir, fname)
if not os.path.exists(base):
fn(target=base, *args, **kwargs)
if cow:
utils.execute('qemu-img create -f qcow2 -o '
'cluster_size=2M,backing_file=%s %s'
% (base, target))
else:
utils.execute('cp %s %s' % (base, target))
def _fetch_image(self, target, image_id, user, project, size=None):
"""Grab image and optionally attempt to resize it"""
images.fetch(image_id, target, user, project)
if size:
disk.extend(target, size)
def _create_local(self, target, local_gb):
"""Create a blank image of specified size"""
utils.execute('truncate %s -s %dG' % (target, local_gb))
# TODO(vish): should we format disk by default?
def _create_image(self, inst, libvirt_xml, prefix='', disk_images=None):
# syntactic nicety
basepath = lambda fname = '', prefix = prefix: os.path.join(
FLAGS.instances_path,
inst['name'],
prefix + fname)
def basepath(fname='', prefix=prefix):
return os.path.join(FLAGS.instances_path,
inst['name'],
prefix + fname)
# ensure directories exist and are writable
utils.execute('mkdir -p %s' % basepath(prefix=''))
utils.execute('chmod 0777 %s' % basepath(prefix=''))
# TODO(termie): these are blocking calls, it would be great
# if they weren't.
LOG.info(_('instance %s: Creating image'), inst['name'])
f = open(basepath('libvirt.xml'), 'w')
f.write(libvirt_xml)
@ -509,23 +550,44 @@ class LibvirtConnection(object):
disk_images = {'image_id': inst['image_id'],
'kernel_id': inst['kernel_id'],
'ramdisk_id': inst['ramdisk_id']}
if not os.path.exists(basepath('disk')):
images.fetch(inst.image_id, basepath('disk-raw'), user,
project)
if inst['kernel_id']:
if not os.path.exists(basepath('kernel')):
images.fetch(inst['kernel_id'], basepath('kernel'),
user, project)
if inst['ramdisk_id']:
if not os.path.exists(basepath('ramdisk')):
images.fetch(inst['ramdisk_id'], basepath('ramdisk'),
user, project)
if disk_images['kernel_id']:
self._cache_image(fn=self._fetch_image,
target=basepath('kernel'),
fname=disk_images['kernel_id'],
image_id=disk_images['kernel_id'],
user=user,
project=project)
if disk_images['ramdisk_id']:
self._cache_image(fn=self._fetch_image,
target=basepath('ramdisk'),
fname=disk_images['ramdisk_id'],
image_id=disk_images['ramdisk_id'],
user=user,
project=project)
def execute(cmd, process_input=None, check_exit_code=True):
return utils.execute(cmd=cmd,
process_input=process_input,
check_exit_code=check_exit_code)
root_fname = disk_images['image_id']
size = FLAGS.minimum_root_size
if inst['instance_type'] == 'm1.tiny' or prefix == 'rescue-':
size = None
root_fname += "_sm"
self._cache_image(fn=self._fetch_image,
target=basepath('disk'),
fname=root_fname,
cow=FLAGS.use_cow_images,
image_id=disk_images['image_id'],
user=user,
project=project,
size=size)
type_data = instance_types.INSTANCE_TYPES[inst['instance_type']]
if type_data['local_gb']:
self._cache_image(fn=self._create_local,
target=basepath('local'),
fname="local_%s" % type_data['local_gb'],
cow=FLAGS.use_cow_images,
local_gb=type_data['local_gb'])
# For now, we assume that if we're not using a kernel, we're using a
# partitioned disk image where the target partition is the first
@ -555,34 +617,15 @@ class LibvirtConnection(object):
LOG.info(_('instance %s: injecting net into image %s'),
inst['name'], inst.image_id)
try:
disk.inject_data(basepath('disk-raw'), key, net,
disk.inject_data(basepath('disk'), key, net,
partition=target_partition,
execute=execute)
nbd=FLAGS.use_cow_images)
except Exception as e:
# This could be a windows image, or a vmdk format disk
LOG.warn(_('instance %s: ignoring error injecting data'
' into image %s (%s)'),
inst['name'], inst.image_id, e)
if inst['kernel_id']:
if os.path.exists(basepath('disk')):
utils.execute('rm -f %s' % basepath('disk'))
local_bytes = (instance_types.INSTANCE_TYPES[inst.instance_type]
['local_gb']
* 1024 * 1024 * 1024)
resize = True
if inst['instance_type'] == 'm1.tiny' or prefix == 'rescue-':
resize = False
if inst['kernel_id']:
disk.partition(basepath('disk-raw'), basepath('disk'),
local_bytes, resize, execute=execute)
else:
os.rename(basepath('disk-raw'), basepath('disk'))
disk.extend(basepath('disk'), local_bytes, execute=execute)
if FLAGS.libvirt_type == 'uml':
utils.execute('sudo chown root %s' % basepath('disk'))
@ -610,6 +653,10 @@ class LibvirtConnection(object):
"value=\"%s\" />\n") % (net, mask)
else:
extra_params = "\n"
if FLAGS.use_cow_images:
driver_type = 'qcow2'
else:
driver_type = 'raw'
xml_info = {'type': FLAGS.libvirt_type,
'name': instance['name'],
@ -622,7 +669,9 @@ class LibvirtConnection(object):
'ip_address': ip_address,
'dhcp_server': dhcp_server,
'extra_params': extra_params,
'rescue': rescue}
'rescue': rescue,
'local': instance_type['local_gb'],
'driver_type': driver_type}
if not rescue:
if instance['kernel_id']:
xml_info['kernel'] = xml_info['basepath'] + "/kernel"