Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (22 commits) ceph: do not carry i_lock for readdir from dcache fs/ceph/xattr.c: Use kmemdup rbd: passing wrong variable to bvec_kunmap_irq() rbd: null vs ERR_PTR ceph: fix num_pages_free accounting in pagelist ceph: add CEPH_MDS_OP_SETDIRLAYOUT and associated ioctl. ceph: don't crash when passed bad mount options ceph: fix debugfs warnings block: rbd: removing unnecessary test block: rbd: fixed may leaks ceph: switch from BKL to lock_flocks() ceph: preallocate flock state without locks held ceph: add pagelist_reserve, pagelist_truncate, pagelist_set_cursor ceph: use mapping->nrpages to determine if mapping is empty ceph: only invalidate on check_caps if we actually have pages ceph: do not hide .snap in root directory rbd: introduce rados block device (rbd), based on libceph ceph: factor out libceph from Ceph file system ceph-rbd: osdc support for osd call and rollback operations ceph: messenger and osdc changes for rbd ...
This commit is contained in:
@@ -293,6 +293,7 @@ source "net/wimax/Kconfig"
|
||||
source "net/rfkill/Kconfig"
|
||||
source "net/9p/Kconfig"
|
||||
source "net/caif/Kconfig"
|
||||
source "net/ceph/Kconfig"
|
||||
|
||||
|
||||
endif # if NET
|
||||
|
||||
@@ -68,3 +68,4 @@ obj-$(CONFIG_SYSCTL) += sysctl_net.o
|
||||
endif
|
||||
obj-$(CONFIG_WIMAX) += wimax/
|
||||
obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/
|
||||
obj-$(CONFIG_CEPH_LIB) += ceph/
|
||||
|
||||
28
net/ceph/Kconfig
Normal file
28
net/ceph/Kconfig
Normal file
@@ -0,0 +1,28 @@
|
||||
config CEPH_LIB
|
||||
tristate "Ceph core library (EXPERIMENTAL)"
|
||||
depends on INET && EXPERIMENTAL
|
||||
select LIBCRC32C
|
||||
select CRYPTO_AES
|
||||
select CRYPTO
|
||||
default n
|
||||
help
|
||||
Choose Y or M here to include cephlib, which provides the
|
||||
common functionality to both the Ceph filesystem and
|
||||
the RADOS block device (rbd).
|
||||
|
||||
More information at http://ceph.newdream.net/.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CEPH_LIB_PRETTYDEBUG
|
||||
bool "Include file:line in ceph debug output"
|
||||
depends on CEPH_LIB
|
||||
default n
|
||||
help
|
||||
If you say Y here, debug output will include a filename and
|
||||
line to aid debugging. This increases kernel size and slows
|
||||
execution slightly when debug call sites are enabled (e.g.,
|
||||
via CONFIG_DYNAMIC_DEBUG).
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
37
net/ceph/Makefile
Normal file
37
net/ceph/Makefile
Normal file
@@ -0,0 +1,37 @@
|
||||
#
|
||||
# Makefile for the Ceph core library (libceph).
|
||||
#
|
||||
|
||||
ifneq ($(KERNELRELEASE),)
|
||||
|
||||
obj-$(CONFIG_CEPH_LIB) += libceph.o
|
||||
|
||||
libceph-objs := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
|
||||
mon_client.o \
|
||||
osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
|
||||
debugfs.o \
|
||||
auth.o auth_none.o \
|
||||
crypto.o armor.o \
|
||||
auth_x.o \
|
||||
ceph_fs.o ceph_strings.o ceph_hash.o \
|
||||
pagevec.o
|
||||
|
||||
else
|
||||
#Otherwise we were called directly from the command
|
||||
# line; invoke the kernel build system.
|
||||
|
||||
KERNELDIR ?= /lib/modules/$(shell uname -r)/build
|
||||
PWD := $(shell pwd)
|
||||
|
||||
default: all
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules
|
||||
|
||||
modules_install:
|
||||
$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_CEPH_LIB=m modules_install
|
||||
|
||||
clean:
|
||||
$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
|
||||
|
||||
endif
|
||||
103
net/ceph/armor.c
Normal file
103
net/ceph/armor.c
Normal file
@@ -0,0 +1,103 @@
|
||||
|
||||
#include <linux/errno.h>
|
||||
|
||||
int ceph_armor(char *dst, const char *src, const char *end);
|
||||
int ceph_unarmor(char *dst, const char *src, const char *end);
|
||||
|
||||
/*
|
||||
* base64 encode/decode.
|
||||
*/
|
||||
|
||||
/* The 64-character base64 alphabet, indexed by 6-bit value. */
static const char pem_key[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Map a 6-bit value (0..63) to its base64 character. */
static int encode_bits(int c)
{
	return pem_key[c];
}

/*
 * Map a base64 character back to its 6-bit value.
 *
 * The padding character '=' decodes to 0 so that callers can test
 * "negative means invalid" uniformly; any other character outside the
 * alphabet yields -EINVAL.
 */
static int decode_bits(char c)
{
	if (c >= 'A' && c <= 'Z')
		return c - 'A';
	if (c >= 'a' && c <= 'z')
		return c - 'a' + 26;
	if (c >= '0' && c <= '9')
		return c - '0' + 52;
	if (c == '+')
		return 62;
	if (c == '/')
		return 63;
	if (c == '=')
		return 0; /* just non-negative, please */
	return -EINVAL;
}

/*
 * Base64-encode the bytes in [src, end) into dst.
 *
 * Every 3 input bytes (or final partial group, padded with '=') become
 * 4 output characters, and a '\n' is emitted after every 64 output
 * characters.  No NUL terminator is written.  The caller must supply a
 * dst large enough for the encoded text.
 *
 * Returns the number of characters written to dst.
 */
int ceph_armor(char *dst, const char *src, const char *end)
{
	int olen = 0;
	int line = 0;

	while (src < end) {
		unsigned char a, b, c;

		a = *src++;
		*dst++ = encode_bits(a >> 2);
		if (src < end) {
			b = *src++;
			*dst++ = encode_bits(((a & 3) << 4) | (b >> 4));
			if (src < end) {
				c = *src++;
				*dst++ = encode_bits(((b & 15) << 2) |
						     (c >> 6));
				*dst++ = encode_bits(c & 63);
			} else {
				*dst++ = encode_bits((b & 15) << 2);
				*dst++ = '=';
			}
		} else {
			*dst++ = encode_bits(((a & 3) << 4));
			*dst++ = '=';
			*dst++ = '=';
		}
		olen += 4;
		line += 4;
		if (line == 64) {
			line = 0;
			*(dst++) = '\n';
			olen++;
		}
	}
	return olen;
}

/*
 * Base64-decode the text in [src, end) into dst.
 *
 * Newlines between 4-character groups are skipped, including a
 * trailing one: ceph_armor() emits '\n' after every full 64-character
 * line, so its own output may end in a newline and must decode cleanly.
 *
 * Returns the number of bytes written to dst, or -EINVAL if the input
 * contains a character outside the base64 alphabet or ends in the
 * middle of a 4-character group.
 */
int ceph_unarmor(char *dst, const char *src, const char *end)
{
	int olen = 0;

	while (src < end) {
		int a, b, c, d;

		if (src[0] == '\n') {
			/* re-test src < end: the newline may be the last byte */
			src++;
			continue;
		}
		if (src + 4 > end)
			return -EINVAL;
		a = decode_bits(src[0]);
		b = decode_bits(src[1]);
		c = decode_bits(src[2]);
		d = decode_bits(src[3]);
		if (a < 0 || b < 0 || c < 0 || d < 0)
			return -EINVAL;

		*dst++ = (a << 2) | (b >> 4);
		if (src[2] == '=')
			return olen + 1;
		*dst++ = ((b & 15) << 4) | (c >> 2);
		if (src[3] == '=')
			return olen + 2;
		*dst++ = ((c & 3) << 6) | d;
		olen += 3;
		src += 4;
	}
	return olen;
}
|
||||
259
net/ceph/auth.c
Normal file
259
net/ceph/auth.c
Normal file
@@ -0,0 +1,259 @@
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <linux/ceph/types.h>
|
||||
#include <linux/ceph/decode.h>
|
||||
#include <linux/ceph/libceph.h>
|
||||
#include <linux/ceph/messenger.h>
|
||||
#include "auth_none.h"
|
||||
#include "auth_x.h"
|
||||
|
||||
|
||||
/*
|
||||
* get protocol handler
|
||||
*/
|
||||
static u32 supported_protocols[] = {
|
||||
CEPH_AUTH_NONE,
|
||||
CEPH_AUTH_CEPHX
|
||||
};
|
||||
|
||||
static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
|
||||
{
|
||||
switch (protocol) {
|
||||
case CEPH_AUTH_NONE:
|
||||
return ceph_auth_none_init(ac);
|
||||
case CEPH_AUTH_CEPHX:
|
||||
return ceph_x_init(ac);
|
||||
default:
|
||||
return -ENOENT;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* setup, teardown.
|
||||
*/
|
||||
struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret)
|
||||
{
|
||||
struct ceph_auth_client *ac;
|
||||
int ret;
|
||||
|
||||
dout("auth_init name '%s' secret '%s'\n", name, secret);
|
||||
|
||||
ret = -ENOMEM;
|
||||
ac = kzalloc(sizeof(*ac), GFP_NOFS);
|
||||
if (!ac)
|
||||
goto out;
|
||||
|
||||
ac->negotiating = true;
|
||||
if (name)
|
||||
ac->name = name;
|
||||
else
|
||||
ac->name = CEPH_AUTH_NAME_DEFAULT;
|
||||
dout("auth_init name %s secret %s\n", ac->name, secret);
|
||||
ac->secret = secret;
|
||||
return ac;
|
||||
|
||||
out:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
void ceph_auth_destroy(struct ceph_auth_client *ac)
|
||||
{
|
||||
dout("auth_destroy %p\n", ac);
|
||||
if (ac->ops)
|
||||
ac->ops->destroy(ac);
|
||||
kfree(ac);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset occurs when reconnecting to the monitor.
|
||||
*/
|
||||
void ceph_auth_reset(struct ceph_auth_client *ac)
|
||||
{
|
||||
dout("auth_reset %p\n", ac);
|
||||
if (ac->ops && !ac->negotiating)
|
||||
ac->ops->reset(ac);
|
||||
ac->negotiating = true;
|
||||
}
|
||||
|
||||
int ceph_entity_name_encode(const char *name, void **p, void *end)
|
||||
{
|
||||
int len = strlen(name);
|
||||
|
||||
if (*p + 2*sizeof(u32) + len > end)
|
||||
return -ERANGE;
|
||||
ceph_encode_32(p, CEPH_ENTITY_TYPE_CLIENT);
|
||||
ceph_encode_32(p, len);
|
||||
ceph_encode_copy(p, name, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initiate protocol negotiation with monitor. Include entity name
|
||||
* and list supported protocols.
|
||||
*/
|
||||
int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
|
||||
{
|
||||
struct ceph_mon_request_header *monhdr = buf;
|
||||
void *p = monhdr + 1, *end = buf + len, *lenp;
|
||||
int i, num;
|
||||
int ret;
|
||||
|
||||
dout("auth_build_hello\n");
|
||||
monhdr->have_version = 0;
|
||||
monhdr->session_mon = cpu_to_le16(-1);
|
||||
monhdr->session_mon_tid = 0;
|
||||
|
||||
ceph_encode_32(&p, 0); /* no protocol, yet */
|
||||
|
||||
lenp = p;
|
||||
p += sizeof(u32);
|
||||
|
||||
ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
|
||||
ceph_encode_8(&p, 1);
|
||||
num = ARRAY_SIZE(supported_protocols);
|
||||
ceph_encode_32(&p, num);
|
||||
ceph_decode_need(&p, end, num * sizeof(u32), bad);
|
||||
for (i = 0; i < num; i++)
|
||||
ceph_encode_32(&p, supported_protocols[i]);
|
||||
|
||||
ret = ceph_entity_name_encode(ac->name, &p, end);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ceph_decode_need(&p, end, sizeof(u64), bad);
|
||||
ceph_encode_64(&p, ac->global_id);
|
||||
|
||||
ceph_encode_32(&lenp, p - lenp - sizeof(u32));
|
||||
return p - buf;
|
||||
|
||||
bad:
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
static int ceph_build_auth_request(struct ceph_auth_client *ac,
|
||||
void *msg_buf, size_t msg_len)
|
||||
{
|
||||
struct ceph_mon_request_header *monhdr = msg_buf;
|
||||
void *p = monhdr + 1;
|
||||
void *end = msg_buf + msg_len;
|
||||
int ret;
|
||||
|
||||
monhdr->have_version = 0;
|
||||
monhdr->session_mon = cpu_to_le16(-1);
|
||||
monhdr->session_mon_tid = 0;
|
||||
|
||||
ceph_encode_32(&p, ac->protocol);
|
||||
|
||||
ret = ac->ops->build_request(ac, p + sizeof(u32), end);
|
||||
if (ret < 0) {
|
||||
pr_err("error %d building auth method %s request\n", ret,
|
||||
ac->ops->name);
|
||||
return ret;
|
||||
}
|
||||
dout(" built request %d bytes\n", ret);
|
||||
ceph_encode_32(&p, ret);
|
||||
return p + ret - msg_buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle auth message from monitor.
|
||||
*/
|
||||
int ceph_handle_auth_reply(struct ceph_auth_client *ac,
|
||||
void *buf, size_t len,
|
||||
void *reply_buf, size_t reply_len)
|
||||
{
|
||||
void *p = buf;
|
||||
void *end = buf + len;
|
||||
int protocol;
|
||||
s32 result;
|
||||
u64 global_id;
|
||||
void *payload, *payload_end;
|
||||
int payload_len;
|
||||
char *result_msg;
|
||||
int result_msg_len;
|
||||
int ret = -EINVAL;
|
||||
|
||||
dout("handle_auth_reply %p %p\n", p, end);
|
||||
ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
|
||||
protocol = ceph_decode_32(&p);
|
||||
result = ceph_decode_32(&p);
|
||||
global_id = ceph_decode_64(&p);
|
||||
payload_len = ceph_decode_32(&p);
|
||||
payload = p;
|
||||
p += payload_len;
|
||||
ceph_decode_need(&p, end, sizeof(u32), bad);
|
||||
result_msg_len = ceph_decode_32(&p);
|
||||
result_msg = p;
|
||||
p += result_msg_len;
|
||||
if (p != end)
|
||||
goto bad;
|
||||
|
||||
dout(" result %d '%.*s' gid %llu len %d\n", result, result_msg_len,
|
||||
result_msg, global_id, payload_len);
|
||||
|
||||
payload_end = payload + payload_len;
|
||||
|
||||
if (global_id && ac->global_id != global_id) {
|
||||
dout(" set global_id %lld -> %lld\n", ac->global_id, global_id);
|
||||
ac->global_id = global_id;
|
||||
}
|
||||
|
||||
if (ac->negotiating) {
|
||||
/* server does not support our protocols? */
|
||||
if (!protocol && result < 0) {
|
||||
ret = result;
|
||||
goto out;
|
||||
}
|
||||
/* set up (new) protocol handler? */
|
||||
if (ac->protocol && ac->protocol != protocol) {
|
||||
ac->ops->destroy(ac);
|
||||
ac->protocol = 0;
|
||||
ac->ops = NULL;
|
||||
}
|
||||
if (ac->protocol != protocol) {
|
||||
ret = ceph_auth_init_protocol(ac, protocol);
|
||||
if (ret) {
|
||||
pr_err("error %d on auth protocol %d init\n",
|
||||
ret, protocol);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ac->negotiating = false;
|
||||
}
|
||||
|
||||
ret = ac->ops->handle_reply(ac, result, payload, payload_end);
|
||||
if (ret == -EAGAIN) {
|
||||
return ceph_build_auth_request(ac, reply_buf, reply_len);
|
||||
} else if (ret) {
|
||||
pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
|
||||
bad:
|
||||
pr_err("failed to decode auth msg\n");
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ceph_build_auth(struct ceph_auth_client *ac,
|
||||
void *msg_buf, size_t msg_len)
|
||||
{
|
||||
if (!ac->protocol)
|
||||
return ceph_auth_build_hello(ac, msg_buf, msg_len);
|
||||
BUG_ON(!ac->ops);
|
||||
if (ac->ops->should_authenticate(ac))
|
||||
return ceph_build_auth_request(ac, msg_buf, msg_len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
|
||||
{
|
||||
if (!ac->ops)
|
||||
return 0;
|
||||
return ac->ops->is_authenticated(ac);
|
||||
}
|
||||
132
net/ceph/auth_none.c
Normal file
132
net/ceph/auth_none.c
Normal file
@@ -0,0 +1,132 @@
|
||||
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/err.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <linux/ceph/decode.h>
|
||||
#include <linux/ceph/auth.h>
|
||||
|
||||
#include "auth_none.h"
|
||||
|
||||
static void reset(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_auth_none_info *xi = ac->private;
|
||||
|
||||
xi->starting = true;
|
||||
xi->built_authorizer = false;
|
||||
}
|
||||
|
||||
static void destroy(struct ceph_auth_client *ac)
|
||||
{
|
||||
kfree(ac->private);
|
||||
ac->private = NULL;
|
||||
}
|
||||
|
||||
static int is_authenticated(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_auth_none_info *xi = ac->private;
|
||||
|
||||
return !xi->starting;
|
||||
}
|
||||
|
||||
static int should_authenticate(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_auth_none_info *xi = ac->private;
|
||||
|
||||
return xi->starting;
|
||||
}
|
||||
|
||||
/*
|
||||
* the generic auth code decodes the global_id, and we carry no actual
|
||||
* authentication state, so nothing happens here.
|
||||
*/
|
||||
static int handle_reply(struct ceph_auth_client *ac, int result,
|
||||
void *buf, void *end)
|
||||
{
|
||||
struct ceph_auth_none_info *xi = ac->private;
|
||||
|
||||
xi->starting = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* build an 'authorizer' with our entity_name and global_id. we can
|
||||
* reuse a single static copy since it is identical for all services
|
||||
* we connect to.
|
||||
*/
|
||||
static int ceph_auth_none_create_authorizer(
|
||||
struct ceph_auth_client *ac, int peer_type,
|
||||
struct ceph_authorizer **a,
|
||||
void **buf, size_t *len,
|
||||
void **reply_buf, size_t *reply_len)
|
||||
{
|
||||
struct ceph_auth_none_info *ai = ac->private;
|
||||
struct ceph_none_authorizer *au = &ai->au;
|
||||
void *p, *end;
|
||||
int ret;
|
||||
|
||||
if (!ai->built_authorizer) {
|
||||
p = au->buf;
|
||||
end = p + sizeof(au->buf);
|
||||
ceph_encode_8(&p, 1);
|
||||
ret = ceph_entity_name_encode(ac->name, &p, end - 8);
|
||||
if (ret < 0)
|
||||
goto bad;
|
||||
ceph_decode_need(&p, end, sizeof(u64), bad2);
|
||||
ceph_encode_64(&p, ac->global_id);
|
||||
au->buf_len = p - (void *)au->buf;
|
||||
ai->built_authorizer = true;
|
||||
dout("built authorizer len %d\n", au->buf_len);
|
||||
}
|
||||
|
||||
*a = (struct ceph_authorizer *)au;
|
||||
*buf = au->buf;
|
||||
*len = au->buf_len;
|
||||
*reply_buf = au->reply_buf;
|
||||
*reply_len = sizeof(au->reply_buf);
|
||||
return 0;
|
||||
|
||||
bad2:
|
||||
ret = -ERANGE;
|
||||
bad:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Authorizer teardown hook.  The authorizer lives inside the handler's
 * private state, so there is nothing to release here.
 */
static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac,
					      struct ceph_authorizer *a)
{
}
|
||||
|
||||
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
|
||||
.name = "none",
|
||||
.reset = reset,
|
||||
.destroy = destroy,
|
||||
.is_authenticated = is_authenticated,
|
||||
.should_authenticate = should_authenticate,
|
||||
.handle_reply = handle_reply,
|
||||
.create_authorizer = ceph_auth_none_create_authorizer,
|
||||
.destroy_authorizer = ceph_auth_none_destroy_authorizer,
|
||||
};
|
||||
|
||||
int ceph_auth_none_init(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_auth_none_info *xi;
|
||||
|
||||
dout("ceph_auth_none_init %p\n", ac);
|
||||
xi = kzalloc(sizeof(*xi), GFP_NOFS);
|
||||
if (!xi)
|
||||
return -ENOMEM;
|
||||
|
||||
xi->starting = true;
|
||||
xi->built_authorizer = false;
|
||||
|
||||
ac->protocol = CEPH_AUTH_NONE;
|
||||
ac->private = xi;
|
||||
ac->ops = &ceph_auth_none_ops;
|
||||
return 0;
|
||||
}
|
||||
|
||||
29
net/ceph/auth_none.h
Normal file
29
net/ceph/auth_none.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#ifndef _FS_CEPH_AUTH_NONE_H
|
||||
#define _FS_CEPH_AUTH_NONE_H
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/ceph/auth.h>
|
||||
|
||||
/*
|
||||
* null security mode.
|
||||
*
|
||||
* we use a single static authorizer that simply encodes our entity name
|
||||
* and global id.
|
||||
*/
|
||||
|
||||
/* The encoded authorizer blob used by the "none" protocol. */
struct ceph_none_authorizer {
	char buf[128];		/* encoded authorizer bytes */
	int buf_len;		/* number of bytes of buf in use */
	/*
	 * Zero-length on purpose: its sizeof (0) is what gets reported as
	 * the reply buffer length.
	 */
	char reply_buf[0];
};
|
||||
|
||||
struct ceph_auth_none_info {
|
||||
bool starting;
|
||||
bool built_authorizer;
|
||||
struct ceph_none_authorizer au; /* we only need one; it's static */
|
||||
};
|
||||
|
||||
extern int ceph_auth_none_init(struct ceph_auth_client *ac);
|
||||
|
||||
#endif
|
||||
|
||||
688
net/ceph/auth_x.c
Normal file
688
net/ceph/auth_x.c
Normal file
@@ -0,0 +1,688 @@
|
||||
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/err.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <linux/ceph/decode.h>
|
||||
#include <linux/ceph/auth.h>
|
||||
|
||||
#include "crypto.h"
|
||||
#include "auth_x.h"
|
||||
#include "auth_x_protocol.h"
|
||||
|
||||
#define TEMP_TICKET_BUF_LEN 256
|
||||
|
||||
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
|
||||
|
||||
static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
int need;
|
||||
|
||||
ceph_x_validate_tickets(ac, &need);
|
||||
dout("ceph_x_is_authenticated want=%d need=%d have=%d\n",
|
||||
ac->want_keys, need, xi->have_keys);
|
||||
return (ac->want_keys & xi->have_keys) == ac->want_keys;
|
||||
}
|
||||
|
||||
static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
int need;
|
||||
|
||||
ceph_x_validate_tickets(ac, &need);
|
||||
dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
|
||||
ac->want_keys, need, xi->have_keys);
|
||||
return need != 0;
|
||||
}
|
||||
|
||||
static int ceph_x_encrypt_buflen(int ilen)
|
||||
{
|
||||
return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
|
||||
sizeof(u32);
|
||||
}
|
||||
|
||||
static int ceph_x_encrypt(struct ceph_crypto_key *secret,
|
||||
void *ibuf, int ilen, void *obuf, size_t olen)
|
||||
{
|
||||
struct ceph_x_encrypt_header head = {
|
||||
.struct_v = 1,
|
||||
.magic = cpu_to_le64(CEPHX_ENC_MAGIC)
|
||||
};
|
||||
size_t len = olen - sizeof(u32);
|
||||
int ret;
|
||||
|
||||
ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len,
|
||||
&head, sizeof(head), ibuf, ilen);
|
||||
if (ret)
|
||||
return ret;
|
||||
ceph_encode_32(&obuf, len);
|
||||
return len + sizeof(u32);
|
||||
}
|
||||
|
||||
static int ceph_x_decrypt(struct ceph_crypto_key *secret,
|
||||
void **p, void *end, void *obuf, size_t olen)
|
||||
{
|
||||
struct ceph_x_encrypt_header head;
|
||||
size_t head_len = sizeof(head);
|
||||
int len, ret;
|
||||
|
||||
len = ceph_decode_32(p);
|
||||
if (*p + len > end)
|
||||
return -EINVAL;
|
||||
|
||||
dout("ceph_x_decrypt len %d\n", len);
|
||||
ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen,
|
||||
*p, len);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC)
|
||||
return -EPERM;
|
||||
*p += len;
|
||||
return olen;
|
||||
}
|
||||
|
||||
/*
|
||||
* get existing (or insert new) ticket handler
|
||||
*/
|
||||
static struct ceph_x_ticket_handler *
|
||||
get_ticket_handler(struct ceph_auth_client *ac, int service)
|
||||
{
|
||||
struct ceph_x_ticket_handler *th;
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
struct rb_node *parent = NULL, **p = &xi->ticket_handlers.rb_node;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
th = rb_entry(parent, struct ceph_x_ticket_handler, node);
|
||||
if (service < th->service)
|
||||
p = &(*p)->rb_left;
|
||||
else if (service > th->service)
|
||||
p = &(*p)->rb_right;
|
||||
else
|
||||
return th;
|
||||
}
|
||||
|
||||
/* add it */
|
||||
th = kzalloc(sizeof(*th), GFP_NOFS);
|
||||
if (!th)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
th->service = service;
|
||||
rb_link_node(&th->node, parent, p);
|
||||
rb_insert_color(&th->node, &xi->ticket_handlers);
|
||||
return th;
|
||||
}
|
||||
|
||||
static void remove_ticket_handler(struct ceph_auth_client *ac,
|
||||
struct ceph_x_ticket_handler *th)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
|
||||
dout("remove_ticket_handler %p %d\n", th, th->service);
|
||||
rb_erase(&th->node, &xi->ticket_handlers);
|
||||
ceph_crypto_key_destroy(&th->session_key);
|
||||
if (th->ticket_blob)
|
||||
ceph_buffer_put(th->ticket_blob);
|
||||
kfree(th);
|
||||
}
|
||||
|
||||
static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
|
||||
struct ceph_crypto_key *secret,
|
||||
void *buf, void *end)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
int num;
|
||||
void *p = buf;
|
||||
int ret;
|
||||
char *dbuf;
|
||||
char *ticket_buf;
|
||||
u8 reply_struct_v;
|
||||
|
||||
dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
|
||||
if (!dbuf)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = -ENOMEM;
|
||||
ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
|
||||
if (!ticket_buf)
|
||||
goto out_dbuf;
|
||||
|
||||
ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
|
||||
reply_struct_v = ceph_decode_8(&p);
|
||||
if (reply_struct_v != 1)
|
||||
goto bad;
|
||||
num = ceph_decode_32(&p);
|
||||
dout("%d tickets\n", num);
|
||||
while (num--) {
|
||||
int type;
|
||||
u8 tkt_struct_v, blob_struct_v;
|
||||
struct ceph_x_ticket_handler *th;
|
||||
void *dp, *dend;
|
||||
int dlen;
|
||||
char is_enc;
|
||||
struct timespec validity;
|
||||
struct ceph_crypto_key old_key;
|
||||
void *tp, *tpend;
|
||||
struct ceph_timespec new_validity;
|
||||
struct ceph_crypto_key new_session_key;
|
||||
struct ceph_buffer *new_ticket_blob;
|
||||
unsigned long new_expires, new_renew_after;
|
||||
u64 new_secret_id;
|
||||
|
||||
ceph_decode_need(&p, end, sizeof(u32) + 1, bad);
|
||||
|
||||
type = ceph_decode_32(&p);
|
||||
dout(" ticket type %d %s\n", type, ceph_entity_type_name(type));
|
||||
|
||||
tkt_struct_v = ceph_decode_8(&p);
|
||||
if (tkt_struct_v != 1)
|
||||
goto bad;
|
||||
|
||||
th = get_ticket_handler(ac, type);
|
||||
if (IS_ERR(th)) {
|
||||
ret = PTR_ERR(th);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* blob for me */
|
||||
dlen = ceph_x_decrypt(secret, &p, end, dbuf,
|
||||
TEMP_TICKET_BUF_LEN);
|
||||
if (dlen <= 0) {
|
||||
ret = dlen;
|
||||
goto out;
|
||||
}
|
||||
dout(" decrypted %d bytes\n", dlen);
|
||||
dend = dbuf + dlen;
|
||||
dp = dbuf;
|
||||
|
||||
tkt_struct_v = ceph_decode_8(&dp);
|
||||
if (tkt_struct_v != 1)
|
||||
goto bad;
|
||||
|
||||
memcpy(&old_key, &th->session_key, sizeof(old_key));
|
||||
ret = ceph_crypto_key_decode(&new_session_key, &dp, dend);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
|
||||
ceph_decode_timespec(&validity, &new_validity);
|
||||
new_expires = get_seconds() + validity.tv_sec;
|
||||
new_renew_after = new_expires - (validity.tv_sec / 4);
|
||||
dout(" expires=%lu renew_after=%lu\n", new_expires,
|
||||
new_renew_after);
|
||||
|
||||
/* ticket blob for service */
|
||||
ceph_decode_8_safe(&p, end, is_enc, bad);
|
||||
tp = ticket_buf;
|
||||
if (is_enc) {
|
||||
/* encrypted */
|
||||
dout(" encrypted ticket\n");
|
||||
dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf,
|
||||
TEMP_TICKET_BUF_LEN);
|
||||
if (dlen < 0) {
|
||||
ret = dlen;
|
||||
goto out;
|
||||
}
|
||||
dlen = ceph_decode_32(&tp);
|
||||
} else {
|
||||
/* unencrypted */
|
||||
ceph_decode_32_safe(&p, end, dlen, bad);
|
||||
ceph_decode_need(&p, end, dlen, bad);
|
||||
ceph_decode_copy(&p, ticket_buf, dlen);
|
||||
}
|
||||
tpend = tp + dlen;
|
||||
dout(" ticket blob is %d bytes\n", dlen);
|
||||
ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad);
|
||||
blob_struct_v = ceph_decode_8(&tp);
|
||||
new_secret_id = ceph_decode_64(&tp);
|
||||
ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* all is well, update our ticket */
|
||||
ceph_crypto_key_destroy(&th->session_key);
|
||||
if (th->ticket_blob)
|
||||
ceph_buffer_put(th->ticket_blob);
|
||||
th->session_key = new_session_key;
|
||||
th->ticket_blob = new_ticket_blob;
|
||||
th->validity = new_validity;
|
||||
th->secret_id = new_secret_id;
|
||||
th->expires = new_expires;
|
||||
th->renew_after = new_renew_after;
|
||||
dout(" got ticket service %d (%s) secret_id %lld len %d\n",
|
||||
type, ceph_entity_type_name(type), th->secret_id,
|
||||
(int)th->ticket_blob->vec.iov_len);
|
||||
xi->have_keys |= th->service;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
kfree(ticket_buf);
|
||||
out_dbuf:
|
||||
kfree(dbuf);
|
||||
return ret;
|
||||
|
||||
bad:
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
|
||||
struct ceph_x_ticket_handler *th,
|
||||
struct ceph_x_authorizer *au)
|
||||
{
|
||||
int maxlen;
|
||||
struct ceph_x_authorize_a *msg_a;
|
||||
struct ceph_x_authorize_b msg_b;
|
||||
void *p, *end;
|
||||
int ret;
|
||||
int ticket_blob_len =
|
||||
(th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
|
||||
|
||||
dout("build_authorizer for %s %p\n",
|
||||
ceph_entity_type_name(th->service), au);
|
||||
|
||||
maxlen = sizeof(*msg_a) + sizeof(msg_b) +
|
||||
ceph_x_encrypt_buflen(ticket_blob_len);
|
||||
dout(" need len %d\n", maxlen);
|
||||
if (au->buf && au->buf->alloc_len < maxlen) {
|
||||
ceph_buffer_put(au->buf);
|
||||
au->buf = NULL;
|
||||
}
|
||||
if (!au->buf) {
|
||||
au->buf = ceph_buffer_new(maxlen, GFP_NOFS);
|
||||
if (!au->buf)
|
||||
return -ENOMEM;
|
||||
}
|
||||
au->service = th->service;
|
||||
|
||||
msg_a = au->buf->vec.iov_base;
|
||||
msg_a->struct_v = 1;
|
||||
msg_a->global_id = cpu_to_le64(ac->global_id);
|
||||
msg_a->service_id = cpu_to_le32(th->service);
|
||||
msg_a->ticket_blob.struct_v = 1;
|
||||
msg_a->ticket_blob.secret_id = cpu_to_le64(th->secret_id);
|
||||
msg_a->ticket_blob.blob_len = cpu_to_le32(ticket_blob_len);
|
||||
if (ticket_blob_len) {
|
||||
memcpy(msg_a->ticket_blob.blob, th->ticket_blob->vec.iov_base,
|
||||
th->ticket_blob->vec.iov_len);
|
||||
}
|
||||
dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
|
||||
le64_to_cpu(msg_a->ticket_blob.secret_id));
|
||||
|
||||
p = msg_a + 1;
|
||||
p += ticket_blob_len;
|
||||
end = au->buf->vec.iov_base + au->buf->vec.iov_len;
|
||||
|
||||
get_random_bytes(&au->nonce, sizeof(au->nonce));
|
||||
msg_b.struct_v = 1;
|
||||
msg_b.nonce = cpu_to_le64(au->nonce);
|
||||
ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b),
|
||||
p, end - p);
|
||||
if (ret < 0)
|
||||
goto out_buf;
|
||||
p += ret;
|
||||
au->buf->vec.iov_len = p - au->buf->vec.iov_base;
|
||||
dout(" built authorizer nonce %llx len %d\n", au->nonce,
|
||||
(int)au->buf->vec.iov_len);
|
||||
BUG_ON(au->buf->vec.iov_len > maxlen);
|
||||
return 0;
|
||||
|
||||
out_buf:
|
||||
ceph_buffer_put(au->buf);
|
||||
au->buf = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ceph_x_encode_ticket(struct ceph_x_ticket_handler *th,
|
||||
void **p, void *end)
|
||||
{
|
||||
ceph_decode_need(p, end, 1 + sizeof(u64), bad);
|
||||
ceph_encode_8(p, 1);
|
||||
ceph_encode_64(p, th->secret_id);
|
||||
if (th->ticket_blob) {
|
||||
const char *buf = th->ticket_blob->vec.iov_base;
|
||||
u32 len = th->ticket_blob->vec.iov_len;
|
||||
|
||||
ceph_encode_32_safe(p, end, len, bad);
|
||||
ceph_encode_copy_safe(p, end, buf, len, bad);
|
||||
} else {
|
||||
ceph_encode_32_safe(p, end, 0, bad);
|
||||
}
|
||||
|
||||
return 0;
|
||||
bad:
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
|
||||
{
|
||||
int want = ac->want_keys;
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
int service;
|
||||
|
||||
*pneed = ac->want_keys & ~(xi->have_keys);
|
||||
|
||||
for (service = 1; service <= want; service <<= 1) {
|
||||
struct ceph_x_ticket_handler *th;
|
||||
|
||||
if (!(ac->want_keys & service))
|
||||
continue;
|
||||
|
||||
if (*pneed & service)
|
||||
continue;
|
||||
|
||||
th = get_ticket_handler(ac, service);
|
||||
|
||||
if (IS_ERR(th)) {
|
||||
*pneed |= service;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (get_seconds() >= th->renew_after)
|
||||
*pneed |= service;
|
||||
if (get_seconds() >= th->expires)
|
||||
xi->have_keys &= ~service;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int ceph_x_build_request(struct ceph_auth_client *ac,
|
||||
void *buf, void *end)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
int need;
|
||||
struct ceph_x_request_header *head = buf;
|
||||
int ret;
|
||||
struct ceph_x_ticket_handler *th =
|
||||
get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
|
||||
|
||||
if (IS_ERR(th))
|
||||
return PTR_ERR(th);
|
||||
|
||||
ceph_x_validate_tickets(ac, &need);
|
||||
|
||||
dout("build_request want %x have %x need %x\n",
|
||||
ac->want_keys, xi->have_keys, need);
|
||||
|
||||
if (need & CEPH_ENTITY_TYPE_AUTH) {
|
||||
struct ceph_x_authenticate *auth = (void *)(head + 1);
|
||||
void *p = auth + 1;
|
||||
struct ceph_x_challenge_blob tmp;
|
||||
char tmp_enc[40];
|
||||
u64 *u;
|
||||
|
||||
if (p > end)
|
||||
return -ERANGE;
|
||||
|
||||
dout(" get_auth_session_key\n");
|
||||
head->op = cpu_to_le16(CEPHX_GET_AUTH_SESSION_KEY);
|
||||
|
||||
/* encrypt and hash */
|
||||
get_random_bytes(&auth->client_challenge, sizeof(u64));
|
||||
tmp.client_challenge = auth->client_challenge;
|
||||
tmp.server_challenge = cpu_to_le64(xi->server_challenge);
|
||||
ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp),
|
||||
tmp_enc, sizeof(tmp_enc));
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
auth->struct_v = 1;
|
||||
auth->key = 0;
|
||||
for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++)
|
||||
auth->key ^= *(__le64 *)u;
|
||||
dout(" server_challenge %llx client_challenge %llx key %llx\n",
|
||||
xi->server_challenge, le64_to_cpu(auth->client_challenge),
|
||||
le64_to_cpu(auth->key));
|
||||
|
||||
/* now encode the old ticket if exists */
|
||||
ret = ceph_x_encode_ticket(th, &p, end);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
if (need) {
|
||||
void *p = head + 1;
|
||||
struct ceph_x_service_ticket_request *req;
|
||||
|
||||
if (p > end)
|
||||
return -ERANGE;
|
||||
head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
|
||||
|
||||
ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
|
||||
if (ret)
|
||||
return ret;
|
||||
ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base,
|
||||
xi->auth_authorizer.buf->vec.iov_len);
|
||||
|
||||
req = p;
|
||||
req->keys = cpu_to_le32(need);
|
||||
p += sizeof(*req);
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
|
||||
void *buf, void *end)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
struct ceph_x_reply_header *head = buf;
|
||||
struct ceph_x_ticket_handler *th;
|
||||
int len = end - buf;
|
||||
int op;
|
||||
int ret;
|
||||
|
||||
if (result)
|
||||
return result; /* XXX hmm? */
|
||||
|
||||
if (xi->starting) {
|
||||
/* it's a hello */
|
||||
struct ceph_x_server_challenge *sc = buf;
|
||||
|
||||
if (len != sizeof(*sc))
|
||||
return -EINVAL;
|
||||
xi->server_challenge = le64_to_cpu(sc->server_challenge);
|
||||
dout("handle_reply got server challenge %llx\n",
|
||||
xi->server_challenge);
|
||||
xi->starting = false;
|
||||
xi->have_keys &= ~CEPH_ENTITY_TYPE_AUTH;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
op = le16_to_cpu(head->op);
|
||||
result = le32_to_cpu(head->result);
|
||||
dout("handle_reply op %d result %d\n", op, result);
|
||||
switch (op) {
|
||||
case CEPHX_GET_AUTH_SESSION_KEY:
|
||||
/* verify auth key */
|
||||
ret = ceph_x_proc_ticket_reply(ac, &xi->secret,
|
||||
buf + sizeof(*head), end);
|
||||
break;
|
||||
|
||||
case CEPHX_GET_PRINCIPAL_SESSION_KEY:
|
||||
th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
|
||||
if (IS_ERR(th))
|
||||
return PTR_ERR(th);
|
||||
ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
|
||||
buf + sizeof(*head), end);
|
||||
break;
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
if (ret)
|
||||
return ret;
|
||||
if (ac->want_keys == xi->have_keys)
|
||||
return 0;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/*
 * Allocate and build an authorizer for connecting to @peer_type.
 *
 * On success *a owns the new authorizer, *buf/*len describe its encoded
 * buffer and *reply_buf/*reply_len describe the embedded reply area.
 * Returns 0, -ENOMEM, or the error from looking up the ticket handler or
 * building the authorizer.
 */
static int ceph_x_create_authorizer(
	struct ceph_auth_client *ac, int peer_type,
	struct ceph_authorizer **a,
	void **buf, size_t *len,
	void **reply_buf, size_t *reply_len)
{
	struct ceph_x_ticket_handler *handler;
	struct ceph_x_authorizer *xauth;
	int err;

	handler = get_ticket_handler(ac, peer_type);
	if (IS_ERR(handler))
		return PTR_ERR(handler);

	xauth = kzalloc(sizeof(*xauth), GFP_NOFS);
	if (xauth == NULL)
		return -ENOMEM;

	err = ceph_x_build_authorizer(ac, handler, xauth);
	if (err) {
		kfree(xauth);
		return err;
	}

	/* hand the pieces back to the caller */
	*a = (struct ceph_authorizer *)xauth;
	*buf = xauth->buf->vec.iov_base;
	*len = xauth->buf->vec.iov_len;
	*reply_buf = xauth->reply_buf;
	*reply_len = sizeof(xauth->reply_buf);
	return 0;
}
|
||||
|
||||
/*
 * Check the peer's reply stored in the authorizer's reply_buf.
 *
 * The reply is decrypted with the session key of the authorizer's service
 * and must carry our nonce plus one.  @len is not used.
 *
 * Returns 0 on a match, -EPERM on a size or nonce mismatch, or the error
 * from the ticket handler lookup / decryption.
 */
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
					  struct ceph_authorizer *a, size_t len)
{
	struct ceph_x_authorizer *au = (void *)a;
	struct ceph_x_authorize_reply reply;
	struct ceph_x_ticket_handler *th;
	void *pos = au->reply_buf;
	void *limit = pos + sizeof(au->reply_buf);
	int ret = 0;

	th = get_ticket_handler(ac, au->service);
	if (IS_ERR(th))
		return PTR_ERR(th);

	ret = ceph_x_decrypt(&th->session_key, &pos, limit,
			     &reply, sizeof(reply));
	if (ret < 0)
		return ret;
	if (ret != sizeof(reply))
		return -EPERM;

	ret = (au->nonce + 1 == le64_to_cpu(reply.nonce_plus_one)) ?
		0 : -EPERM;
	dout("verify_authorizer_reply nonce %llx got %llx ret %d\n",
	     au->nonce, le64_to_cpu(reply.nonce_plus_one), ret);
	return ret;
}
|
||||
|
||||
static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac,
|
||||
struct ceph_authorizer *a)
|
||||
{
|
||||
struct ceph_x_authorizer *au = (void *)a;
|
||||
|
||||
ceph_buffer_put(au->buf);
|
||||
kfree(au);
|
||||
}
|
||||
|
||||
|
||||
static void ceph_x_reset(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
|
||||
dout("reset\n");
|
||||
xi->starting = true;
|
||||
xi->server_challenge = 0;
|
||||
}
|
||||
|
||||
static void ceph_x_destroy(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_x_info *xi = ac->private;
|
||||
struct rb_node *p;
|
||||
|
||||
dout("ceph_x_destroy %p\n", ac);
|
||||
ceph_crypto_key_destroy(&xi->secret);
|
||||
|
||||
while ((p = rb_first(&xi->ticket_handlers)) != NULL) {
|
||||
struct ceph_x_ticket_handler *th =
|
||||
rb_entry(p, struct ceph_x_ticket_handler, node);
|
||||
remove_ticket_handler(ac, th);
|
||||
}
|
||||
|
||||
if (xi->auth_authorizer.buf)
|
||||
ceph_buffer_put(xi->auth_authorizer.buf);
|
||||
|
||||
kfree(ac->private);
|
||||
ac->private = NULL;
|
||||
}
|
||||
|
||||
/*
 * Remove the ticket handler for @peer_type; does nothing if the handler
 * lookup fails.
 */
static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
					 int peer_type)
{
	struct ceph_x_ticket_handler *handler;

	handler = get_ticket_handler(ac, peer_type);
	if (IS_ERR(handler))
		return;

	remove_ticket_handler(ac, handler);
}
|
||||
|
||||
|
||||
static const struct ceph_auth_client_ops ceph_x_ops = {
|
||||
.name = "x",
|
||||
.is_authenticated = ceph_x_is_authenticated,
|
||||
.should_authenticate = ceph_x_should_authenticate,
|
||||
.build_request = ceph_x_build_request,
|
||||
.handle_reply = ceph_x_handle_reply,
|
||||
.create_authorizer = ceph_x_create_authorizer,
|
||||
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
|
||||
.destroy_authorizer = ceph_x_destroy_authorizer,
|
||||
.invalidate_authorizer = ceph_x_invalidate_authorizer,
|
||||
.reset = ceph_x_reset,
|
||||
.destroy = ceph_x_destroy,
|
||||
};
|
||||
|
||||
|
||||
int ceph_x_init(struct ceph_auth_client *ac)
|
||||
{
|
||||
struct ceph_x_info *xi;
|
||||
int ret;
|
||||
|
||||
dout("ceph_x_init %p\n", ac);
|
||||
ret = -ENOMEM;
|
||||
xi = kzalloc(sizeof(*xi), GFP_NOFS);
|
||||
if (!xi)
|
||||
goto out;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!ac->secret) {
|
||||
pr_err("no secret set (for auth_x protocol)\n");
|
||||
goto out_nomem;
|
||||
}
|
||||
|
||||
ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
|
||||
if (ret)
|
||||
goto out_nomem;
|
||||
|
||||
xi->starting = true;
|
||||
xi->ticket_handlers = RB_ROOT;
|
||||
|
||||
ac->protocol = CEPH_AUTH_CEPHX;
|
||||
ac->private = xi;
|
||||
ac->ops = &ceph_x_ops;
|
||||
return 0;
|
||||
|
||||
out_nomem:
|
||||
kfree(xi);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
50
net/ceph/auth_x.h
Normal file
50
net/ceph/auth_x.h
Normal file
@@ -0,0 +1,50 @@
|
||||
#ifndef _FS_CEPH_AUTH_X_H
|
||||
#define _FS_CEPH_AUTH_X_H
|
||||
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
#include <linux/ceph/auth.h>
|
||||
|
||||
#include "crypto.h"
|
||||
#include "auth_x_protocol.h"
|
||||
|
||||
/*
|
||||
* Handle ticket for a single service.
|
||||
*/
|
||||
struct ceph_x_ticket_handler {
|
||||
struct rb_node node;
|
||||
unsigned service;
|
||||
|
||||
struct ceph_crypto_key session_key;
|
||||
struct ceph_timespec validity;
|
||||
|
||||
u64 secret_id;
|
||||
struct ceph_buffer *ticket_blob;
|
||||
|
||||
unsigned long renew_after, expires;
|
||||
};
|
||||
|
||||
|
||||
struct ceph_x_authorizer {
|
||||
struct ceph_buffer *buf;
|
||||
unsigned service;
|
||||
u64 nonce;
|
||||
char reply_buf[128]; /* big enough for encrypted blob */
|
||||
};
|
||||
|
||||
struct ceph_x_info {
|
||||
struct ceph_crypto_key secret;
|
||||
|
||||
bool starting;
|
||||
u64 server_challenge;
|
||||
|
||||
unsigned have_keys;
|
||||
struct rb_root ticket_handlers;
|
||||
|
||||
struct ceph_x_authorizer auth_authorizer;
|
||||
};
|
||||
|
||||
extern int ceph_x_init(struct ceph_auth_client *ac);
|
||||
|
||||
#endif
|
||||
|
||||
90
net/ceph/auth_x_protocol.h
Normal file
90
net/ceph/auth_x_protocol.h
Normal file
@@ -0,0 +1,90 @@
|
||||
#ifndef __FS_CEPH_AUTH_X_PROTOCOL
|
||||
#define __FS_CEPH_AUTH_X_PROTOCOL
|
||||
|
||||
#define CEPHX_GET_AUTH_SESSION_KEY 0x0100
|
||||
#define CEPHX_GET_PRINCIPAL_SESSION_KEY 0x0200
|
||||
#define CEPHX_GET_ROTATING_KEY 0x0400
|
||||
|
||||
/* common bits */
|
||||
struct ceph_x_ticket_blob {
|
||||
__u8 struct_v;
|
||||
__le64 secret_id;
|
||||
__le32 blob_len;
|
||||
char blob[];
|
||||
} __attribute__ ((packed));
|
||||
|
||||
|
||||
/* common request/reply headers */
|
||||
struct ceph_x_request_header {
|
||||
__le16 op;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
struct ceph_x_reply_header {
|
||||
__le16 op;
|
||||
__le32 result;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
|
||||
/* authenticate handshake */
|
||||
|
||||
/* initial hello (no reply header) */
|
||||
struct ceph_x_server_challenge {
|
||||
__u8 struct_v;
|
||||
__le64 server_challenge;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
struct ceph_x_authenticate {
|
||||
__u8 struct_v;
|
||||
__le64 client_challenge;
|
||||
__le64 key;
|
||||
/* ticket blob */
|
||||
} __attribute__ ((packed));
|
||||
|
||||
struct ceph_x_service_ticket_request {
|
||||
__u8 struct_v;
|
||||
__le32 keys;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
struct ceph_x_challenge_blob {
|
||||
__le64 server_challenge;
|
||||
__le64 client_challenge;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
|
||||
|
||||
/* authorize handshake */
|
||||
|
||||
/*
|
||||
* The authorizer consists of two pieces:
|
||||
* a - service id, ticket blob
|
||||
* b - encrypted with session key
|
||||
*/
|
||||
struct ceph_x_authorize_a {
|
||||
__u8 struct_v;
|
||||
__le64 global_id;
|
||||
__le32 service_id;
|
||||
struct ceph_x_ticket_blob ticket_blob;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
struct ceph_x_authorize_b {
|
||||
__u8 struct_v;
|
||||
__le64 nonce;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
struct ceph_x_authorize_reply {
|
||||
__u8 struct_v;
|
||||
__le64 nonce_plus_one;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
|
||||
/*
|
||||
 * encryption bundle
|
||||
*/
|
||||
#define CEPHX_ENC_MAGIC 0xff009cad8826aa55ull
|
||||
|
||||
struct ceph_x_encrypt_header {
|
||||
__u8 struct_v;
|
||||
__le64 magic;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
#endif
|
||||
68
net/ceph/buffer.c
Normal file
68
net/ceph/buffer.c
Normal file
@@ -0,0 +1,68 @@
|
||||
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <linux/ceph/buffer.h>
|
||||
#include <linux/ceph/decode.h>
|
||||
|
||||
/*
 * Allocate a ceph_buffer holding @len bytes.
 *
 * The data area comes from kmalloc() when possible and from __vmalloc()
 * otherwise; is_vmalloc records which, so that release can free it the
 * right way.  Returns the buffer with its kref initialised, or NULL on
 * allocation failure.
 */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
	struct ceph_buffer *buf;
	void *data;

	buf = kmalloc(sizeof(*buf), gfp);
	if (buf == NULL)
		return NULL;

	/* try the slab quietly first, fall back to vmalloc */
	data = kmalloc(len, gfp | __GFP_NOWARN);
	buf->is_vmalloc = false;
	if (data == NULL) {
		data = __vmalloc(len, gfp, PAGE_KERNEL);
		if (data == NULL) {
			kfree(buf);
			return NULL;
		}
		buf->is_vmalloc = true;
	}
	buf->vec.iov_base = data;

	kref_init(&buf->kref);
	buf->alloc_len = len;
	buf->vec.iov_len = len;
	dout("buffer_new %p\n", buf);
	return buf;
}
|
||||
EXPORT_SYMBOL(ceph_buffer_new);
|
||||
|
||||
void ceph_buffer_release(struct kref *kref)
|
||||
{
|
||||
struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref);
|
||||
|
||||
dout("buffer_release %p\n", b);
|
||||
if (b->vec.iov_base) {
|
||||
if (b->is_vmalloc)
|
||||
vfree(b->vec.iov_base);
|
||||
else
|
||||
kfree(b->vec.iov_base);
|
||||
}
|
||||
kfree(b);
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_buffer_release);
|
||||
|
||||
int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end)
|
||||
{
|
||||
size_t len;
|
||||
|
||||
ceph_decode_need(p, end, sizeof(u32), bad);
|
||||
len = ceph_decode_32(p);
|
||||
dout("decode_buffer len %d\n", (int)len);
|
||||
ceph_decode_need(p, end, len, bad);
|
||||
*b = ceph_buffer_new(len, GFP_NOFS);
|
||||
if (!*b)
|
||||
return -ENOMEM;
|
||||
ceph_decode_copy(p, (*b)->vec.iov_base, len);
|
||||
return 0;
|
||||
bad:
|
||||
return -EINVAL;
|
||||
}
|
||||
529
net/ceph/ceph_common.c
Normal file
529
net/ceph/ceph_common.c
Normal file
@@ -0,0 +1,529 @@
|
||||
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/inet.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/parser.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/statfs.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
|
||||
#include <linux/ceph/libceph.h>
|
||||
#include <linux/ceph/debugfs.h>
|
||||
#include <linux/ceph/decode.h>
|
||||
#include <linux/ceph/mon_client.h>
|
||||
#include <linux/ceph/auth.h>
|
||||
|
||||
|
||||
|
||||
/*
 * Find the filename portion of a path (/foo/bar/baz -> baz).
 *
 * Scans backwards from s + len and returns a pointer just past the last
 * '/' within the first @len bytes, or @s itself if there is none.
 */
const char *ceph_file_part(const char *s, int len)
{
	const char *cur;

	for (cur = s + len; cur != s; cur--) {
		if (cur[-1] == '/')
			break;
	}
	return cur;
}
|
||||
EXPORT_SYMBOL(ceph_file_part);
|
||||
|
||||
const char *ceph_msg_type_name(int type)
|
||||
{
|
||||
switch (type) {
|
||||
case CEPH_MSG_SHUTDOWN: return "shutdown";
|
||||
case CEPH_MSG_PING: return "ping";
|
||||
case CEPH_MSG_AUTH: return "auth";
|
||||
case CEPH_MSG_AUTH_REPLY: return "auth_reply";
|
||||
case CEPH_MSG_MON_MAP: return "mon_map";
|
||||
case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
|
||||
case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
|
||||
case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
|
||||
case CEPH_MSG_STATFS: return "statfs";
|
||||
case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
|
||||
case CEPH_MSG_MDS_MAP: return "mds_map";
|
||||
case CEPH_MSG_CLIENT_SESSION: return "client_session";
|
||||
case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
|
||||
case CEPH_MSG_CLIENT_REQUEST: return "client_request";
|
||||
case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
|
||||
case CEPH_MSG_CLIENT_REPLY: return "client_reply";
|
||||
case CEPH_MSG_CLIENT_CAPS: return "client_caps";
|
||||
case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
|
||||
case CEPH_MSG_CLIENT_SNAP: return "client_snap";
|
||||
case CEPH_MSG_CLIENT_LEASE: return "client_lease";
|
||||
case CEPH_MSG_OSD_MAP: return "osd_map";
|
||||
case CEPH_MSG_OSD_OP: return "osd_op";
|
||||
case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_msg_type_name);
|
||||
|
||||
/*
|
||||
* Initially learn our fsid, or verify an fsid matches.
|
||||
*/
|
||||
int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
|
||||
{
|
||||
if (client->have_fsid) {
|
||||
if (ceph_fsid_compare(&client->fsid, fsid)) {
|
||||
pr_err("bad fsid, had %pU got %pU",
|
||||
&client->fsid, fsid);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
|
||||
memcpy(&client->fsid, fsid, sizeof(*fsid));
|
||||
ceph_debugfs_client_init(client);
|
||||
client->have_fsid = true;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_check_fsid);
|
||||
|
||||
/*
 * strcmp() that tolerates NULL: two NULLs compare equal, a lone NULL
 * second argument yields -1 and a lone NULL first argument yields 1.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (!s1)
		return s2 ? 1 : 0;
	if (!s2)
		return -1;
	return strcmp(s1, s2);
}
|
||||
|
||||
int ceph_compare_options(struct ceph_options *new_opt,
|
||||
struct ceph_client *client)
|
||||
{
|
||||
struct ceph_options *opt1 = new_opt;
|
||||
struct ceph_options *opt2 = client->options;
|
||||
int ofs = offsetof(struct ceph_options, mon_addr);
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
ret = memcmp(opt1, opt2, ofs);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = strcmp_null(opt1->name, opt2->name);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = strcmp_null(opt1->secret, opt2->secret);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* any matching mon ip implies a match */
|
||||
for (i = 0; i < opt1->num_mon; i++) {
|
||||
if (ceph_monmap_contains(client->monc.monmap,
|
||||
&opt1->mon_addr[i]))
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_compare_options);
|
||||
|
||||
|
||||
static int parse_fsid(const char *str, struct ceph_fsid *fsid)
|
||||
{
|
||||
int i = 0;
|
||||
char tmp[3];
|
||||
int err = -EINVAL;
|
||||
int d;
|
||||
|
||||
dout("parse_fsid '%s'\n", str);
|
||||
tmp[2] = 0;
|
||||
while (*str && i < 16) {
|
||||
if (ispunct(*str)) {
|
||||
str++;
|
||||
continue;
|
||||
}
|
||||
if (!isxdigit(str[0]) || !isxdigit(str[1]))
|
||||
break;
|
||||
tmp[0] = str[0];
|
||||
tmp[1] = str[1];
|
||||
if (sscanf(tmp, "%x", &d) < 1)
|
||||
break;
|
||||
fsid->fsid[i] = d & 0xff;
|
||||
i++;
|
||||
str += 2;
|
||||
}
|
||||
|
||||
if (i == 16)
|
||||
err = 0;
|
||||
dout("parse_fsid ret %d got fsid %pU", err, fsid);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* ceph options
|
||||
*/
|
||||
/*
 * Option tokens.  The order matters: ceph_parse_options() classifies a
 * token as taking an int or a string argument by comparing it against the
 * Opt_last_int and Opt_last_string markers.
 */
enum {
	/* int args */
	Opt_osdtimeout,
	Opt_osdkeepalivetimeout,
	Opt_mount_timeout,
	Opt_osd_idle_ttl,
	Opt_last_int,

	/* string args */
	Opt_fsid,
	Opt_name,
	Opt_secret,
	Opt_ip,
	Opt_last_string,

	/* flags, no argument */
	Opt_noshare,
	Opt_nocrc,
};
|
||||
|
||||
static match_table_t opt_tokens = {
|
||||
{Opt_osdtimeout, "osdtimeout=%d"},
|
||||
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
|
||||
{Opt_mount_timeout, "mount_timeout=%d"},
|
||||
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
|
||||
/* int args above */
|
||||
{Opt_fsid, "fsid=%s"},
|
||||
{Opt_name, "name=%s"},
|
||||
{Opt_secret, "secret=%s"},
|
||||
{Opt_ip, "ip=%s"},
|
||||
/* string args above */
|
||||
{Opt_noshare, "noshare"},
|
||||
{Opt_nocrc, "nocrc"},
|
||||
{-1, NULL}
|
||||
};
|
||||
|
||||
void ceph_destroy_options(struct ceph_options *opt)
|
||||
{
|
||||
dout("destroy_options %p\n", opt);
|
||||
kfree(opt->name);
|
||||
kfree(opt->secret);
|
||||
kfree(opt);
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_destroy_options);
|
||||
|
||||
int ceph_parse_options(struct ceph_options **popt, char *options,
|
||||
const char *dev_name, const char *dev_name_end,
|
||||
int (*parse_extra_token)(char *c, void *private),
|
||||
void *private)
|
||||
{
|
||||
struct ceph_options *opt;
|
||||
const char *c;
|
||||
int err = -ENOMEM;
|
||||
substring_t argstr[MAX_OPT_ARGS];
|
||||
|
||||
opt = kzalloc(sizeof(*opt), GFP_KERNEL);
|
||||
if (!opt)
|
||||
return err;
|
||||
opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
|
||||
GFP_KERNEL);
|
||||
if (!opt->mon_addr)
|
||||
goto out;
|
||||
|
||||
dout("parse_options %p options '%s' dev_name '%s'\n", opt, options,
|
||||
dev_name);
|
||||
|
||||
/* start with defaults */
|
||||
opt->flags = CEPH_OPT_DEFAULT;
|
||||
opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT;
|
||||
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
|
||||
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
|
||||
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
|
||||
|
||||
/* get mon ip(s) */
|
||||
/* ip1[:port1][,ip2[:port2]...] */
|
||||
err = ceph_parse_ips(dev_name, dev_name_end, opt->mon_addr,
|
||||
CEPH_MAX_MON, &opt->num_mon);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
/* parse mount options */
|
||||
while ((c = strsep(&options, ",")) != NULL) {
|
||||
int token, intval, ret;
|
||||
if (!*c)
|
||||
continue;
|
||||
err = -EINVAL;
|
||||
token = match_token((char *)c, opt_tokens, argstr);
|
||||
if (token < 0 && parse_extra_token) {
|
||||
/* extra? */
|
||||
err = parse_extra_token((char *)c, private);
|
||||
if (err < 0) {
|
||||
pr_err("bad option at '%s'\n", c);
|
||||
goto out;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (token < Opt_last_int) {
|
||||
ret = match_int(&argstr[0], &intval);
|
||||
if (ret < 0) {
|
||||
pr_err("bad mount option arg (not int) "
|
||||
"at '%s'\n", c);
|
||||
continue;
|
||||
}
|
||||
dout("got int token %d val %d\n", token, intval);
|
||||
} else if (token > Opt_last_int && token < Opt_last_string) {
|
||||
dout("got string token %d val %s\n", token,
|
||||
argstr[0].from);
|
||||
} else {
|
||||
dout("got token %d\n", token);
|
||||
}
|
||||
switch (token) {
|
||||
case Opt_ip:
|
||||
err = ceph_parse_ips(argstr[0].from,
|
||||
argstr[0].to,
|
||||
&opt->my_addr,
|
||||
1, NULL);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
opt->flags |= CEPH_OPT_MYIP;
|
||||
break;
|
||||
|
||||
case Opt_fsid:
|
||||
err = parse_fsid(argstr[0].from, &opt->fsid);
|
||||
if (err == 0)
|
||||
opt->flags |= CEPH_OPT_FSID;
|
||||
break;
|
||||
case Opt_name:
|
||||
opt->name = kstrndup(argstr[0].from,
|
||||
argstr[0].to-argstr[0].from,
|
||||
GFP_KERNEL);
|
||||
break;
|
||||
case Opt_secret:
|
||||
opt->secret = kstrndup(argstr[0].from,
|
||||
argstr[0].to-argstr[0].from,
|
||||
GFP_KERNEL);
|
||||
break;
|
||||
|
||||
/* misc */
|
||||
case Opt_osdtimeout:
|
||||
opt->osd_timeout = intval;
|
||||
break;
|
||||
case Opt_osdkeepalivetimeout:
|
||||
opt->osd_keepalive_timeout = intval;
|
||||
break;
|
||||
case Opt_osd_idle_ttl:
|
||||
opt->osd_idle_ttl = intval;
|
||||
break;
|
||||
case Opt_mount_timeout:
|
||||
opt->mount_timeout = intval;
|
||||
break;
|
||||
|
||||
case Opt_noshare:
|
||||
opt->flags |= CEPH_OPT_NOSHARE;
|
||||
break;
|
||||
|
||||
case Opt_nocrc:
|
||||
opt->flags |= CEPH_OPT_NOCRC;
|
||||
break;
|
||||
|
||||
default:
|
||||
BUG_ON(token);
|
||||
}
|
||||
}
|
||||
|
||||
/* success */
|
||||
*popt = opt;
|
||||
return 0;
|
||||
|
||||
out:
|
||||
ceph_destroy_options(opt);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_parse_options);
|
||||
|
||||
u64 ceph_client_id(struct ceph_client *client)
|
||||
{
|
||||
return client->monc.auth->global_id;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_client_id);
|
||||
|
||||
/*
|
||||
* create a fresh client instance
|
||||
*/
|
||||
struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
|
||||
{
|
||||
struct ceph_client *client;
|
||||
int err = -ENOMEM;
|
||||
|
||||
client = kzalloc(sizeof(*client), GFP_KERNEL);
|
||||
if (client == NULL)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
client->private = private;
|
||||
client->options = opt;
|
||||
|
||||
mutex_init(&client->mount_mutex);
|
||||
init_waitqueue_head(&client->auth_wq);
|
||||
client->auth_err = 0;
|
||||
|
||||
client->extra_mon_dispatch = NULL;
|
||||
client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT;
|
||||
client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT;
|
||||
|
||||
client->msgr = NULL;
|
||||
|
||||
/* subsystems */
|
||||
err = ceph_monc_init(&client->monc, client);
|
||||
if (err < 0)
|
||||
goto fail;
|
||||
err = ceph_osdc_init(&client->osdc, client);
|
||||
if (err < 0)
|
||||
goto fail_monc;
|
||||
|
||||
return client;
|
||||
|
||||
fail_monc:
|
||||
ceph_monc_stop(&client->monc);
|
||||
fail:
|
||||
kfree(client);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_create_client);
|
||||
|
||||
void ceph_destroy_client(struct ceph_client *client)
|
||||
{
|
||||
dout("destroy_client %p\n", client);
|
||||
|
||||
/* unmount */
|
||||
ceph_osdc_stop(&client->osdc);
|
||||
|
||||
/*
|
||||
* make sure mds and osd connections close out before destroying
|
||||
* the auth module, which is needed to free those connections'
|
||||
* ceph_authorizers.
|
||||
*/
|
||||
ceph_msgr_flush();
|
||||
|
||||
ceph_monc_stop(&client->monc);
|
||||
|
||||
ceph_debugfs_client_cleanup(client);
|
||||
|
||||
if (client->msgr)
|
||||
ceph_messenger_destroy(client->msgr);
|
||||
|
||||
ceph_destroy_options(client->options);
|
||||
|
||||
kfree(client);
|
||||
dout("destroy_client %p done\n", client);
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_destroy_client);
|
||||
|
||||
/*
|
||||
* true if we have the mon map (and have thus joined the cluster)
|
||||
*/
|
||||
static int have_mon_and_osd_map(struct ceph_client *client)
|
||||
{
|
||||
return client->monc.monmap && client->monc.monmap->epoch &&
|
||||
client->osdc.osdmap && client->osdc.osdmap->epoch;
|
||||
}
|
||||
|
||||
/*
|
||||
* mount: join the ceph cluster, and open root directory.
|
||||
*/
|
||||
int __ceph_open_session(struct ceph_client *client, unsigned long started)
|
||||
{
|
||||
struct ceph_entity_addr *myaddr = NULL;
|
||||
int err;
|
||||
unsigned long timeout = client->options->mount_timeout * HZ;
|
||||
|
||||
/* initialize the messenger */
|
||||
if (client->msgr == NULL) {
|
||||
if (ceph_test_opt(client, MYIP))
|
||||
myaddr = &client->options->my_addr;
|
||||
client->msgr = ceph_messenger_create(myaddr,
|
||||
client->supported_features,
|
||||
client->required_features);
|
||||
if (IS_ERR(client->msgr)) {
|
||||
client->msgr = NULL;
|
||||
return PTR_ERR(client->msgr);
|
||||
}
|
||||
client->msgr->nocrc = ceph_test_opt(client, NOCRC);
|
||||
}
|
||||
|
||||
/* open session, and wait for mon and osd maps */
|
||||
err = ceph_monc_open_session(&client->monc);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
while (!have_mon_and_osd_map(client)) {
|
||||
err = -EIO;
|
||||
if (timeout && time_after_eq(jiffies, started + timeout))
|
||||
return err;
|
||||
|
||||
/* wait */
|
||||
dout("mount waiting for mon_map\n");
|
||||
err = wait_event_interruptible_timeout(client->auth_wq,
|
||||
have_mon_and_osd_map(client) || (client->auth_err < 0),
|
||||
timeout);
|
||||
if (err == -EINTR || err == -ERESTARTSYS)
|
||||
return err;
|
||||
if (client->auth_err < 0)
|
||||
return client->auth_err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(__ceph_open_session);
|
||||
|
||||
|
||||
int ceph_open_session(struct ceph_client *client)
|
||||
{
|
||||
int ret;
|
||||
unsigned long started = jiffies; /* note the start time */
|
||||
|
||||
dout("open_session start\n");
|
||||
mutex_lock(&client->mount_mutex);
|
||||
|
||||
ret = __ceph_open_session(client, started);
|
||||
|
||||
mutex_unlock(&client->mount_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_open_session);
|
||||
|
||||
|
||||
/*
 * Module init: bring up debugfs support, then the messenger; debugfs is
 * torn down again if the messenger fails to initialise.
 */
static int __init init_ceph_lib(void)
{
	int err;

	err = ceph_debugfs_init();
	if (err < 0)
		return err;

	err = ceph_msgr_init();
	if (err < 0) {
		ceph_debugfs_cleanup();
		return err;
	}

	pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n",
		CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL,
		CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
		CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);

	return 0;
}
|
||||
|
||||
/* Module exit: undo init_ceph_lib() in reverse order. */
static void __exit exit_ceph_lib(void)
{
	dout("exit_ceph_lib\n");

	ceph_msgr_exit();
	ceph_debugfs_cleanup();
}
|
||||
|
||||
module_init(init_ceph_lib);
|
||||
module_exit(exit_ceph_lib);
|
||||
|
||||
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
|
||||
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
|
||||
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
|
||||
MODULE_DESCRIPTION("Ceph filesystem for Linux");
|
||||
MODULE_LICENSE("GPL");
|
||||
75
net/ceph/ceph_fs.c
Normal file
75
net/ceph/ceph_fs.c
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Some non-inline ceph helpers
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/ceph/types.h>
|
||||
|
||||
/*
|
||||
* return true if @layout appears to be valid
|
||||
*/
|
||||
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
|
||||
{
|
||||
__u32 su = le32_to_cpu(layout->fl_stripe_unit);
|
||||
__u32 sc = le32_to_cpu(layout->fl_stripe_count);
|
||||
__u32 os = le32_to_cpu(layout->fl_object_size);
|
||||
|
||||
/* stripe unit, object size must be non-zero, 64k increment */
|
||||
if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1)))
|
||||
return 0;
|
||||
if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1)))
|
||||
return 0;
|
||||
/* object size must be a multiple of stripe unit */
|
||||
if (os < su || os % su)
|
||||
return 0;
|
||||
/* stripe count must be non-zero */
|
||||
if (!sc)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int ceph_flags_to_mode(int flags)
|
||||
{
|
||||
int mode;
|
||||
|
||||
#ifdef O_DIRECTORY /* fixme */
|
||||
if ((flags & O_DIRECTORY) == O_DIRECTORY)
|
||||
return CEPH_FILE_MODE_PIN;
|
||||
#endif
|
||||
if ((flags & O_APPEND) == O_APPEND)
|
||||
flags |= O_WRONLY;
|
||||
|
||||
if ((flags & O_ACCMODE) == O_RDWR)
|
||||
mode = CEPH_FILE_MODE_RDWR;
|
||||
else if ((flags & O_ACCMODE) == O_WRONLY)
|
||||
mode = CEPH_FILE_MODE_WR;
|
||||
else
|
||||
mode = CEPH_FILE_MODE_RD;
|
||||
|
||||
#ifdef O_LAZY
|
||||
if (flags & O_LAZY)
|
||||
mode |= CEPH_FILE_MODE_LAZY;
|
||||
#endif
|
||||
|
||||
return mode;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_flags_to_mode);
|
||||
|
||||
int ceph_caps_for_mode(int mode)
|
||||
{
|
||||
int caps = CEPH_CAP_PIN;
|
||||
|
||||
if (mode & CEPH_FILE_MODE_RD)
|
||||
caps |= CEPH_CAP_FILE_SHARED |
|
||||
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
|
||||
if (mode & CEPH_FILE_MODE_WR)
|
||||
caps |= CEPH_CAP_FILE_EXCL |
|
||||
CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
|
||||
CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
|
||||
CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
|
||||
if (mode & CEPH_FILE_MODE_LAZY)
|
||||
caps |= CEPH_CAP_FILE_LAZYIO;
|
||||
|
||||
return caps;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_caps_for_mode);
|
||||
118
net/ceph/ceph_hash.c
Normal file
118
net/ceph/ceph_hash.c
Normal file
@@ -0,0 +1,118 @@
|
||||
|
||||
#include <linux/ceph/types.h>
|
||||
|
||||
/*
|
||||
 * Robert Jenkins' hash function.
|
||||
* http://burtleburtle.net/bob/hash/evahash.html
|
||||
* This is in the public domain.
|
||||
*/
|
||||
/*
 * Scramble the three state words a, b and c in place.  The arguments are
 * evaluated many times and are not parenthesised, so pass plain variables.
 */
#define mix(a, b, c)							\
	do {								\
		a = a - b;  a = a - c;  a = a ^ (c >> 13);		\
		b = b - c;  b = b - a;  b = b ^ (a << 8);		\
		c = c - a;  c = c - b;  c = c ^ (b >> 13);		\
		a = a - b;  a = a - c;  a = a ^ (c >> 12);		\
		b = b - c;  b = b - a;  b = b ^ (a << 16);		\
		c = c - a;  c = c - b;  c = c ^ (b >> 5);		\
		a = a - b;  a = a - c;  a = a ^ (c >> 3);		\
		b = b - c;  b = b - a;  b = b ^ (a << 10);		\
		c = c - a;  c = c - b;  c = c ^ (b >> 15);		\
	} while (0)
|
||||
|
||||
unsigned ceph_str_hash_rjenkins(const char *str, unsigned length)
|
||||
{
|
||||
const unsigned char *k = (const unsigned char *)str;
|
||||
__u32 a, b, c; /* the internal state */
|
||||
__u32 len; /* how many key bytes still need mixing */
|
||||
|
||||
/* Set up the internal state */
|
||||
len = length;
|
||||
a = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||
b = a;
|
||||
c = 0; /* variable initialization of internal state */
|
||||
|
||||
/* handle most of the key */
|
||||
while (len >= 12) {
|
||||
a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) +
|
||||
((__u32)k[3] << 24));
|
||||
b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) +
|
||||
((__u32)k[7] << 24));
|
||||
c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) +
|
||||
((__u32)k[11] << 24));
|
||||
mix(a, b, c);
|
||||
k = k + 12;
|
||||
len = len - 12;
|
||||
}
|
||||
|
||||
/* handle the last 11 bytes */
|
||||
c = c + length;
|
||||
switch (len) { /* all the case statements fall through */
|
||||
case 11:
|
||||
c = c + ((__u32)k[10] << 24);
|
||||
case 10:
|
||||
c = c + ((__u32)k[9] << 16);
|
||||
case 9:
|
||||
c = c + ((__u32)k[8] << 8);
|
||||
/* the first byte of c is reserved for the length */
|
||||
case 8:
|
||||
b = b + ((__u32)k[7] << 24);
|
||||
case 7:
|
||||
b = b + ((__u32)k[6] << 16);
|
||||
case 6:
|
||||
b = b + ((__u32)k[5] << 8);
|
||||
case 5:
|
||||
b = b + k[4];
|
||||
case 4:
|
||||
a = a + ((__u32)k[3] << 24);
|
||||
case 3:
|
||||
a = a + ((__u32)k[2] << 16);
|
||||
case 2:
|
||||
a = a + ((__u32)k[1] << 8);
|
||||
case 1:
|
||||
a = a + k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
mix(a, b, c);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
 * linux dcache hash
 *
 * Folds each byte of @str (as an unsigned char) into the running value as
 * (hash + (c << 4) + (c >> 4)) * 11 and returns the result truncated to
 * unsigned.
 */
unsigned ceph_str_hash_linux(const char *str, unsigned length)
{
	unsigned long acc = 0;
	unsigned i;

	for (i = 0; i < length; i++) {
		unsigned char ch = str[i];

		acc = (acc + (ch << 4) + (ch >> 4)) * 11;
	}
	return acc;
}
|
||||
|
||||
|
||||
unsigned ceph_str_hash(int type, const char *s, unsigned len)
|
||||
{
|
||||
switch (type) {
|
||||
case CEPH_STR_HASH_LINUX:
|
||||
return ceph_str_hash_linux(s, len);
|
||||
case CEPH_STR_HASH_RJENKINS:
|
||||
return ceph_str_hash_rjenkins(s, len);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
const char *ceph_str_hash_name(int type)
|
||||
{
|
||||
switch (type) {
|
||||
case CEPH_STR_HASH_LINUX:
|
||||
return "linux";
|
||||
case CEPH_STR_HASH_RJENKINS:
|
||||
return "rjenkins";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
84
net/ceph/ceph_strings.c
Normal file
84
net/ceph/ceph_strings.c
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Ceph string constants
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/ceph/types.h>
|
||||
|
||||
const char *ceph_entity_type_name(int type)
|
||||
{
|
||||
switch (type) {
|
||||
case CEPH_ENTITY_TYPE_MDS: return "mds";
|
||||
case CEPH_ENTITY_TYPE_OSD: return "osd";
|
||||
case CEPH_ENTITY_TYPE_MON: return "mon";
|
||||
case CEPH_ENTITY_TYPE_CLIENT: return "client";
|
||||
case CEPH_ENTITY_TYPE_AUTH: return "auth";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
const char *ceph_osd_op_name(int op)
|
||||
{
|
||||
switch (op) {
|
||||
case CEPH_OSD_OP_READ: return "read";
|
||||
case CEPH_OSD_OP_STAT: return "stat";
|
||||
|
||||
case CEPH_OSD_OP_MASKTRUNC: return "masktrunc";
|
||||
|
||||
case CEPH_OSD_OP_WRITE: return "write";
|
||||
case CEPH_OSD_OP_DELETE: return "delete";
|
||||
case CEPH_OSD_OP_TRUNCATE: return "truncate";
|
||||
case CEPH_OSD_OP_ZERO: return "zero";
|
||||
case CEPH_OSD_OP_WRITEFULL: return "writefull";
|
||||
case CEPH_OSD_OP_ROLLBACK: return "rollback";
|
||||
|
||||
case CEPH_OSD_OP_APPEND: return "append";
|
||||
case CEPH_OSD_OP_STARTSYNC: return "startsync";
|
||||
case CEPH_OSD_OP_SETTRUNC: return "settrunc";
|
||||
case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc";
|
||||
|
||||
case CEPH_OSD_OP_TMAPUP: return "tmapup";
|
||||
case CEPH_OSD_OP_TMAPGET: return "tmapget";
|
||||
case CEPH_OSD_OP_TMAPPUT: return "tmapput";
|
||||
|
||||
case CEPH_OSD_OP_GETXATTR: return "getxattr";
|
||||
case CEPH_OSD_OP_GETXATTRS: return "getxattrs";
|
||||
case CEPH_OSD_OP_SETXATTR: return "setxattr";
|
||||
case CEPH_OSD_OP_SETXATTRS: return "setxattrs";
|
||||
case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs";
|
||||
case CEPH_OSD_OP_RMXATTR: return "rmxattr";
|
||||
case CEPH_OSD_OP_CMPXATTR: return "cmpxattr";
|
||||
|
||||
case CEPH_OSD_OP_PULL: return "pull";
|
||||
case CEPH_OSD_OP_PUSH: return "push";
|
||||
case CEPH_OSD_OP_BALANCEREADS: return "balance-reads";
|
||||
case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads";
|
||||
case CEPH_OSD_OP_SCRUB: return "scrub";
|
||||
|
||||
case CEPH_OSD_OP_WRLOCK: return "wrlock";
|
||||
case CEPH_OSD_OP_WRUNLOCK: return "wrunlock";
|
||||
case CEPH_OSD_OP_RDLOCK: return "rdlock";
|
||||
case CEPH_OSD_OP_RDUNLOCK: return "rdunlock";
|
||||
case CEPH_OSD_OP_UPLOCK: return "uplock";
|
||||
case CEPH_OSD_OP_DNLOCK: return "dnlock";
|
||||
|
||||
case CEPH_OSD_OP_CALL: return "call";
|
||||
|
||||
case CEPH_OSD_OP_PGLS: return "pgls";
|
||||
}
|
||||
return "???";
|
||||
}
|
||||
|
||||
|
||||
const char *ceph_pool_op_name(int op)
|
||||
{
|
||||
switch (op) {
|
||||
case POOL_OP_CREATE: return "create";
|
||||
case POOL_OP_DELETE: return "delete";
|
||||
case POOL_OP_AUID_CHANGE: return "auid change";
|
||||
case POOL_OP_CREATE_SNAP: return "create snap";
|
||||
case POOL_OP_DELETE_SNAP: return "delete snap";
|
||||
case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap";
|
||||
case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap";
|
||||
}
|
||||
return "???";
|
||||
}
|
||||
151
net/ceph/crush/crush.c
Normal file
151
net/ceph/crush/crush.c
Normal file
@@ -0,0 +1,151 @@
|
||||
|
||||
#ifdef __KERNEL__
|
||||
# include <linux/slab.h>
|
||||
#else
|
||||
# include <stdlib.h>
|
||||
# include <assert.h>
|
||||
# define kfree(x) do { if (x) free(x); } while (0)
|
||||
# define BUG_ON(x) assert(!(x))
|
||||
#endif
|
||||
|
||||
#include <linux/crush/crush.h>
|
||||
|
||||
const char *crush_bucket_alg_name(int alg)
|
||||
{
|
||||
switch (alg) {
|
||||
case CRUSH_BUCKET_UNIFORM: return "uniform";
|
||||
case CRUSH_BUCKET_LIST: return "list";
|
||||
case CRUSH_BUCKET_TREE: return "tree";
|
||||
case CRUSH_BUCKET_STRAW: return "straw";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* crush_get_bucket_item_weight - Get weight of an item in given bucket
|
||||
* @b: bucket pointer
|
||||
* @p: item index in bucket
|
||||
*/
|
||||
int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
|
||||
{
|
||||
if (p >= b->size)
|
||||
return 0;
|
||||
|
||||
switch (b->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
return ((struct crush_bucket_uniform *)b)->item_weight;
|
||||
case CRUSH_BUCKET_LIST:
|
||||
return ((struct crush_bucket_list *)b)->item_weights[p];
|
||||
case CRUSH_BUCKET_TREE:
|
||||
if (p & 1)
|
||||
return ((struct crush_bucket_tree *)b)->node_weights[p];
|
||||
return 0;
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
return ((struct crush_bucket_straw *)b)->item_weights[p];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* crush_calc_parents - Calculate parent vectors for the given crush map.
|
||||
* @map: crush_map pointer
|
||||
*/
|
||||
void crush_calc_parents(struct crush_map *map)
|
||||
{
|
||||
int i, b, c;
|
||||
|
||||
for (b = 0; b < map->max_buckets; b++) {
|
||||
if (map->buckets[b] == NULL)
|
||||
continue;
|
||||
for (i = 0; i < map->buckets[b]->size; i++) {
|
||||
c = map->buckets[b]->items[i];
|
||||
BUG_ON(c >= map->max_devices ||
|
||||
c < -map->max_buckets);
|
||||
if (c >= 0)
|
||||
map->device_parents[c] = map->buckets[b]->id;
|
||||
else
|
||||
map->bucket_parents[-1-c] = map->buckets[b]->id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
|
||||
{
|
||||
kfree(b->h.perm);
|
||||
kfree(b->h.items);
|
||||
kfree(b);
|
||||
}
|
||||
|
||||
void crush_destroy_bucket_list(struct crush_bucket_list *b)
|
||||
{
|
||||
kfree(b->item_weights);
|
||||
kfree(b->sum_weights);
|
||||
kfree(b->h.perm);
|
||||
kfree(b->h.items);
|
||||
kfree(b);
|
||||
}
|
||||
|
||||
void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
|
||||
{
|
||||
kfree(b->node_weights);
|
||||
kfree(b);
|
||||
}
|
||||
|
||||
void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
|
||||
{
|
||||
kfree(b->straws);
|
||||
kfree(b->item_weights);
|
||||
kfree(b->h.perm);
|
||||
kfree(b->h.items);
|
||||
kfree(b);
|
||||
}
|
||||
|
||||
void crush_destroy_bucket(struct crush_bucket *b)
|
||||
{
|
||||
switch (b->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
crush_destroy_bucket_uniform((struct crush_bucket_uniform *)b);
|
||||
break;
|
||||
case CRUSH_BUCKET_LIST:
|
||||
crush_destroy_bucket_list((struct crush_bucket_list *)b);
|
||||
break;
|
||||
case CRUSH_BUCKET_TREE:
|
||||
crush_destroy_bucket_tree((struct crush_bucket_tree *)b);
|
||||
break;
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* crush_destroy - Destroy a crush_map
|
||||
* @map: crush_map pointer
|
||||
*/
|
||||
void crush_destroy(struct crush_map *map)
|
||||
{
|
||||
int b;
|
||||
|
||||
/* buckets */
|
||||
if (map->buckets) {
|
||||
for (b = 0; b < map->max_buckets; b++) {
|
||||
if (map->buckets[b] == NULL)
|
||||
continue;
|
||||
crush_destroy_bucket(map->buckets[b]);
|
||||
}
|
||||
kfree(map->buckets);
|
||||
}
|
||||
|
||||
/* rules */
|
||||
if (map->rules) {
|
||||
for (b = 0; b < map->max_rules; b++)
|
||||
kfree(map->rules[b]);
|
||||
kfree(map->rules);
|
||||
}
|
||||
|
||||
kfree(map->bucket_parents);
|
||||
kfree(map->device_parents);
|
||||
kfree(map);
|
||||
}
|
||||
|
||||
|
||||
149
net/ceph/crush/hash.c
Normal file
149
net/ceph/crush/hash.c
Normal file
@@ -0,0 +1,149 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/crush/hash.h>
|
||||
|
||||
/*
|
||||
* Robert Jenkins' function for mixing 32-bit values
|
||||
* http://burtleburtle.net/bob/hash/evahash.html
|
||||
* a, b = random bits, c = input and output
|
||||
*/
|
||||
#define crush_hashmix(a, b, c) do { \
|
||||
a = a-b; a = a-c; a = a^(c>>13); \
|
||||
b = b-c; b = b-a; b = b^(a<<8); \
|
||||
c = c-a; c = c-b; c = c^(b>>13); \
|
||||
a = a-b; a = a-c; a = a^(c>>12); \
|
||||
b = b-c; b = b-a; b = b^(a<<16); \
|
||||
c = c-a; c = c-b; c = c^(b>>5); \
|
||||
a = a-b; a = a-c; a = a^(c>>3); \
|
||||
b = b-c; b = b-a; b = b^(a<<10); \
|
||||
c = c-a; c = c-b; c = c^(b>>15); \
|
||||
} while (0)
|
||||
|
||||
#define crush_hash_seed 1315423911
|
||||
|
||||
/* rjenkins1 hash of one 32-bit input. */
static __u32 crush_hash32_rjenkins1(__u32 a)
{
	__u32 acc = crush_hash_seed ^ a;
	__u32 a_copy = a;
	__u32 k1 = 231232;
	__u32 k2 = 1232;

	/* crush_hashmix() updates all three of its arguments in place */
	crush_hashmix(a_copy, k1, acc);
	crush_hashmix(k2, a, acc);
	return acc;
}
|
||||
|
||||
/* rjenkins1 hash of two 32-bit inputs. */
static __u32 crush_hash32_rjenkins1_2(__u32 a, __u32 b)
{
	__u32 acc = crush_hash_seed ^ a ^ b;
	__u32 k1 = 231232;
	__u32 k2 = 1232;

	/* crush_hashmix() updates all three of its arguments in place */
	crush_hashmix(a, b, acc);
	crush_hashmix(k1, a, acc);
	crush_hashmix(b, k2, acc);
	return acc;
}
|
||||
|
||||
/* rjenkins1 hash of three 32-bit inputs. */
static __u32 crush_hash32_rjenkins1_3(__u32 a, __u32 b, __u32 c)
{
	__u32 acc = crush_hash_seed ^ a ^ b ^ c;
	__u32 k1 = 231232;
	__u32 k2 = 1232;

	/* crush_hashmix() updates all three of its arguments in place */
	crush_hashmix(a, b, acc);
	crush_hashmix(c, k1, acc);
	crush_hashmix(k2, a, acc);
	crush_hashmix(b, k1, acc);
	crush_hashmix(k2, c, acc);
	return acc;
}
|
||||
|
||||
/* rjenkins1 hash of four 32-bit inputs. */
static __u32 crush_hash32_rjenkins1_4(__u32 a, __u32 b, __u32 c, __u32 d)
{
	__u32 acc = crush_hash_seed ^ a ^ b ^ c ^ d;
	__u32 k1 = 231232;
	__u32 k2 = 1232;

	/* crush_hashmix() updates all three of its arguments in place */
	crush_hashmix(a, b, acc);
	crush_hashmix(c, d, acc);
	crush_hashmix(a, k1, acc);
	crush_hashmix(k2, b, acc);
	crush_hashmix(c, k1, acc);
	crush_hashmix(k2, d, acc);
	return acc;
}
|
||||
|
||||
/* rjenkins1 hash of five 32-bit inputs. */
static __u32 crush_hash32_rjenkins1_5(__u32 a, __u32 b, __u32 c, __u32 d,
				      __u32 e)
{
	__u32 acc = crush_hash_seed ^ a ^ b ^ c ^ d ^ e;
	__u32 k1 = 231232;
	__u32 k2 = 1232;

	/* crush_hashmix() updates all three of its arguments in place */
	crush_hashmix(a, b, acc);
	crush_hashmix(c, d, acc);
	crush_hashmix(e, k1, acc);
	crush_hashmix(k2, a, acc);
	crush_hashmix(b, k1, acc);
	crush_hashmix(k2, c, acc);
	crush_hashmix(d, k1, acc);
	crush_hashmix(k2, e, acc);
	return acc;
}
|
||||
|
||||
|
||||
/* Hash one value with the algorithm @type; 0 for an unknown @type. */
__u32 crush_hash32(int type, __u32 a)
{
	if (type == CRUSH_HASH_RJENKINS1)
		return crush_hash32_rjenkins1(a);
	return 0;
}
|
||||
|
||||
/* Hash two values with the algorithm @type; 0 for an unknown @type. */
__u32 crush_hash32_2(int type, __u32 a, __u32 b)
{
	if (type == CRUSH_HASH_RJENKINS1)
		return crush_hash32_rjenkins1_2(a, b);
	return 0;
}
|
||||
|
||||
/* Hash three values with the algorithm @type; 0 for an unknown @type. */
__u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c)
{
	if (type == CRUSH_HASH_RJENKINS1)
		return crush_hash32_rjenkins1_3(a, b, c);
	return 0;
}
|
||||
|
||||
/* Hash four values with the algorithm @type; 0 for an unknown @type. */
__u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d)
{
	if (type == CRUSH_HASH_RJENKINS1)
		return crush_hash32_rjenkins1_4(a, b, c, d);
	return 0;
}
|
||||
|
||||
/* Hash five values with the algorithm @type; 0 for an unknown @type. */
__u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d, __u32 e)
{
	if (type == CRUSH_HASH_RJENKINS1)
		return crush_hash32_rjenkins1_5(a, b, c, d, e);
	return 0;
}
|
||||
|
||||
const char *crush_hash_name(int type)
|
||||
{
|
||||
switch (type) {
|
||||
case CRUSH_HASH_RJENKINS1:
|
||||
return "rjenkins1";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
609
net/ceph/crush/mapper.c
Normal file
609
net/ceph/crush/mapper.c
Normal file
@@ -0,0 +1,609 @@
|
||||
|
||||
#ifdef __KERNEL__
|
||||
# include <linux/string.h>
|
||||
# include <linux/slab.h>
|
||||
# include <linux/bug.h>
|
||||
# include <linux/kernel.h>
|
||||
# ifndef dprintk
|
||||
# define dprintk(args...)
|
||||
# endif
|
||||
#else
|
||||
# include <string.h>
|
||||
# include <stdio.h>
|
||||
# include <stdlib.h>
|
||||
# include <assert.h>
|
||||
# define BUG_ON(x) assert(!(x))
|
||||
# define dprintk(args...) /* printf(args) */
|
||||
# define kmalloc(x, f) malloc(x)
|
||||
# define kfree(x) free(x)
|
||||
#endif
|
||||
|
||||
#include <linux/crush/crush.h>
|
||||
#include <linux/crush/hash.h>
|
||||
|
||||
/*
|
||||
* Implement the core CRUSH mapping algorithm.
|
||||
*/
|
||||
|
||||
/**
|
||||
* crush_find_rule - find a crush_rule id for a given ruleset, type, and size.
|
||||
* @map: the crush_map
|
||||
* @ruleset: the storage ruleset id (user defined)
|
||||
* @type: storage ruleset type (user defined)
|
||||
* @size: output set size
|
||||
*/
|
||||
int crush_find_rule(struct crush_map *map, int ruleset, int type, int size)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < map->max_rules; i++) {
|
||||
if (map->rules[i] &&
|
||||
map->rules[i]->mask.ruleset == ruleset &&
|
||||
map->rules[i]->mask.type == type &&
|
||||
map->rules[i]->mask.min_size <= size &&
|
||||
map->rules[i]->mask.max_size >= size)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* bucket choose methods
|
||||
*
|
||||
* For each bucket algorithm, we have a "choose" method that, given a
|
||||
* crush input @x and replica position (usually, position in output set) @r,
|
||||
* will produce an item in the bucket.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Choose based on a random permutation of the bucket.
|
||||
*
|
||||
* We used to use some prime number arithmetic to do this, but it
|
||||
* wasn't very random, and had some other bad behaviors. Instead, we
|
||||
* calculate an actual random permutation of the bucket members.
|
||||
* Since this is expensive, we optimize for the r=0 case, which
|
||||
* captures the vast majority of calls.
|
||||
*/
|
||||
static int bucket_perm_choose(struct crush_bucket *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
unsigned pr = r % bucket->size;
|
||||
unsigned i, s;
|
||||
|
||||
/* start a new permutation if @x has changed */
|
||||
if (bucket->perm_x != x || bucket->perm_n == 0) {
|
||||
dprintk("bucket %d new x=%d\n", bucket->id, x);
|
||||
bucket->perm_x = x;
|
||||
|
||||
/* optimize common r=0 case */
|
||||
if (pr == 0) {
|
||||
s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
|
||||
bucket->size;
|
||||
bucket->perm[0] = s;
|
||||
bucket->perm_n = 0xffff; /* magic value, see below */
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < bucket->size; i++)
|
||||
bucket->perm[i] = i;
|
||||
bucket->perm_n = 0;
|
||||
} else if (bucket->perm_n == 0xffff) {
|
||||
/* clean up after the r=0 case above */
|
||||
for (i = 1; i < bucket->size; i++)
|
||||
bucket->perm[i] = i;
|
||||
bucket->perm[bucket->perm[0]] = 0;
|
||||
bucket->perm_n = 1;
|
||||
}
|
||||
|
||||
/* calculate permutation up to pr */
|
||||
for (i = 0; i < bucket->perm_n; i++)
|
||||
dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
|
||||
while (bucket->perm_n <= pr) {
|
||||
unsigned p = bucket->perm_n;
|
||||
/* no point in swapping the final entry */
|
||||
if (p < bucket->size - 1) {
|
||||
i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
|
||||
(bucket->size - p);
|
||||
if (i) {
|
||||
unsigned t = bucket->perm[p + i];
|
||||
bucket->perm[p + i] = bucket->perm[p];
|
||||
bucket->perm[p] = t;
|
||||
}
|
||||
dprintk(" perm_choose swap %d with %d\n", p, p+i);
|
||||
}
|
||||
bucket->perm_n++;
|
||||
}
|
||||
for (i = 0; i < bucket->size; i++)
|
||||
dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]);
|
||||
|
||||
s = bucket->perm[pr];
|
||||
out:
|
||||
dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
|
||||
bucket->size, x, r, pr, s);
|
||||
return bucket->items[s];
|
||||
}
|
||||
|
||||
/* uniform */
|
||||
static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
return bucket_perm_choose(&bucket->h, x, r);
|
||||
}
|
||||
|
||||
/* list */
|
||||
static int bucket_list_choose(struct crush_bucket_list *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = bucket->h.size-1; i >= 0; i--) {
|
||||
__u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i],
|
||||
r, bucket->h.id);
|
||||
w &= 0xffff;
|
||||
dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
|
||||
"sw %x rand %llx",
|
||||
i, x, r, bucket->h.items[i], bucket->item_weights[i],
|
||||
bucket->sum_weights[i], w);
|
||||
w *= bucket->sum_weights[i];
|
||||
w = w >> 16;
|
||||
/*dprintk(" scaled %llx\n", w);*/
|
||||
if (w < bucket->item_weights[i])
|
||||
return bucket->h.items[i];
|
||||
}
|
||||
|
||||
BUG_ON(1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* (binary) tree */
|
||||
/*
 * Number of trailing zero bits in @n.  @n must be non-zero, otherwise the
 * loop never terminates.
 */
static int height(int n)
{
	int level;

	for (level = 0; (n & 1) == 0; level++)
		n = n >> 1;
	return level;
}
|
||||
|
||||
/* Node @x minus half of its lowest set bit (1 << (height(x) - 1)). */
static int left(int x)
{
	return x - (1 << (height(x) - 1));
}
|
||||
|
||||
/* Node @x plus half of its lowest set bit (1 << (height(x) - 1)). */
static int right(int x)
{
	return x + (1 << (height(x) - 1));
}
|
||||
|
||||
/* Non-zero when node number @x is odd; the tree descent stops there. */
static int terminal(int x)
{
	int low_bit = x & 1;

	return low_bit;
}
|
||||
|
||||
static int bucket_tree_choose(struct crush_bucket_tree *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
int n, l;
|
||||
__u32 w;
|
||||
__u64 t;
|
||||
|
||||
/* start at root */
|
||||
n = bucket->num_nodes >> 1;
|
||||
|
||||
while (!terminal(n)) {
|
||||
/* pick point in [0, w) */
|
||||
w = bucket->node_weights[n];
|
||||
t = (__u64)crush_hash32_4(bucket->h.hash, x, n, r,
|
||||
bucket->h.id) * (__u64)w;
|
||||
t = t >> 32;
|
||||
|
||||
/* descend to the left or right? */
|
||||
l = left(n);
|
||||
if (t < bucket->node_weights[l])
|
||||
n = l;
|
||||
else
|
||||
n = right(n);
|
||||
}
|
||||
|
||||
return bucket->h.items[n >> 1];
|
||||
}
|
||||
|
||||
|
||||
/* straw */
|
||||
|
||||
static int bucket_straw_choose(struct crush_bucket_straw *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
int i;
|
||||
int high = 0;
|
||||
__u64 high_draw = 0;
|
||||
__u64 draw;
|
||||
|
||||
for (i = 0; i < bucket->h.size; i++) {
|
||||
draw = crush_hash32_3(bucket->h.hash, x, bucket->h.items[i], r);
|
||||
draw &= 0xffff;
|
||||
draw *= bucket->straws[i];
|
||||
if (i == 0 || draw > high_draw) {
|
||||
high = i;
|
||||
high_draw = draw;
|
||||
}
|
||||
}
|
||||
return bucket->h.items[high];
|
||||
}
|
||||
|
||||
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
|
||||
{
|
||||
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
|
||||
switch (in->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
|
||||
x, r);
|
||||
case CRUSH_BUCKET_LIST:
|
||||
return bucket_list_choose((struct crush_bucket_list *)in,
|
||||
x, r);
|
||||
case CRUSH_BUCKET_TREE:
|
||||
return bucket_tree_choose((struct crush_bucket_tree *)in,
|
||||
x, r);
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
return bucket_straw_choose((struct crush_bucket_straw *)in,
|
||||
x, r);
|
||||
default:
|
||||
BUG_ON(1);
|
||||
return in->items[0];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* true if device is marked "out" (failed, fully offloaded)
|
||||
* of the cluster
|
||||
*/
|
||||
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
|
||||
{
|
||||
if (weight[item] >= 0x10000)
|
||||
return 0;
|
||||
if (weight[item] == 0)
|
||||
return 1;
|
||||
if ((crush_hash32_2(CRUSH_HASH_RJENKINS1, x, item) & 0xffff)
|
||||
< weight[item])
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* crush_choose - choose numrep distinct items of given type
|
||||
* @map: the crush_map
|
||||
* @bucket: the bucket we are choose an item from
|
||||
* @x: crush input value
|
||||
* @numrep: the number of items to choose
|
||||
* @type: the type of item to choose
|
||||
* @out: pointer to output vector
|
||||
* @outpos: our position in that vector
|
||||
* @firstn: true if choosing "first n" items, false if choosing "indep"
|
||||
* @recurse_to_leaf: true if we want one device under each item of given type
|
||||
* @out2: second output vector for leaf items (if @recurse_to_leaf)
|
||||
*/
|
||||
static int crush_choose(struct crush_map *map,
|
||||
struct crush_bucket *bucket,
|
||||
__u32 *weight,
|
||||
int x, int numrep, int type,
|
||||
int *out, int outpos,
|
||||
int firstn, int recurse_to_leaf,
|
||||
int *out2)
|
||||
{
|
||||
int rep;
|
||||
int ftotal, flocal;
|
||||
int retry_descent, retry_bucket, skip_rep;
|
||||
struct crush_bucket *in = bucket;
|
||||
int r;
|
||||
int i;
|
||||
int item = 0;
|
||||
int itemtype;
|
||||
int collide, reject;
|
||||
const int orig_tries = 5; /* attempts before we fall back to search */
|
||||
|
||||
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
|
||||
bucket->id, x, outpos, numrep);
|
||||
|
||||
for (rep = outpos; rep < numrep; rep++) {
|
||||
/* keep trying until we get a non-out, non-colliding item */
|
||||
ftotal = 0;
|
||||
skip_rep = 0;
|
||||
do {
|
||||
retry_descent = 0;
|
||||
in = bucket; /* initial bucket */
|
||||
|
||||
/* choose through intervening buckets */
|
||||
flocal = 0;
|
||||
do {
|
||||
collide = 0;
|
||||
retry_bucket = 0;
|
||||
r = rep;
|
||||
if (in->alg == CRUSH_BUCKET_UNIFORM) {
|
||||
/* be careful */
|
||||
if (firstn || numrep >= in->size)
|
||||
/* r' = r + f_total */
|
||||
r += ftotal;
|
||||
else if (in->size % numrep == 0)
|
||||
/* r'=r+(n+1)*f_local */
|
||||
r += (numrep+1) *
|
||||
(flocal+ftotal);
|
||||
else
|
||||
/* r' = r + n*f_local */
|
||||
r += numrep * (flocal+ftotal);
|
||||
} else {
|
||||
if (firstn)
|
||||
/* r' = r + f_total */
|
||||
r += ftotal;
|
||||
else
|
||||
/* r' = r + n*f_local */
|
||||
r += numrep * (flocal+ftotal);
|
||||
}
|
||||
|
||||
/* bucket choose */
|
||||
if (in->size == 0) {
|
||||
reject = 1;
|
||||
goto reject;
|
||||
}
|
||||
if (flocal >= (in->size>>1) &&
|
||||
flocal > orig_tries)
|
||||
item = bucket_perm_choose(in, x, r);
|
||||
else
|
||||
item = crush_bucket_choose(in, x, r);
|
||||
BUG_ON(item >= map->max_devices);
|
||||
|
||||
/* desired type? */
|
||||
if (item < 0)
|
||||
itemtype = map->buckets[-1-item]->type;
|
||||
else
|
||||
itemtype = 0;
|
||||
dprintk(" item %d type %d\n", item, itemtype);
|
||||
|
||||
/* keep going? */
|
||||
if (itemtype != type) {
|
||||
BUG_ON(item >= 0 ||
|
||||
(-1-item) >= map->max_buckets);
|
||||
in = map->buckets[-1-item];
|
||||
retry_bucket = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* collision? */
|
||||
for (i = 0; i < outpos; i++) {
|
||||
if (out[i] == item) {
|
||||
collide = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
reject = 0;
|
||||
if (recurse_to_leaf) {
|
||||
if (item < 0) {
|
||||
if (crush_choose(map,
|
||||
map->buckets[-1-item],
|
||||
weight,
|
||||
x, outpos+1, 0,
|
||||
out2, outpos,
|
||||
firstn, 0,
|
||||
NULL) <= outpos)
|
||||
/* didn't get leaf */
|
||||
reject = 1;
|
||||
} else {
|
||||
/* we already have a leaf! */
|
||||
out2[outpos] = item;
|
||||
}
|
||||
}
|
||||
|
||||
if (!reject) {
|
||||
/* out? */
|
||||
if (itemtype == 0)
|
||||
reject = is_out(map, weight,
|
||||
item, x);
|
||||
else
|
||||
reject = 0;
|
||||
}
|
||||
|
||||
reject:
|
||||
if (reject || collide) {
|
||||
ftotal++;
|
||||
flocal++;
|
||||
|
||||
if (collide && flocal < 3)
|
||||
/* retry locally a few times */
|
||||
retry_bucket = 1;
|
||||
else if (flocal < in->size + orig_tries)
|
||||
/* exhaustive bucket search */
|
||||
retry_bucket = 1;
|
||||
else if (ftotal < 20)
|
||||
/* then retry descent */
|
||||
retry_descent = 1;
|
||||
else
|
||||
/* else give up */
|
||||
skip_rep = 1;
|
||||
dprintk(" reject %d collide %d "
|
||||
"ftotal %d flocal %d\n",
|
||||
reject, collide, ftotal,
|
||||
flocal);
|
||||
}
|
||||
} while (retry_bucket);
|
||||
} while (retry_descent);
|
||||
|
||||
if (skip_rep) {
|
||||
dprintk("skip rep\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
dprintk("CHOOSE got %d\n", item);
|
||||
out[outpos] = item;
|
||||
outpos++;
|
||||
}
|
||||
|
||||
dprintk("CHOOSE returns %d\n", outpos);
|
||||
return outpos;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* crush_do_rule - calculate a mapping with the given input and rule
|
||||
* @map: the crush_map
|
||||
* @ruleno: the rule id
|
||||
* @x: hash input
|
||||
* @result: pointer to result vector
|
||||
* @result_max: maximum result size
|
||||
* @force: force initial replica choice; -1 for none
|
||||
*/
|
||||
int crush_do_rule(struct crush_map *map,
|
||||
int ruleno, int x, int *result, int result_max,
|
||||
int force, __u32 *weight)
|
||||
{
|
||||
int result_len;
|
||||
int force_context[CRUSH_MAX_DEPTH];
|
||||
int force_pos = -1;
|
||||
int a[CRUSH_MAX_SET];
|
||||
int b[CRUSH_MAX_SET];
|
||||
int c[CRUSH_MAX_SET];
|
||||
int recurse_to_leaf;
|
||||
int *w;
|
||||
int wsize = 0;
|
||||
int *o;
|
||||
int osize;
|
||||
int *tmp;
|
||||
struct crush_rule *rule;
|
||||
int step;
|
||||
int i, j;
|
||||
int numrep;
|
||||
int firstn;
|
||||
int rc = -1;
|
||||
|
||||
BUG_ON(ruleno >= map->max_rules);
|
||||
|
||||
rule = map->rules[ruleno];
|
||||
result_len = 0;
|
||||
w = a;
|
||||
o = b;
|
||||
|
||||
/*
|
||||
* determine hierarchical context of force, if any. note
|
||||
* that this may or may not correspond to the specific types
|
||||
* referenced by the crush rule.
|
||||
*/
|
||||
if (force >= 0) {
|
||||
if (force >= map->max_devices ||
|
||||
map->device_parents[force] == 0) {
|
||||
/*dprintk("CRUSH: forcefed device dne\n");*/
|
||||
rc = -1; /* force fed device dne */
|
||||
goto out;
|
||||
}
|
||||
if (!is_out(map, weight, force, x)) {
|
||||
while (1) {
|
||||
force_context[++force_pos] = force;
|
||||
if (force >= 0)
|
||||
force = map->device_parents[force];
|
||||
else
|
||||
force = map->bucket_parents[-1-force];
|
||||
if (force == 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (step = 0; step < rule->len; step++) {
|
||||
firstn = 0;
|
||||
switch (rule->steps[step].op) {
|
||||
case CRUSH_RULE_TAKE:
|
||||
w[0] = rule->steps[step].arg1;
|
||||
if (force_pos >= 0) {
|
||||
BUG_ON(force_context[force_pos] != w[0]);
|
||||
force_pos--;
|
||||
}
|
||||
wsize = 1;
|
||||
break;
|
||||
|
||||
case CRUSH_RULE_CHOOSE_LEAF_FIRSTN:
|
||||
case CRUSH_RULE_CHOOSE_FIRSTN:
|
||||
firstn = 1;
|
||||
case CRUSH_RULE_CHOOSE_LEAF_INDEP:
|
||||
case CRUSH_RULE_CHOOSE_INDEP:
|
||||
BUG_ON(wsize == 0);
|
||||
|
||||
recurse_to_leaf =
|
||||
rule->steps[step].op ==
|
||||
CRUSH_RULE_CHOOSE_LEAF_FIRSTN ||
|
||||
rule->steps[step].op ==
|
||||
CRUSH_RULE_CHOOSE_LEAF_INDEP;
|
||||
|
||||
/* reset output */
|
||||
osize = 0;
|
||||
|
||||
for (i = 0; i < wsize; i++) {
|
||||
/*
|
||||
* see CRUSH_N, CRUSH_N_MINUS macros.
|
||||
* basically, numrep <= 0 means relative to
|
||||
* the provided result_max
|
||||
*/
|
||||
numrep = rule->steps[step].arg1;
|
||||
if (numrep <= 0) {
|
||||
numrep += result_max;
|
||||
if (numrep <= 0)
|
||||
continue;
|
||||
}
|
||||
j = 0;
|
||||
if (osize == 0 && force_pos >= 0) {
|
||||
/* skip any intermediate types */
|
||||
while (force_pos &&
|
||||
force_context[force_pos] < 0 &&
|
||||
rule->steps[step].arg2 !=
|
||||
map->buckets[-1 -
|
||||
force_context[force_pos]]->type)
|
||||
force_pos--;
|
||||
o[osize] = force_context[force_pos];
|
||||
if (recurse_to_leaf)
|
||||
c[osize] = force_context[0];
|
||||
j++;
|
||||
force_pos--;
|
||||
}
|
||||
osize += crush_choose(map,
|
||||
map->buckets[-1-w[i]],
|
||||
weight,
|
||||
x, numrep,
|
||||
rule->steps[step].arg2,
|
||||
o+osize, j,
|
||||
firstn,
|
||||
recurse_to_leaf, c+osize);
|
||||
}
|
||||
|
||||
if (recurse_to_leaf)
|
||||
/* copy final _leaf_ values to output set */
|
||||
memcpy(o, c, osize*sizeof(*o));
|
||||
|
||||
/* swap t and w arrays */
|
||||
tmp = o;
|
||||
o = w;
|
||||
w = tmp;
|
||||
wsize = osize;
|
||||
break;
|
||||
|
||||
|
||||
case CRUSH_RULE_EMIT:
|
||||
for (i = 0; i < wsize && result_len < result_max; i++) {
|
||||
result[result_len] = w[i];
|
||||
result_len++;
|
||||
}
|
||||
wsize = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
BUG_ON(1);
|
||||
}
|
||||
}
|
||||
rc = result_len;
|
||||
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
412
net/ceph/crypto.c
Normal file
412
net/ceph/crypto.c
Normal file
@@ -0,0 +1,412 @@
|
||||
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/err.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/slab.h>
|
||||
#include <crypto/hash.h>
|
||||
|
||||
#include <linux/ceph/decode.h>
|
||||
#include "crypto.h"
|
||||
|
||||
int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end)
|
||||
{
|
||||
if (*p + sizeof(u16) + sizeof(key->created) +
|
||||
sizeof(u16) + key->len > end)
|
||||
return -ERANGE;
|
||||
ceph_encode_16(p, key->type);
|
||||
ceph_encode_copy(p, &key->created, sizeof(key->created));
|
||||
ceph_encode_16(p, key->len);
|
||||
ceph_encode_copy(p, key->key, key->len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end)
|
||||
{
|
||||
ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad);
|
||||
key->type = ceph_decode_16(p);
|
||||
ceph_decode_copy(p, &key->created, sizeof(key->created));
|
||||
key->len = ceph_decode_16(p);
|
||||
ceph_decode_need(p, end, key->len, bad);
|
||||
key->key = kmalloc(key->len, GFP_NOFS);
|
||||
if (!key->key)
|
||||
return -ENOMEM;
|
||||
ceph_decode_copy(p, key->key, key->len);
|
||||
return 0;
|
||||
|
||||
bad:
|
||||
dout("failed to decode crypto key\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey)
|
||||
{
|
||||
int inlen = strlen(inkey);
|
||||
int blen = inlen * 3 / 4;
|
||||
void *buf, *p;
|
||||
int ret;
|
||||
|
||||
dout("crypto_key_unarmor %s\n", inkey);
|
||||
buf = kmalloc(blen, GFP_NOFS);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
blen = ceph_unarmor(buf, inkey, inkey+inlen);
|
||||
if (blen < 0) {
|
||||
kfree(buf);
|
||||
return blen;
|
||||
}
|
||||
|
||||
p = buf;
|
||||
ret = ceph_crypto_key_decode(key, &p, p + blen);
|
||||
kfree(buf);
|
||||
if (ret)
|
||||
return ret;
|
||||
dout("crypto_key_unarmor key %p type %d len %d\n", key,
|
||||
key->type, key->len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define AES_KEY_SIZE 16
|
||||
|
||||
static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void)
|
||||
{
|
||||
return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
|
||||
}
|
||||
|
||||
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
|
||||
|
||||
/*
 * AES-CBC encrypt src into dst, using aes_iv as the initial vector.
 *
 * The plaintext is always followed by 1..16 pad bytes, each holding the
 * pad length, so the ciphertext is src_len rounded up past the next
 * 16-byte boundary.
 *
 * @key, @key_len: raw cipher key
 * @dst:           output buffer
 * @dst_len:       in: capacity of @dst; out: ciphertext length
 * @src, @src_len: plaintext
 *
 * Returns 0 on success, -ERANGE if @dst cannot hold the ciphertext
 * (*dst_len is then left untouched), or a negative errno from cipher
 * allocation, key setup or encryption.
 */
static int ceph_aes_encrypt(const void *key, int key_len,
			    void *dst, size_t *dst_len,
			    const void *src, size_t src_len)
{
	struct scatterlist sg_in[2], sg_out[1];
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc = { .flags = 0 };
	int ret;
	void *iv;
	int ivsize;
	size_t zero_padding = (0x10 - (src_len & 0x0f));
	char pad[16];

	/* same contract as the CEPH_CRYPTO_NONE path: *dst_len is capacity */
	if (*dst_len < src_len + zero_padding)
		return -ERANGE;

	tfm = ceph_crypto_alloc_cipher();
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	desc.tfm = tfm;

	memset(pad, zero_padding, zero_padding);

	*dst_len = src_len + zero_padding;

	ret = crypto_blkcipher_setkey((void *)tfm, key, key_len);
	if (ret < 0) {
		pr_err("ceph_aes_crypt failed %d\n", ret);
		goto out;
	}

	sg_init_table(sg_in, 2);
	sg_set_buf(&sg_in[0], src, src_len);
	sg_set_buf(&sg_in[1], pad, zero_padding);
	sg_init_table(sg_out, 1);
	sg_set_buf(sg_out, dst, *dst_len);

	iv = crypto_blkcipher_crt(tfm)->iv;
	ivsize = crypto_blkcipher_ivsize(tfm);
	memcpy(iv, aes_iv, ivsize);

	ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
				       src_len + zero_padding);
	if (ret < 0)
		pr_err("ceph_aes_crypt failed %d\n", ret);
	else
		ret = 0;
out:
	crypto_free_blkcipher(tfm);
	/* propagate failures instead of reporting success unconditionally */
	return ret;
}
|
||||
|
||||
/*
 * AES-CBC encrypt the concatenation of src1 and src2 into dst, using
 * aes_iv as the initial vector.
 *
 * The combined plaintext is followed by 1..16 pad bytes, each holding
 * the pad length.
 *
 * @key, @key_len:   raw cipher key
 * @dst:             output buffer
 * @dst_len:         in: capacity of @dst; out: ciphertext length
 * @src1, @src1_len: first plaintext fragment
 * @src2, @src2_len: second plaintext fragment
 *
 * Returns 0 on success, -ERANGE if @dst cannot hold the ciphertext
 * (*dst_len is then left untouched), or a negative errno from cipher
 * allocation, key setup or encryption.
 */
static int ceph_aes_encrypt2(const void *key, int key_len, void *dst,
			     size_t *dst_len,
			     const void *src1, size_t src1_len,
			     const void *src2, size_t src2_len)
{
	struct scatterlist sg_in[3], sg_out[1];
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc = { .flags = 0 };
	int ret;
	void *iv;
	int ivsize;
	size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f));
	char pad[16];

	/* same contract as the CEPH_CRYPTO_NONE path: *dst_len is capacity */
	if (*dst_len < src1_len + src2_len + zero_padding)
		return -ERANGE;

	tfm = ceph_crypto_alloc_cipher();
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	desc.tfm = tfm;

	memset(pad, zero_padding, zero_padding);

	*dst_len = src1_len + src2_len + zero_padding;

	ret = crypto_blkcipher_setkey((void *)tfm, key, key_len);
	if (ret < 0) {
		pr_err("ceph_aes_crypt2 failed %d\n", ret);
		goto out;
	}

	sg_init_table(sg_in, 3);
	sg_set_buf(&sg_in[0], src1, src1_len);
	sg_set_buf(&sg_in[1], src2, src2_len);
	sg_set_buf(&sg_in[2], pad, zero_padding);
	sg_init_table(sg_out, 1);
	sg_set_buf(sg_out, dst, *dst_len);

	iv = crypto_blkcipher_crt(tfm)->iv;
	ivsize = crypto_blkcipher_ivsize(tfm);
	memcpy(iv, aes_iv, ivsize);

	ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in,
				       src1_len + src2_len + zero_padding);
	if (ret < 0)
		pr_err("ceph_aes_crypt2 failed %d\n", ret);
	else
		ret = 0;
out:
	crypto_free_blkcipher(tfm);
	/* propagate failures instead of reporting success unconditionally */
	return ret;
}
|
||||
|
||||
/*
 * AES-CBC decrypt src into dst, using aes_iv as the initial vector, and
 * strip the trailing padding (the last plaintext byte holds the pad
 * length).
 *
 * Decrypted bytes beyond the capacity of @dst spill into a local
 * 16-byte scratch buffer, so the ciphertext may be at most 16 bytes
 * longer than @dst.
 *
 * @key, @key_len: raw cipher key
 * @dst:           output buffer
 * @dst_len:       in: capacity of @dst; out: plaintext length
 * @src, @src_len: ciphertext
 *
 * Returns 0 on success, -EINVAL for empty input, -ERANGE if the
 * ciphertext or resulting plaintext does not fit, -EPERM on bad
 * padding, or a negative errno from the crypto layer.
 */
static int ceph_aes_decrypt(const void *key, int key_len,
			    void *dst, size_t *dst_len,
			    const void *src, size_t src_len)
{
	struct scatterlist sg_in[1], sg_out[2];
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc = { .flags = 0 };
	char pad[16];
	void *iv;
	int ivsize;
	int ret;
	int last_byte;

	/* src_len - 1 is used as an index below */
	if (src_len == 0)
		return -EINVAL;
	/* the output scatterlist only covers dst plus pad */
	if (src_len > *dst_len + sizeof(pad))
		return -ERANGE;

	tfm = ceph_crypto_alloc_cipher();
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	desc.tfm = tfm;

	ret = crypto_blkcipher_setkey((void *)tfm, key, key_len);
	if (ret < 0) {
		crypto_free_blkcipher(tfm);
		pr_err("ceph_aes_decrypt failed %d\n", ret);
		return ret;
	}

	sg_init_table(sg_in, 1);
	sg_init_table(sg_out, 2);
	sg_set_buf(sg_in, src, src_len);
	sg_set_buf(&sg_out[0], dst, *dst_len);
	sg_set_buf(&sg_out[1], pad, sizeof(pad));

	iv = crypto_blkcipher_crt(tfm)->iv;
	ivsize = crypto_blkcipher_ivsize(tfm);
	memcpy(iv, aes_iv, ivsize);

	ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
	crypto_free_blkcipher(tfm);
	if (ret < 0) {
		pr_err("ceph_aes_decrypt failed %d\n", ret);
		return ret;
	}

	/* the final plaintext byte lives in dst or in the spill buffer */
	if (src_len <= *dst_len)
		last_byte = ((char *)dst)[src_len - 1];
	else
		last_byte = pad[src_len - *dst_len - 1];
	if (!(last_byte <= 16 && src_len >= last_byte)) {
		pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
		       last_byte, (int)src_len);
		return -EPERM;  /* bad padding */
	}

	/* never report more plaintext than dst can actually hold */
	if (src_len - last_byte > *dst_len)
		return -ERANGE;
	*dst_len = src_len - last_byte;
	return 0;
}
|
||||
|
||||
/*
 * AES-CBC decrypt src into two consecutive output buffers, using aes_iv
 * as the initial vector, and strip the trailing padding (the last
 * plaintext byte holds the pad length).
 *
 * Plaintext fills @dst1 first, then @dst2; anything beyond both spills
 * into a local 16-byte scratch buffer.
 *
 * @key, @key_len: raw cipher key
 * @dst1:          first output buffer
 * @dst1_len:      in: capacity of @dst1; out: bytes placed in @dst1
 * @dst2:          second output buffer
 * @dst2_len:      in: capacity of @dst2; out: bytes placed in @dst2
 * @src, @src_len: ciphertext
 *
 * Returns 0 on success, -EINVAL for empty input, -ERANGE if the
 * ciphertext or resulting plaintext does not fit, -EPERM on bad
 * padding, or a negative errno from the crypto layer.
 */
static int ceph_aes_decrypt2(const void *key, int key_len,
			     void *dst1, size_t *dst1_len,
			     void *dst2, size_t *dst2_len,
			     const void *src, size_t src_len)
{
	struct scatterlist sg_in[1], sg_out[3];
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc = { .flags = 0 };
	char pad[16];
	void *iv;
	int ivsize;
	int ret;
	int last_byte;

	/* src_len - 1 is used as an index below */
	if (src_len == 0)
		return -EINVAL;
	/* the output scatterlist only covers dst1, dst2 and pad */
	if (src_len > *dst1_len + *dst2_len + sizeof(pad))
		return -ERANGE;

	tfm = ceph_crypto_alloc_cipher();
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	desc.tfm = tfm;

	sg_init_table(sg_in, 1);
	sg_set_buf(sg_in, src, src_len);
	sg_init_table(sg_out, 3);
	sg_set_buf(&sg_out[0], dst1, *dst1_len);
	sg_set_buf(&sg_out[1], dst2, *dst2_len);
	sg_set_buf(&sg_out[2], pad, sizeof(pad));

	ret = crypto_blkcipher_setkey((void *)tfm, key, key_len);
	if (ret < 0) {
		crypto_free_blkcipher(tfm);
		pr_err("ceph_aes_decrypt failed %d\n", ret);
		return ret;
	}

	iv = crypto_blkcipher_crt(tfm)->iv;
	ivsize = crypto_blkcipher_ivsize(tfm);
	memcpy(iv, aes_iv, ivsize);

	ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len);
	crypto_free_blkcipher(tfm);
	if (ret < 0) {
		pr_err("ceph_aes_decrypt failed %d\n", ret);
		return ret;
	}

	/* locate the final plaintext byte in dst1, dst2 or the spill buffer */
	if (src_len <= *dst1_len)
		last_byte = ((char *)dst1)[src_len - 1];
	else if (src_len <= *dst1_len + *dst2_len)
		last_byte = ((char *)dst2)[src_len - *dst1_len - 1];
	else
		last_byte = pad[src_len - *dst1_len - *dst2_len - 1];
	if (!(last_byte <= 16 && src_len >= last_byte)) {
		pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n",
		       last_byte, (int)src_len);
		return -EPERM;  /* bad padding */
	}
	src_len -= last_byte;

	/* never report more plaintext than the two buffers can hold */
	if (src_len > *dst1_len + *dst2_len)
		return -ERANGE;

	if (src_len < *dst1_len) {
		*dst1_len = src_len;
		*dst2_len = 0;
	} else {
		*dst2_len = src_len - *dst1_len;
	}

	return 0;
}
|
||||
|
||||
|
||||
/*
 * Decrypt src into dst according to secret->type.
 *
 * CEPH_CRYPTO_NONE copies the data verbatim (-ERANGE if dst is too
 * small); CEPH_CRYPTO_AES defers to ceph_aes_decrypt(); any other type
 * yields -EINVAL.  On success *dst_len holds the output length.
 */
int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
		 const void *src, size_t src_len)
{
	if (secret->type == CEPH_CRYPTO_AES)
		return ceph_aes_decrypt(secret->key, secret->len, dst,
					dst_len, src, src_len);

	if (secret->type != CEPH_CRYPTO_NONE)
		return -EINVAL;

	/* plaintext passthrough */
	if (*dst_len < src_len)
		return -ERANGE;
	memcpy(dst, src, src_len);
	*dst_len = src_len;
	return 0;
}
|
||||
|
||||
/*
 * Decrypt src into two consecutive output buffers according to
 * secret->type.
 *
 * Output fills dst1 first and the remainder goes to dst2.  On success
 * *dst1_len and *dst2_len hold the number of bytes placed in each
 * buffer.
 *
 * CEPH_CRYPTO_NONE copies the data verbatim (-ERANGE if the two buffers
 * together are too small); CEPH_CRYPTO_AES defers to
 * ceph_aes_decrypt2(); any other type yields -EINVAL.
 */
int ceph_decrypt2(struct ceph_crypto_key *secret,
		  void *dst1, size_t *dst1_len,
		  void *dst2, size_t *dst2_len,
		  const void *src, size_t src_len)
{
	size_t t;

	switch (secret->type) {
	case CEPH_CRYPTO_NONE:
		if (*dst1_len + *dst2_len < src_len)
			return -ERANGE;
		t = min(*dst1_len, src_len);
		memcpy(dst1, src, t);
		*dst1_len = t;
		src += t;
		src_len -= t;
		if (src_len) {
			t = min(*dst2_len, src_len);
			memcpy(dst2, src, t);
			*dst2_len = t;
		} else {
			/*
			 * Nothing spilled into dst2: report 0 rather than
			 * leaving the caller's capacity value in place,
			 * matching what the AES path reports.
			 */
			*dst2_len = 0;
		}
		return 0;

	case CEPH_CRYPTO_AES:
		return ceph_aes_decrypt2(secret->key, secret->len,
					 dst1, dst1_len, dst2, dst2_len,
					 src, src_len);

	default:
		return -EINVAL;
	}
}
|
||||
|
||||
/*
 * Encrypt src into dst according to secret->type.
 *
 * CEPH_CRYPTO_NONE copies the data verbatim (-ERANGE if dst is too
 * small); CEPH_CRYPTO_AES defers to ceph_aes_encrypt(); any other type
 * yields -EINVAL.  On success *dst_len holds the output length.
 */
int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
		 const void *src, size_t src_len)
{
	if (secret->type == CEPH_CRYPTO_AES)
		return ceph_aes_encrypt(secret->key, secret->len, dst,
					dst_len, src, src_len);

	if (secret->type != CEPH_CRYPTO_NONE)
		return -EINVAL;

	/* plaintext passthrough */
	if (*dst_len < src_len)
		return -ERANGE;
	memcpy(dst, src, src_len);
	*dst_len = src_len;
	return 0;
}
|
||||
|
||||
/*
 * Encrypt the concatenation of src1 and src2 into dst according to
 * secret->type.
 *
 * CEPH_CRYPTO_NONE copies both fragments back to back (-ERANGE if dst
 * is too small); CEPH_CRYPTO_AES defers to ceph_aes_encrypt2(); any
 * other type yields -EINVAL.  On success *dst_len holds the output
 * length.
 */
int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
		  const void *src1, size_t src1_len,
		  const void *src2, size_t src2_len)
{
	if (secret->type == CEPH_CRYPTO_AES)
		return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len,
					 src1, src1_len, src2, src2_len);

	if (secret->type != CEPH_CRYPTO_NONE)
		return -EINVAL;

	/* plaintext passthrough */
	if (*dst_len < src1_len + src2_len)
		return -ERANGE;
	memcpy(dst, src1, src1_len);
	memcpy(dst + src1_len, src2, src2_len);
	*dst_len = src1_len + src2_len;
	return 0;
}
|
||||
48
net/ceph/crypto.h
Normal file
48
net/ceph/crypto.h
Normal file
@@ -0,0 +1,48 @@
|
||||
#ifndef _FS_CEPH_CRYPTO_H
|
||||
#define _FS_CEPH_CRYPTO_H
|
||||
|
||||
#include <linux/ceph/types.h>
|
||||
#include <linux/ceph/buffer.h>
|
||||
|
||||
/*
|
||||
* cryptographic secret
|
||||
*/
|
||||
struct ceph_crypto_key {
|
||||
int type;
|
||||
struct ceph_timespec created;
|
||||
int len;
|
||||
void *key;
|
||||
};
|
||||
|
||||
static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
|
||||
{
|
||||
kfree(key->key);
|
||||
}
|
||||
|
||||
extern int ceph_crypto_key_encode(struct ceph_crypto_key *key,
|
||||
void **p, void *end);
|
||||
extern int ceph_crypto_key_decode(struct ceph_crypto_key *key,
|
||||
void **p, void *end);
|
||||
extern int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in);
|
||||
|
||||
/* crypto.c */
|
||||
extern int ceph_decrypt(struct ceph_crypto_key *secret,
|
||||
void *dst, size_t *dst_len,
|
||||
const void *src, size_t src_len);
|
||||
extern int ceph_encrypt(struct ceph_crypto_key *secret,
|
||||
void *dst, size_t *dst_len,
|
||||
const void *src, size_t src_len);
|
||||
extern int ceph_decrypt2(struct ceph_crypto_key *secret,
|
||||
void *dst1, size_t *dst1_len,
|
||||
void *dst2, size_t *dst2_len,
|
||||
const void *src, size_t src_len);
|
||||
extern int ceph_encrypt2(struct ceph_crypto_key *secret,
|
||||
void *dst, size_t *dst_len,
|
||||
const void *src1, size_t src1_len,
|
||||
const void *src2, size_t src2_len);
|
||||
|
||||
/* armor.c */
|
||||
extern int ceph_armor(char *dst, const char *src, const char *end);
|
||||
extern int ceph_unarmor(char *dst, const char *src, const char *end);
|
||||
|
||||
#endif
|
||||
267
net/ceph/debugfs.c
Normal file
267
net/ceph/debugfs.c
Normal file
@@ -0,0 +1,267 @@
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
#include <linux/ceph/libceph.h>
|
||||
#include <linux/ceph/mon_client.h>
|
||||
#include <linux/ceph/auth.h>
|
||||
#include <linux/ceph/debugfs.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
|
||||
/*
|
||||
* Implement /sys/kernel/debug/ceph fun
|
||||
*
|
||||
* /sys/kernel/debug/ceph/client* - an instance of the ceph client
|
||||
* .../osdmap - current osdmap
|
||||
* .../monmap - current monmap
|
||||
* .../osdc - active osd requests
|
||||
* .../monc - mon client state
|
||||
* .../dentry_lru - dump contents of dentry lru
|
||||
* .../caps - expose cap (reservation) stats
|
||||
* .../bdi - symlink to ../../bdi/something
|
||||
*/
|
||||
|
||||
static struct dentry *ceph_debugfs_dir;
|
||||
|
||||
static int monmap_show(struct seq_file *s, void *p)
|
||||
{
|
||||
int i;
|
||||
struct ceph_client *client = s->private;
|
||||
|
||||
if (client->monc.monmap == NULL)
|
||||
return 0;
|
||||
|
||||
seq_printf(s, "epoch %d\n", client->monc.monmap->epoch);
|
||||
for (i = 0; i < client->monc.monmap->num_mon; i++) {
|
||||
struct ceph_entity_inst *inst =
|
||||
&client->monc.monmap->mon_inst[i];
|
||||
|
||||
seq_printf(s, "\t%s%lld\t%s\n",
|
||||
ENTITY_NAME(inst->name),
|
||||
ceph_pr_addr(&inst->addr.in_addr));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int osdmap_show(struct seq_file *s, void *p)
|
||||
{
|
||||
int i;
|
||||
struct ceph_client *client = s->private;
|
||||
struct rb_node *n;
|
||||
|
||||
if (client->osdc.osdmap == NULL)
|
||||
return 0;
|
||||
seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
|
||||
seq_printf(s, "flags%s%s\n",
|
||||
(client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
|
||||
" NEARFULL" : "",
|
||||
(client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
|
||||
" FULL" : "");
|
||||
for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
|
||||
struct ceph_pg_pool_info *pool =
|
||||
rb_entry(n, struct ceph_pg_pool_info, node);
|
||||
seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n",
|
||||
pool->id, pool->v.pg_num, pool->pg_num_mask,
|
||||
pool->v.lpg_num, pool->lpg_num_mask);
|
||||
}
|
||||
for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
|
||||
struct ceph_entity_addr *addr =
|
||||
&client->osdc.osdmap->osd_addr[i];
|
||||
int state = client->osdc.osdmap->osd_state[i];
|
||||
char sb[64];
|
||||
|
||||
seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
|
||||
i, ceph_pr_addr(&addr->in_addr),
|
||||
((client->osdc.osdmap->osd_weight[i]*100) >> 16),
|
||||
ceph_osdmap_state_str(sb, sizeof(sb), state));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int monc_show(struct seq_file *s, void *p)
|
||||
{
|
||||
struct ceph_client *client = s->private;
|
||||
struct ceph_mon_generic_request *req;
|
||||
struct ceph_mon_client *monc = &client->monc;
|
||||
struct rb_node *rp;
|
||||
|
||||
mutex_lock(&monc->mutex);
|
||||
|
||||
if (monc->have_mdsmap)
|
||||
seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap);
|
||||
if (monc->have_osdmap)
|
||||
seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap);
|
||||
if (monc->want_next_osdmap)
|
||||
seq_printf(s, "want next osdmap\n");
|
||||
|
||||
for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) {
|
||||
__u16 op;
|
||||
req = rb_entry(rp, struct ceph_mon_generic_request, node);
|
||||
op = le16_to_cpu(req->request->hdr.type);
|
||||
if (op == CEPH_MSG_STATFS)
|
||||
seq_printf(s, "%lld statfs\n", req->tid);
|
||||
else
|
||||
seq_printf(s, "%lld unknown\n", req->tid);
|
||||
}
|
||||
|
||||
mutex_unlock(&monc->mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int osdc_show(struct seq_file *s, void *pp)
|
||||
{
|
||||
struct ceph_client *client = s->private;
|
||||
struct ceph_osd_client *osdc = &client->osdc;
|
||||
struct rb_node *p;
|
||||
|
||||
mutex_lock(&osdc->request_mutex);
|
||||
for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
|
||||
struct ceph_osd_request *req;
|
||||
struct ceph_osd_request_head *head;
|
||||
struct ceph_osd_op *op;
|
||||
int num_ops;
|
||||
int opcode, olen;
|
||||
int i;
|
||||
|
||||
req = rb_entry(p, struct ceph_osd_request, r_node);
|
||||
|
||||
seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid,
|
||||
req->r_osd ? req->r_osd->o_osd : -1,
|
||||
le32_to_cpu(req->r_pgid.pool),
|
||||
le16_to_cpu(req->r_pgid.ps));
|
||||
|
||||
head = req->r_request->front.iov_base;
|
||||
op = (void *)(head + 1);
|
||||
|
||||
num_ops = le16_to_cpu(head->num_ops);
|
||||
olen = le32_to_cpu(head->object_len);
|
||||
seq_printf(s, "%.*s", olen,
|
||||
(const char *)(head->ops + num_ops));
|
||||
|
||||
if (req->r_reassert_version.epoch)
|
||||
seq_printf(s, "\t%u'%llu",
|
||||
(unsigned)le32_to_cpu(req->r_reassert_version.epoch),
|
||||
le64_to_cpu(req->r_reassert_version.version));
|
||||
else
|
||||
seq_printf(s, "\t");
|
||||
|
||||
for (i = 0; i < num_ops; i++) {
|
||||
opcode = le16_to_cpu(op->op);
|
||||
seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
|
||||
op++;
|
||||
}
|
||||
|
||||
seq_printf(s, "\n");
|
||||
}
|
||||
mutex_unlock(&osdc->request_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
CEPH_DEFINE_SHOW_FUNC(monmap_show)
|
||||
CEPH_DEFINE_SHOW_FUNC(osdmap_show)
|
||||
CEPH_DEFINE_SHOW_FUNC(monc_show)
|
||||
CEPH_DEFINE_SHOW_FUNC(osdc_show)
|
||||
|
||||
int ceph_debugfs_init(void)
|
||||
{
|
||||
ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
|
||||
if (!ceph_debugfs_dir)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ceph_debugfs_cleanup(void)
|
||||
{
|
||||
debugfs_remove(ceph_debugfs_dir);
|
||||
}
|
||||
|
||||
int ceph_debugfs_client_init(struct ceph_client *client)
|
||||
{
|
||||
int ret = -ENOMEM;
|
||||
char name[80];
|
||||
|
||||
snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
|
||||
client->monc.auth->global_id);
|
||||
|
||||
client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
|
||||
if (!client->debugfs_dir)
|
||||
goto out;
|
||||
|
||||
client->monc.debugfs_file = debugfs_create_file("monc",
|
||||
0600,
|
||||
client->debugfs_dir,
|
||||
client,
|
||||
&monc_show_fops);
|
||||
if (!client->monc.debugfs_file)
|
||||
goto out;
|
||||
|
||||
client->osdc.debugfs_file = debugfs_create_file("osdc",
|
||||
0600,
|
||||
client->debugfs_dir,
|
||||
client,
|
||||
&osdc_show_fops);
|
||||
if (!client->osdc.debugfs_file)
|
||||
goto out;
|
||||
|
||||
client->debugfs_monmap = debugfs_create_file("monmap",
|
||||
0600,
|
||||
client->debugfs_dir,
|
||||
client,
|
||||
&monmap_show_fops);
|
||||
if (!client->debugfs_monmap)
|
||||
goto out;
|
||||
|
||||
client->debugfs_osdmap = debugfs_create_file("osdmap",
|
||||
0600,
|
||||
client->debugfs_dir,
|
||||
client,
|
||||
&osdmap_show_fops);
|
||||
if (!client->debugfs_osdmap)
|
||||
goto out;
|
||||
|
||||
return 0;
|
||||
|
||||
out:
|
||||
ceph_debugfs_client_cleanup(client);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ceph_debugfs_client_cleanup(struct ceph_client *client)
|
||||
{
|
||||
debugfs_remove(client->debugfs_osdmap);
|
||||
debugfs_remove(client->debugfs_monmap);
|
||||
debugfs_remove(client->osdc.debugfs_file);
|
||||
debugfs_remove(client->monc.debugfs_file);
|
||||
debugfs_remove(client->debugfs_dir);
|
||||
}
|
||||
|
||||
#else /* CONFIG_DEBUG_FS */
|
||||
|
||||
/* No-op replacements used when CONFIG_DEBUG_FS is not set. */

int ceph_debugfs_init(void)
{
	/* nothing to create */
	return 0;
}

void ceph_debugfs_cleanup(void)
{
	/* nothing to remove */
}

int ceph_debugfs_client_init(struct ceph_client *client)
{
	/* nothing to create */
	return 0;
}

void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
	/* nothing to remove */
}
|
||||
|
||||
#endif /* CONFIG_DEBUG_FS */
|
||||
|
||||
EXPORT_SYMBOL(ceph_debugfs_init);
|
||||
EXPORT_SYMBOL(ceph_debugfs_cleanup);
|
||||
2453
net/ceph/messenger.c
Normal file
2453
net/ceph/messenger.c
Normal file
File diff suppressed because it is too large
Load Diff
1027
net/ceph/mon_client.c
Normal file
1027
net/ceph/mon_client.c
Normal file
File diff suppressed because it is too large
Load Diff
64
net/ceph/msgpool.c
Normal file
64
net/ceph/msgpool.c
Normal file
@@ -0,0 +1,64 @@
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/err.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <linux/ceph/msgpool.h>
|
||||
|
||||
/*
 * mempool allocation callback: build a new message whose front is
 * pool->front_len bytes.  @arg is the owning ceph_msgpool.  Logs an
 * error and returns NULL if the message cannot be allocated.
 */
static void *alloc_fn(gfp_t gfp_mask, void *arg)
{
	struct ceph_msgpool *pool = arg;
	void *msg = ceph_msg_new(0, pool->front_len, gfp_mask);

	if (msg == NULL)
		pr_err("msgpool %s alloc failed\n", pool->name);
	return msg;
}
|
||||
|
||||
/* mempool free callback: drop a reference on the message; @arg is unused. */
static void free_fn(void *element, void *arg)
{
	struct ceph_msg *msg = element;

	ceph_msg_put(msg);
}
|
||||
|
||||
/*
 * Initialise a message pool.
 *
 * @pool:      pool to set up
 * @front_len: front length of every message allocated for the pool
 * @size:      element count passed to mempool_create()
 * @blocking:  not referenced by this function
 * @name:      label used in log messages; the pointer is stored, not copied
 *
 * Returns 0 on success or -ENOMEM if the mempool cannot be created.
 */
int ceph_msgpool_init(struct ceph_msgpool *pool,
		      int front_len, int size, bool blocking, const char *name)
{
	/*
	 * Fill in everything alloc_fn() reads before creating the mempool:
	 * mempool_create() preallocates elements through alloc_fn(), whose
	 * failure message prints pool->name.
	 */
	pool->front_len = front_len;
	pool->name = name;
	pool->pool = mempool_create(size, alloc_fn, free_fn, pool);
	if (!pool->pool)
		return -ENOMEM;
	return 0;
}
|
||||
|
||||
void ceph_msgpool_destroy(struct ceph_msgpool *pool)
|
||||
{
|
||||
mempool_destroy(pool->pool);
|
||||
}
|
||||
|
||||
struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
|
||||
int front_len)
|
||||
{
|
||||
if (front_len > pool->front_len) {
|
||||
pr_err("msgpool_get pool %s need front %d, pool size is %d\n",
|
||||
pool->name, front_len, pool->front_len);
|
||||
WARN_ON(1);
|
||||
|
||||
/* try to alloc a fresh message */
|
||||
return ceph_msg_new(0, front_len, GFP_NOFS);
|
||||
}
|
||||
|
||||
return mempool_alloc(pool->pool, GFP_NOFS);
|
||||
}
|
||||
|
||||
void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
|
||||
{
|
||||
/* reset msg front_len; user may have changed it */
|
||||
msg->front.iov_len = pool->front_len;
|
||||
msg->hdr.front_len = cpu_to_le32(pool->front_len);
|
||||
|
||||
kref_init(&msg->kref); /* retake single ref */
|
||||
}
|
||||
1773
net/ceph/osd_client.c
Normal file
1773
net/ceph/osd_client.c
Normal file
File diff suppressed because it is too large
Load Diff
1128
net/ceph/osdmap.c
Normal file
1128
net/ceph/osdmap.c
Normal file
File diff suppressed because it is too large
Load Diff
154
net/ceph/pagelist.c
Normal file
154
net/ceph/pagelist.c
Normal file
@@ -0,0 +1,154 @@
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/ceph/pagelist.h>
|
||||
|
||||
static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
|
||||
{
|
||||
if (pl->mapped_tail) {
|
||||
struct page *page = list_entry(pl->head.prev, struct page, lru);
|
||||
kunmap(page);
|
||||
pl->mapped_tail = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int ceph_pagelist_release(struct ceph_pagelist *pl)
|
||||
{
|
||||
ceph_pagelist_unmap_tail(pl);
|
||||
while (!list_empty(&pl->head)) {
|
||||
struct page *page = list_first_entry(&pl->head, struct page,
|
||||
lru);
|
||||
list_del(&page->lru);
|
||||
__free_page(page);
|
||||
}
|
||||
ceph_pagelist_free_reserve(pl);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_pagelist_release);
|
||||
|
||||
static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
if (!pl->num_pages_free) {
|
||||
page = __page_cache_alloc(GFP_NOFS);
|
||||
} else {
|
||||
page = list_first_entry(&pl->free_list, struct page, lru);
|
||||
list_del(&page->lru);
|
||||
--pl->num_pages_free;
|
||||
}
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
pl->room += PAGE_SIZE;
|
||||
ceph_pagelist_unmap_tail(pl);
|
||||
list_add_tail(&page->lru, &pl->head);
|
||||
pl->mapped_tail = kmap(page);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Copy @n bytes into the mapped tail page and account for them. */
static void ceph_pagelist_fill_tail(struct ceph_pagelist *pl,
				    const void *from, size_t n)
{
	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), from, n);
	pl->length += n;
	pl->room -= n;
}

/*
 * Append @len bytes from @buf to the pagelist, adding pages as needed.
 *
 * Returns 0 on success, or the error from ceph_pagelist_addpage(); in
 * that case the bytes that fit before the failure remain appended.
 */
int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
{
	const char *src = buf;

	while (pl->room < len) {
		size_t chunk = pl->room;
		int err;

		/* use up what is left of the current tail page */
		ceph_pagelist_fill_tail(pl, src, chunk);
		src += chunk;
		len -= chunk;

		err = ceph_pagelist_addpage(pl);
		if (err)
			return err;
	}

	ceph_pagelist_fill_tail(pl, src, len);
	return 0;
}
|
||||
EXPORT_SYMBOL(ceph_pagelist_append);
|
||||
|
||||
/**
|
||||
* Allocate enough pages for a pagelist to append the given amount
|
||||
* of data without without allocating.
|
||||
* Returns: 0 on success, -ENOMEM on error.
|
||||
*/
|
||||
int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)
|
||||
{
|
||||
if (space <= pl->room)
|
||||
return 0;
|
||||
space -= pl->room;
|
||||
space = (space + PAGE_SIZE - 1) >> PAGE_SHIFT; /* conv to num pages */
|
||||
|
||||
while (space > pl->num_pages_free) {
|
||||
struct page *page = __page_cache_alloc(GFP_NOFS);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
list_add_tail(&page->lru, &pl->free_list);
|
||||
++pl->num_pages_free;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_pagelist_reserve);
|
||||
|
||||
/**
|
||||
* Free any pages that have been preallocated.
|
||||
*/
|
||||
int ceph_pagelist_free_reserve(struct ceph_pagelist *pl)
|
||||
{
|
||||
while (!list_empty(&pl->free_list)) {
|
||||
struct page *page = list_first_entry(&pl->free_list,
|
||||
struct page, lru);
|
||||
list_del(&page->lru);
|
||||
__free_page(page);
|
||||
--pl->num_pages_free;
|
||||
}
|
||||
BUG_ON(pl->num_pages_free);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_pagelist_free_reserve);
|
||||
|
||||
/**
|
||||
* Create a truncation point.
|
||||
*/
|
||||
void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
|
||||
struct ceph_pagelist_cursor *c)
|
||||
{
|
||||
c->pl = pl;
|
||||
c->page_lru = pl->head.prev;
|
||||
c->room = pl->room;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_pagelist_set_cursor);
|
||||
|
||||
/**
|
||||
* Truncate a pagelist to the given point. Move extra pages to reserve.
|
||||
* This won't sleep.
|
||||
* Returns: 0 on success,
|
||||
* -EINVAL if the pagelist doesn't match the trunc point pagelist
|
||||
*/
|
||||
int ceph_pagelist_truncate(struct ceph_pagelist *pl,
|
||||
struct ceph_pagelist_cursor *c)
|
||||
{
|
||||
struct page *page;
|
||||
|
||||
if (pl != c->pl)
|
||||
return -EINVAL;
|
||||
ceph_pagelist_unmap_tail(pl);
|
||||
while (pl->head.prev != c->page_lru) {
|
||||
page = list_entry(pl->head.prev, struct page, lru);
|
||||
list_del(&page->lru); /* remove from pagelist */
|
||||
list_add_tail(&page->lru, &pl->free_list); /* add to reserve */
|
||||
++pl->num_pages_free;
|
||||
}
|
||||
pl->room = c->room;
|
||||
if (!list_empty(&pl->head)) {
|
||||
page = list_entry(pl->head.prev, struct page, lru);
|
||||
pl->mapped_tail = kmap(page);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_pagelist_truncate);
|
||||
223
net/ceph/pagevec.c
Normal file
223
net/ceph/pagevec.c
Normal file
@@ -0,0 +1,223 @@
|
||||
#include <linux/ceph/ceph_debug.h>
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/writeback.h>
|
||||
|
||||
#include <linux/ceph/libceph.h>
|
||||
|
||||
/*
|
||||
* build a vector of user pages
|
||||
*/
|
||||
struct page **ceph_get_direct_page_vector(const char __user *data,
|
||||
int num_pages,
|
||||
loff_t off, size_t len)
|
||||
{
|
||||
struct page **pages;
|
||||
int rc;
|
||||
|
||||
pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
|
||||
if (!pages)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
down_read(¤t->mm->mmap_sem);
|
||||
rc = get_user_pages(current, current->mm, (unsigned long)data,
|
||||
num_pages, 0, 0, pages, NULL);
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
if (rc < 0)
|
||||
goto fail;
|
||||
return pages;
|
||||
|
||||
fail:
|
||||
kfree(pages);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_get_direct_page_vector);
|
||||
|
||||
/* Drop a reference on each of the @num_pages pages, then free the vector. */
void ceph_put_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages)
		put_page(pages[idx++]);
	kfree(pages);
}
|
||||
EXPORT_SYMBOL(ceph_put_page_vector);
|
||||
|
||||
/* Free each of the @num_pages pages, then free the vector itself. */
void ceph_release_page_vector(struct page **pages, int num_pages)
{
	int idx = 0;

	while (idx < num_pages)
		__free_pages(pages[idx++], 0);
	kfree(pages);
}
|
||||
EXPORT_SYMBOL(ceph_release_page_vector);
|
||||
|
||||
/*
|
||||
* allocate a vector new pages
|
||||
*/
|
||||
struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags)
|
||||
{
|
||||
struct page **pages;
|
||||
int i;
|
||||
|
||||
pages = kmalloc(sizeof(*pages) * num_pages, flags);
|
||||
if (!pages)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
pages[i] = __page_cache_alloc(flags);
|
||||
if (pages[i] == NULL) {
|
||||
ceph_release_page_vector(pages, i);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
}
|
||||
return pages;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_alloc_page_vector);
|
||||
|
||||
/*
|
||||
* copy user data into a page vector
|
||||
*/
|
||||
int ceph_copy_user_to_page_vector(struct page **pages,
|
||||
const char __user *data,
|
||||
loff_t off, size_t len)
|
||||
{
|
||||
int i = 0;
|
||||
int po = off & ~PAGE_CACHE_MASK;
|
||||
int left = len;
|
||||
int l, bad;
|
||||
|
||||
while (left > 0) {
|
||||
l = min_t(int, PAGE_CACHE_SIZE-po, left);
|
||||
bad = copy_from_user(page_address(pages[i]) + po, data, l);
|
||||
if (bad == l)
|
||||
return -EFAULT;
|
||||
data += l - bad;
|
||||
left -= l - bad;
|
||||
po += l - bad;
|
||||
if (po == PAGE_CACHE_SIZE) {
|
||||
po = 0;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_copy_user_to_page_vector);
|
||||
|
||||
int ceph_copy_to_page_vector(struct page **pages,
|
||||
const char *data,
|
||||
loff_t off, size_t len)
|
||||
{
|
||||
int i = 0;
|
||||
size_t po = off & ~PAGE_CACHE_MASK;
|
||||
size_t left = len;
|
||||
size_t l;
|
||||
|
||||
while (left > 0) {
|
||||
l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
|
||||
memcpy(page_address(pages[i]) + po, data, l);
|
||||
data += l;
|
||||
left -= l;
|
||||
po += l;
|
||||
if (po == PAGE_CACHE_SIZE) {
|
||||
po = 0;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_copy_to_page_vector);
|
||||
|
||||
int ceph_copy_from_page_vector(struct page **pages,
|
||||
char *data,
|
||||
loff_t off, size_t len)
|
||||
{
|
||||
int i = 0;
|
||||
size_t po = off & ~PAGE_CACHE_MASK;
|
||||
size_t left = len;
|
||||
size_t l;
|
||||
|
||||
while (left > 0) {
|
||||
l = min_t(size_t, PAGE_CACHE_SIZE-po, left);
|
||||
memcpy(data, page_address(pages[i]) + po, l);
|
||||
data += l;
|
||||
left -= l;
|
||||
po += l;
|
||||
if (po == PAGE_CACHE_SIZE) {
|
||||
po = 0;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_copy_from_page_vector);
|
||||
|
||||
/*
|
||||
* copy user data from a page vector into a user pointer
|
||||
*/
|
||||
int ceph_copy_page_vector_to_user(struct page **pages,
|
||||
char __user *data,
|
||||
loff_t off, size_t len)
|
||||
{
|
||||
int i = 0;
|
||||
int po = off & ~PAGE_CACHE_MASK;
|
||||
int left = len;
|
||||
int l, bad;
|
||||
|
||||
while (left > 0) {
|
||||
l = min_t(int, left, PAGE_CACHE_SIZE-po);
|
||||
bad = copy_to_user(data, page_address(pages[i]) + po, l);
|
||||
if (bad == l)
|
||||
return -EFAULT;
|
||||
data += l - bad;
|
||||
left -= l - bad;
|
||||
if (po) {
|
||||
po += l - bad;
|
||||
if (po == PAGE_CACHE_SIZE)
|
||||
po = 0;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
|
||||
|
||||
/*
|
||||
* Zero an extent within a page vector. Offset is relative to the
|
||||
* start of the first page.
|
||||
*/
|
||||
void ceph_zero_page_vector_range(int off, int len, struct page **pages)
|
||||
{
|
||||
int i = off >> PAGE_CACHE_SHIFT;
|
||||
|
||||
off &= ~PAGE_CACHE_MASK;
|
||||
|
||||
dout("zero_page_vector_page %u~%u\n", off, len);
|
||||
|
||||
/* leading partial page? */
|
||||
if (off) {
|
||||
int end = min((int)PAGE_CACHE_SIZE, off + len);
|
||||
dout("zeroing %d %p head from %d\n", i, pages[i],
|
||||
(int)off);
|
||||
zero_user_segment(pages[i], off, end);
|
||||
len -= (end - off);
|
||||
i++;
|
||||
}
|
||||
while (len >= PAGE_CACHE_SIZE) {
|
||||
dout("zeroing %d %p len=%d\n", i, pages[i], len);
|
||||
zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
|
||||
len -= PAGE_CACHE_SIZE;
|
||||
i++;
|
||||
}
|
||||
/* trailing partial page? */
|
||||
if (len) {
|
||||
dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len);
|
||||
zero_user_segment(pages[i], 0, len);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_zero_page_vector_range);
|
||||
|
||||
Reference in New Issue
Block a user