Add linux kernel memory support

Required kmem/vmem changes

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Brian Behlendorf 2010-08-26 11:46:09 -07:00
parent 60101509ee
commit 00b46022c6
14 changed files with 77 additions and 41 deletions

View File

@ -349,10 +349,14 @@ extern void kstat_delete(kstat_t *);
/*
 * Map the kmem/vmem allocation interfaces onto their umem_* counterparts.
 */
/* Allocation flags: KM_SLEEP and KM_PUSHPAGE both resolve to UMEM_NOFAIL. */
#define KM_SLEEP UMEM_NOFAIL
#define KM_PUSHPAGE KM_SLEEP
#define KM_NOSLEEP UMEM_DEFAULT
/* KM_NODEBUG is 0 here, so OR-ing it into a flag word changes nothing. */
#define KM_NODEBUG 0x0
#define KMC_NODEBUG UMC_NODEBUG
/* kmem_* allocation calls are passed straight through to umem_*. */
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
#define kmem_zalloc(_s, _f) umem_zalloc(_s, _f)
#define kmem_free(_b, _s) umem_free(_b, _s)
/* vmem_* forward to the kmem_* macros above, and so also end up in umem_*. */
#define vmem_alloc(_s, _f) kmem_alloc(_s, _f)
#define vmem_zalloc(_s, _f) kmem_zalloc(_s, _f)
#define vmem_free(_b, _s) kmem_free(_b, _s)
/* Cache creation/destruction forward all arguments unchanged to umem. */
#define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \
umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
#define kmem_cache_destroy(_c) umem_cache_destroy(_c)
@ -575,6 +579,7 @@ typedef struct callb_cpr {
/* Zone checks are stubbed out: both macros always evaluate to 1. */
#define zone_dataset_visible(x, y) (1)
#define INGLOBALZONE(z) (1)
/* printf-style formatting helpers that return a char *. */
extern char *kmem_vasprintf(const char *fmt, va_list adx);
extern char *kmem_asprintf(const char *fmt, ...);
/*
 * Free a string through kmem_free(), passing strlen(str)+1 as the size.
 * The size is derived from the string's current contents, so the string
 * must not have been lengthened or truncated since it was allocated
 * (assuming it was allocated as exactly strlen+1 bytes -- confirm at the
 * allocation site).  Note that `str` is evaluated twice by this macro.
 */
#define strfree(str) kmem_free((str), strlen(str)+1)

View File

@ -1107,25 +1107,27 @@ ksiddomain_rele(ksiddomain_t *ksid)
umem_free(ksid, sizeof (ksiddomain_t));
}
/*
 * Format `fmt` with the arguments in `adx` into a newly allocated string
 * and return it.
 *
 * Formatting is performed on a private copy of `adx`, made with va_copy()
 * and released with va_end() before returning.  A vasprintf() result of
 * -1 trips VERIFY().
 *
 * The returned string has to be released with strfree(); callers must
 * therefore leave its length unchanged.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list ap;
	char *str = NULL;
	int rc;

	va_copy(ap, adx);
	rc = vasprintf(&str, fmt, ap);
	VERIFY(rc != -1);
	va_end(ap);

	return (str);
}
char *
kmem_asprintf(const char *fmt, ...)
{
int size;
char *buf = NULL;
va_list adx;
char *buf;
va_start(adx, fmt);
size = vsnprintf(NULL, 0, fmt, adx) + 1;
va_end(adx);
buf = kmem_alloc(size, KM_SLEEP);
va_start(adx, fmt);
size = vsnprintf(buf, size, fmt, adx);
VERIFY(vasprintf(&buf, fmt, adx) != -1);
va_end(adx);
return (buf);

View File

@ -523,12 +523,13 @@ static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
* Hash table routines
*/
#define HT_LOCK_PAD 64
#define HT_LOCK_ALIGN 64
#define HT_LOCK_PAD (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
struct ht_lock {
kmutex_t ht_lock;
#ifdef _KERNEL
unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))];
unsigned char pad[HT_LOCK_PAD];
#endif
};
@ -772,8 +773,15 @@ buf_fini(void)
{
int i;
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Large allocations which do not require contiguous pages
* should be using vmem_free() in the linux kernel */
vmem_free(buf_hash_table.ht_table,
(buf_hash_table.ht_mask + 1) * sizeof (void *));
#else
kmem_free(buf_hash_table.ht_table,
(buf_hash_table.ht_mask + 1) * sizeof (void *));
#endif
for (i = 0; i < BUF_LOCKS; i++)
mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
kmem_cache_destroy(hdr_cache);
@ -875,8 +883,15 @@ buf_init(void)
hsize <<= 1;
retry:
buf_hash_table.ht_mask = hsize - 1;
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Large allocations which do not require contiguous pages
* should be using vmem_alloc() in the linux kernel */
buf_hash_table.ht_table =
vmem_zalloc(hsize * sizeof (void*), KM_SLEEP);
#else
buf_hash_table.ht_table =
kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP);
#endif
if (buf_hash_table.ht_table == NULL) {
ASSERT(hsize > (1ULL << 8));
hsize >>= 1;

View File

@ -293,7 +293,13 @@ dbuf_init(void)
retry:
h->hash_table_mask = hsize - 1;
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Large allocations which do not require contiguous pages
* should be using vmem_alloc() in the linux kernel */
h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP);
#else
h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP);
#endif
if (h->hash_table == NULL) {
/* XXX - we should really return an error instead of assert */
ASSERT(hsize > (1ULL << 10));
@ -317,7 +323,13 @@ dbuf_fini(void)
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Large allocations which do not require contiguous pages
* should be using vmem_free() in the linux kernel */
vmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
#else
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
#endif
kmem_cache_destroy(dbuf_cache);
}

View File

@ -501,6 +501,7 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
{
ddt_histogram_t *ddh_total;
/* XXX: Move to a slab */
ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
ddt_get_dedup_histogram(spa, ddh_total);
ddt_histogram_stat(dds_total, ddh_total);
@ -649,6 +650,7 @@ ddt_alloc(const ddt_key_t *ddk)
{
ddt_entry_t *dde;
/* XXX: Move to a slab */
dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP);
cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL);
@ -797,7 +799,8 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c)
{
ddt_t *ddt;
ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP);
/* XXX: Move to a slab */
ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP | KM_NODEBUG);
mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&ddt->ddt_tree, ddt_entry_compare,

View File

@ -1337,7 +1337,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
ra.vp = vp;
ra.voff = *voffp;
ra.bufsize = 1<<20;
ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP);
/* these were verified in dmu_recv_begin */
ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) ==
@ -1486,7 +1486,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
}
}
kmem_free(ra.buf, ra.bufsize);
vmem_free(ra.buf, ra.bufsize);
*voffp = ra.voff;
return (ra.err);
}

View File

@ -1270,7 +1270,7 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
nvsize = *(uint64_t *)db->db_data;
dmu_buf_rele(db, FTAG);
packed = kmem_alloc(nvsize, KM_SLEEP);
packed = kmem_alloc(nvsize, KM_SLEEP | KM_NODEBUG);
error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
DMU_READ_PREFETCH);
if (error == 0)
@ -5217,7 +5217,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
* saves us a pre-read to get data we don't actually care about.
*/
bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
packed = kmem_alloc(bufsize, KM_SLEEP);
packed = vmem_alloc(bufsize, KM_SLEEP);
VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
KM_SLEEP) == 0);
@ -5225,7 +5225,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
kmem_free(packed, bufsize);
vmem_free(packed, bufsize);
VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
dmu_buf_will_dirty(db, tx);

View File

@ -96,7 +96,7 @@ spa_config_load(void)
if (kobj_get_filesize(file, &fsize) != 0)
goto out;
buf = kmem_alloc(fsize, KM_SLEEP);
buf = kmem_alloc(fsize, KM_SLEEP | KM_NODEBUG);
/*
* Read the nvlist from the file.
@ -159,7 +159,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
*/
VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0);
buf = kmem_alloc(buflen, KM_SLEEP);
buf = kmem_alloc(buflen, KM_SLEEP | KM_NODEBUG);
temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR,

View File

@ -428,6 +428,7 @@ log_internal(history_internal_events_t event, spa_t *spa,
dmu_tx_t *tx, const char *fmt, va_list adx)
{
history_arg_t *ha;
va_list adx_copy;
/*
* If this is part of creating a pool, not everything is
@ -437,11 +438,9 @@ log_internal(history_internal_events_t event, spa_t *spa,
return;
ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1,
KM_SLEEP);
(void) vsprintf(ha->ha_history_str, fmt, adx);
va_copy(adx_copy, adx);
ha->ha_history_str = kmem_vasprintf(fmt, adx_copy);
va_end(adx_copy);
ha->ha_log_type = LOG_INTERNAL;
ha->ha_event = event;
ha->ha_zone = NULL;

View File

@ -438,7 +438,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
ASSERT(MUTEX_HELD(&spa_namespace_lock));
spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP | KM_NODEBUG);
mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);

View File

@ -49,7 +49,7 @@ txg_init(dsl_pool_t *dp, uint64_t txg)
int c;
bzero(tx, sizeof (tx_state_t));
tx->tx_cpu = kmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);
tx->tx_cpu = vmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);
for (c = 0; c < max_ncpus; c++) {
int i;
@ -107,7 +107,7 @@ txg_fini(dsl_pool_t *dp)
if (tx->tx_commit_cb_taskq != NULL)
taskq_destroy(tx->tx_commit_cb_taskq);
kmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));
vmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));
bzero(tx, sizeof (tx_state_t));
}

View File

@ -532,7 +532,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
sz = zap->zap_dbuf->db_size;
mzp = kmem_alloc(sz, KM_SLEEP);
mzp = vmem_alloc(sz, KM_SLEEP);
bcopy(zap->zap_dbuf->db_data, mzp, sz);
nchunks = zap->zap_m.zap_num_chunks;
@ -540,7 +540,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
1ULL << fzap_default_block_shift, 0, tx);
if (err) {
kmem_free(mzp, sz);
vmem_free(mzp, sz);
return (err);
}
}
@ -566,7 +566,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
if (err)
break;
}
kmem_free(mzp, sz);
vmem_free(mzp, sz);
*zapp = zap;
return (err);
}

View File

@ -170,7 +170,7 @@ history_str_get(zfs_cmd_t *zc)
if (zc->zc_history == 0)
return (NULL);
buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG);
if (copyinstr((void *)(uintptr_t)zc->zc_history,
buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
history_str_free(buf);
@ -1027,7 +1027,7 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
if (size == 0)
return (EINVAL);
packed = kmem_alloc(size, KM_SLEEP);
packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG);
if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
iflag)) != 0) {
@ -1093,7 +1093,7 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
if (size > zc->zc_nvlist_dst_size) {
error = ENOMEM;
} else {
packed = kmem_alloc(size, KM_SLEEP);
packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG);
VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
KM_SLEEP) == 0);
if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
@ -5081,7 +5081,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
return (-EINVAL);
zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG);
error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
if (error != 0)

View File

@ -1075,7 +1075,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize)
lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t);
itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP);
itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP|KM_NODEBUG);
itx->itx_lr.lrc_txtype = txtype;
itx->itx_lr.lrc_reclen = lrsize;
itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */
@ -1939,7 +1939,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
zr.zr_replay = replay_func;
zr.zr_arg = arg;
zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log);
zr.zr_lr = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);
zr.zr_lr = vmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);
/*
* Wait for in-progress removes to sync before starting replay.
@ -1951,7 +1951,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
ASSERT(zilog->zl_replay_blks == 0);
(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr,
zh->zh_claim_txg);
kmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE);
vmem_free(zr.zr_lr, 2 * SPA_MAXBLOCKSIZE);
zil_destroy(zilog, B_FALSE);
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);