Files
ubports_kernel_google_msm/include/linux/writeback.h
Jens Axboe 03ba3782e8 writeback: switch to per-bdi threads for flushing data
This gets rid of pdflush for bdi writeout and kupdated style cleaning.
pdflush writeout suffers from lack of locality and also requires more
threads to handle the same workload, since it has to work in a
non-blocking fashion against each queue. This also introduces lumpy
behaviour and potential request starvation, since pdflush can be starved
for queue access if others are accessing it. A sample ffsb workload that
does random writes to files is about 8% faster here on a simple SATA drive
during the benchmark phase. File layout also seems a LOT more smooth in
vmstat:

 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 0  1      0 608848   2652 375372    0    0     0 71024  604    24  1 10 48 42
 0  1      0 549644   2712 433736    0    0     0 60692  505    27  1  8 48 44
 1  0      0 476928   2784 505192    0    0     4 29540  553    24  0  9 53 37
 0  1      0 457972   2808 524008    0    0     0 54876  331    16  0  4 38 58
 0  1      0 366128   2928 614284    0    0     4 92168  710    58  0 13 53 34
 0  1      0 295092   3000 684140    0    0     0 62924  572    23  0  9 53 37
 0  1      0 236592   3064 741704    0    0     4 58256  523    17  0  8 48 44
 0  1      0 165608   3132 811464    0    0     0 57460  560    21  0  8 54 38
 0  1      0 102952   3200 873164    0    0     4 74748  540    29  1 10 48 41
 0  1      0  48604   3252 926472    0    0     0 53248  469    29  0  7 47 45

where vanilla tends to fluctuate a lot in the creation phase:

 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 1  1      0 678716   5792 303380    0    0     0 74064  565    50  1 11 52 36
 1  0      0 662488   5864 319396    0    0     4   352  302   329  0  2 47 51
 0  1      0 599312   5924 381468    0    0     0 78164  516    55  0  9 51 40
 0  1      0 519952   6008 459516    0    0     4 78156  622    56  1 11 52 37
 1  1      0 436640   6092 541632    0    0     0 82244  622    54  0 11 48 41
 0  1      0 436640   6092 541660    0    0     0     8  152    39  0  0 51 49
 0  1      0 332224   6200 644252    0    0     4 102800  728    46  1 13 49 36
 1  0      0 274492   6260 701056    0    0     4 12328  459    49  0  7 50 43
 0  1      0 211220   6324 763356    0    0     0 106940  515    37  1 10 51 39
 1  0      0 160412   6376 813468    0    0     0  8224  415    43  0  6 49 45
 1  1      0  85980   6452 886556    0    0     4 113516  575    39  1 11 54 34
 0  2      0  85968   6452 886620    0    0     0  1640  158   211  0  0 46 54

A 10 disk test with btrfs performs 26% faster with per-bdi flushing. A
SSD based writeback test on XFS performs over 20% better as well, with
the throughput being very stable around 1GB/sec, where pdflush only
manages 750MB/sec and fluctuates wildly while doing so. Random buffered
writes to many files behave a lot better as well, as does random mmap'ed
writes.

A separate thread is added to sync the super blocks. In the long term,
adding sync_supers_bdi() functionality could get rid of this thread again.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
2009-09-11 09:20:25 +02:00

178 lines
5.6 KiB
C

/*
* include/linux/writeback.h
*/
#ifndef WRITEBACK_H
#define WRITEBACK_H
#include <linux/sched.h>
#include <linux/fs.h>
struct backing_dev_info;
extern spinlock_t inode_lock;
extern struct list_head inode_in_use;
extern struct list_head inode_unused;
/*
* Yes, writeback.h requires sched.h
* No, sched.h is not included from here.
*/
static inline int task_is_pdflush(struct task_struct *task)
{
return task->flags & PF_FLUSHER;
}
#define current_is_pdflush() task_is_pdflush(current)
/*
* fs/fs-writeback.c
*/
enum writeback_sync_modes {
WB_SYNC_NONE, /* Don't wait on anything */
WB_SYNC_ALL, /* Wait on every mapping */
};
/*
* A control structure which tells the writeback code what to do. These are
* always on the stack, and hence need no locking. They are always initialised
* in a manner such that unspecified fields are set to zero.
*/
struct writeback_control {
struct backing_dev_info *bdi; /* If !NULL, only write back this
queue */
struct super_block *sb; /* if !NULL, only write inodes from
this super_block */
enum writeback_sync_modes sync_mode;
unsigned long *older_than_this; /* If !NULL, only write back inodes
older than this */
long nr_to_write; /* Write this many pages, and decrement
this for each page written */
long pages_skipped; /* Pages which were not written */
/*
* For a_ops->writepages(): is start or end are non-zero then this is
* a hint that the filesystem need only write out the pages inside that
* byterange. The byte at `end' is included in the writeout request.
*/
loff_t range_start;
loff_t range_end;
unsigned nonblocking:1; /* Don't get stuck on request queues */
unsigned encountered_congestion:1; /* An output: a queue is full */
unsigned for_kupdate:1; /* A kupdate writeback */
unsigned for_reclaim:1; /* Invoked from the page allocator */
unsigned for_writepages:1; /* This is a writepages() call */
unsigned range_cyclic:1; /* range_start is cyclic */
unsigned more_io:1; /* more io to be dispatched */
/*
* write_cache_pages() won't update wbc->nr_to_write and
* mapping->writeback_index if no_nrwrite_index_update
* is set. write_cache_pages() may write more than we
* requested and we want to make sure nr_to_write and
* writeback_index are updated in a consistent manner
* so we use a single control to update them
*/
unsigned no_nrwrite_index_update:1;
};
/*
* fs/fs-writeback.c
*/
struct bdi_writeback;
int inode_wait(void *);
long writeback_inodes_sb(struct super_block *);
long sync_inodes_sb(struct super_block *);
void writeback_inodes_wbc(struct writeback_control *wbc);
long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
void wakeup_flusher_threads(long nr_pages);
/* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode)
{
might_sleep();
wait_on_bit(&inode->i_state, __I_LOCK, inode_wait,
TASK_UNINTERRUPTIBLE);
}
static inline void inode_sync_wait(struct inode *inode)
{
might_sleep();
wait_on_bit(&inode->i_state, __I_SYNC, inode_wait,
TASK_UNINTERRUPTIBLE);
}
/*
* mm/page-writeback.c
*/
void laptop_io_completion(void);
void laptop_sync_completion(void);
void throttle_vm_writeout(gfp_t gfp_mask);
/* These are exported to sysctl. */
extern int dirty_background_ratio;
extern unsigned long dirty_background_bytes;
extern int vm_dirty_ratio;
extern unsigned long vm_dirty_bytes;
extern unsigned int dirty_writeback_interval;
extern unsigned int dirty_expire_interval;
extern int vm_highmem_is_dirtyable;
extern int block_dump;
extern int laptop_mode;
extern unsigned long determine_dirtyable_memory(void);
extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_ratio_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);
extern int dirty_bytes_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos);
struct ctl_table;
struct file;
int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
unsigned long *pbdi_dirty, struct backing_dev_info *bdi);
void page_writeback_init(void);
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
unsigned long nr_pages_dirtied);
static inline void
balance_dirty_pages_ratelimited(struct address_space *mapping)
{
balance_dirty_pages_ratelimited_nr(mapping, 1);
}
typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
void *data);
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
int generic_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
void set_page_dirty_balance(struct page *page, int page_mkwrite);
void writeback_set_ratelimit(void);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
read-only. */
#endif /* WRITEBACK_H */