Free Electrons

Embedded Linux Experts

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Internal interfaces of OSC layer.
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 *   Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
 */

#ifndef OSC_CL_INTERNAL_H
#define OSC_CL_INTERNAL_H

#include "../../include/linux/libcfs/libcfs.h"

#include "../include/obd.h"
/* osc_build_res_name() */
#include "../include/cl_object.h"
#include "../include/lclient.h"
#include "osc_internal.h"

/** \defgroup osc osc
 *  @{
 */

struct osc_extent;

/**
 * State maintained by osc layer for each IO context.
 */
struct osc_io {
	/** super class */
	struct cl_io_slice oi_cl;
	/** true if this io is lockless. */
	int		oi_lockless;
	/** active extents, we know how many bytes is going to be written,
	 * so having an active extent will prevent it from being fragmented
	 */
	struct osc_extent *oi_active;
	/** partially truncated extent, we need to hold this extent to prevent
	 * page writeback from happening.
	 */
	struct osc_extent *oi_trunc;

	struct obd_info    oi_info;
	struct obdo	oi_oa;
	struct osc_async_cbargs {
		bool		  opc_rpc_sent;
		int	       opc_rc;
		struct completion	opc_sync;
	} oi_cbarg;
};

/**
 * State of transfer for osc.
 */
struct osc_req {
	struct cl_req_slice    or_cl;
};

/**
 * State maintained by osc layer for the duration of a system call.
 */
struct osc_session {
	struct osc_io       os_io;
};

#define OTI_PVEC_SIZE 64
struct osc_thread_info {
	struct ldlm_res_id      oti_resname;
	ldlm_policy_data_t      oti_policy;
	struct cl_lock_descr    oti_descr;
	struct cl_attr	  oti_attr;
	struct lustre_handle    oti_handle;
	struct cl_page_list     oti_plist;
	struct cl_io		oti_io;
	struct cl_page	       *oti_pvec[OTI_PVEC_SIZE];
};

struct osc_object {
	struct cl_object   oo_cl;
	struct lov_oinfo  *oo_oinfo;
	/**
	 * True if locking against this stripe got -EUSERS.
	 */
	int		oo_contended;
	unsigned long	 oo_contention_time;
	/**
	 * List of pages in transfer.
	 */
	struct list_head	 oo_inflight[CRT_NR];
	/**
	 * Lock, protecting ccc_object::cob_inflight, because a seat-belt is
	 * locked during take-off and landing.
	 */
	spinlock_t	   oo_seatbelt;

	/**
	 * used by the osc to keep track of what objects to build into rpcs.
	 * Protected by client_obd->cli_loi_list_lock.
	 */
	struct list_head	   oo_ready_item;
	struct list_head	   oo_hp_ready_item;
	struct list_head	   oo_write_item;
	struct list_head	   oo_read_item;

	/**
	 * extent is a red black tree to manage (async) dirty pages.
	 */
	struct rb_root       oo_root;
	/**
	 * Manage write(dirty) extents.
	 */
	struct list_head	   oo_hp_exts; /* list of hp extents */
	struct list_head	   oo_urgent_exts; /* list of writeback extents */
	struct list_head	   oo_rpc_exts;

	struct list_head	   oo_reading_exts;

	atomic_t	 oo_nr_reads;
	atomic_t	 oo_nr_writes;

	/** Protect extent tree. Will be used to protect
	 * oo_{read|write}_pages soon.
	 */
	spinlock_t	    oo_lock;
};

static inline void osc_object_lock(struct osc_object *obj)
{
	spin_lock(&obj->oo_lock);
}

static inline int osc_object_trylock(struct osc_object *obj)
{
	return spin_trylock(&obj->oo_lock);
}

static inline void osc_object_unlock(struct osc_object *obj)
{
	spin_unlock(&obj->oo_lock);
}

static inline int osc_object_is_locked(struct osc_object *obj)
{
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
	return spin_is_locked(&obj->oo_lock);
#else
	/*
	 * It is not perfect to return true all the time.
	 * But since this function is only used for assertion
	 * and checking, it seems OK.
	 */
	return 1;
#endif
}

/*
 * Lock "micro-states" for osc layer.
 */
enum osc_lock_state {
	OLS_NEW,
	OLS_ENQUEUED,
	OLS_UPCALL_RECEIVED,
	OLS_GRANTED,
	OLS_RELEASED,
	OLS_BLOCKED,
	OLS_CANCELLED
};

/**
 * osc-private state of cl_lock.
 *
 * Interaction with DLM.
 *
 * CLIO enqueues all DLM locks through ptlrpcd (that is, in "async" mode).
 *
 * Once receive upcall is invoked, osc_lock remembers a handle of DLM lock in
 * osc_lock::ols_handle and a pointer to that lock in osc_lock::ols_lock.
 *
 * This pointer is protected through a reference, acquired by
 * osc_lock_upcall0(). Also, an additional reference is acquired by
 * ldlm_lock_addref() call protecting the lock from cancellation, until
 * osc_lock_unuse() releases it.
 *
 * Below is a description of how lock references are acquired and released
 * inside of DLM.
 *
 * - When new lock is created and enqueued to the server (ldlm_cli_enqueue())
 *      - ldlm_lock_create()
 *	  - ldlm_lock_new(): initializes a lock with 2 references. One for
 *	    the caller (released when reply from the server is received, or on
 *	    error), and another for the hash table.
 *      - ldlm_lock_addref_internal(): protects the lock from cancellation.
 *
 * - When reply is received from the server (osc_enqueue_interpret())
 *      - ldlm_cli_enqueue_fini()
 *	  - LDLM_LOCK_PUT(): releases caller reference acquired by
 *	    ldlm_lock_new().
 *	  - if (rc != 0)
 *		ldlm_lock_decref(): error case: matches ldlm_cli_enqueue().
 *      - ldlm_lock_decref(): for async locks, matches ldlm_cli_enqueue().
 *
 * - When lock is being cancelled (ldlm_lock_cancel())
 *      - ldlm_lock_destroy()
 *	  - LDLM_LOCK_PUT(): releases hash-table reference acquired by
 *	    ldlm_lock_new().
 *
 * osc_lock is detached from ldlm_lock by osc_lock_detach() that is called
 * either when lock is cancelled (osc_lock_blocking()), or when locks is
 * deleted without cancellation (e.g., from cl_locks_prune()). In the latter
 * case ldlm lock remains in memory, and can be re-attached to osc_lock in the
 * future.
 */
struct osc_lock {
	struct cl_lock_slice     ols_cl;
	/** underlying DLM lock */
	struct ldlm_lock	*ols_lock;
	/** lock value block */
	struct ost_lvb	   ols_lvb;
	/** DLM flags with which osc_lock::ols_lock was enqueued */
	__u64		    ols_flags;
	/** osc_lock::ols_lock handle */
	struct lustre_handle     ols_handle;
	struct ldlm_enqueue_info ols_einfo;
	enum osc_lock_state      ols_state;

	/**
	 * How many pages are using this lock for io, currently only used by
	 * read-ahead. If non-zero, the underlying dlm lock won't be cancelled
	 * during recovery to avoid deadlock. see bz16774.
	 *
	 * \see osc_page::ops_lock
	 * \see osc_page_addref_lock(), osc_page_putref_lock()
	 */
	atomic_t	     ols_pageref;

	/**
	 * true, if ldlm_lock_addref() was called against
	 * osc_lock::ols_lock. This is used for sanity checking.
	 *
	 * \see osc_lock::ols_has_ref
	 */
	unsigned		  ols_hold :1,
	/**
	 * this is much like osc_lock::ols_hold, except that this bit is
	 * cleared _after_ reference in released in osc_lock_unuse(). This
	 * fine distinction is needed because:
	 *
	 *     - if ldlm lock still has a reference, osc_ast_data_get() needs
	 *       to return associated cl_lock (so that a flag is needed that is
	 *       cleared after ldlm_lock_decref() returned), and
	 *
	 *     - ldlm_lock_decref() can invoke blocking ast (for a
	 *       LDLM_FL_CBPENDING lock), and osc_lock functions like
	 *       osc_lock_cancel() called from there need to know whether to
	 *       release lock reference (so that a flag is needed that is
	 *       cleared before ldlm_lock_decref() is called).
	 */
				 ols_has_ref:1,
	/**
	 * inherit the lockless attribute from top level cl_io.
	 * If true, osc_lock_enqueue is able to tolerate the -EUSERS error.
	 */
				 ols_locklessable:1,
	/**
	 * set by osc_lock_use() to wait until blocking AST enters into
	 * osc_ldlm_blocking_ast0(), so that cl_lock mutex can be used for
	 * further synchronization.
	 */
				 ols_ast_wait:1,
	/**
	 * If the data of this lock has been flushed to server side.
	 */
				 ols_flush:1,
	/**
	 * if set, the osc_lock is a glimpse lock. For glimpse locks, we treat
	 * the EVAVAIL error as tolerable, this will make upper logic happy
	 * to wait all glimpse locks to each OSTs to be completed.
	 * Glimpse lock converts to normal lock if the server lock is
	 * granted.
	 * Glimpse lock should be destroyed immediately after use.
	 */
				 ols_glimpse:1,
	/**
	 * For async glimpse lock.
	 */
				 ols_agl:1;
	/**
	 * IO that owns this lock. This field is used for a dead-lock
	 * avoidance by osc_lock_enqueue_wait().
	 *
	 * XXX: unfortunately, the owner of a osc_lock is not unique,
	 * the lock may have multiple users, if the lock is granted and
	 * then matched.
	 */
	struct osc_io	   *ols_owner;
};

/**
 * Page state private for osc layer.
 */
struct osc_page {
	struct cl_page_slice  ops_cl;
	/**
	 * Page queues used by osc to detect when RPC can be formed.
	 */
	struct osc_async_page ops_oap;
	/**
	 * An offset within page from which next transfer starts. This is used
	 * by cl_page_clip() to submit partial page transfers.
	 */
	int		   ops_from;
	/**
	 * An offset within page at which next transfer ends.
	 *
	 * \see osc_page::ops_from.
	 */
	int		   ops_to;
	/**
	 * Boolean, true iff page is under transfer. Used for sanity checking.
	 */
	unsigned	      ops_transfer_pinned:1,
	/**
	 * True for a `temporary page' created by read-ahead code, probably
	 * outside of any DLM lock.
	 */
			      ops_temp:1,
	/**
	 * in LRU?
	 */
			      ops_in_lru:1,
	/**
	 * Set if the page must be transferred with OBD_BRW_SRVLOCK.
	 */
			      ops_srvlock:1;
	union {
		/**
		 * lru page list. ops_inflight and ops_lru are exclusive so
		 * that they can share the same data.
		 */
		struct list_head	      ops_lru;
		/**
		 * Linkage into a per-osc_object list of pages in flight. For
		 * debugging.
		 */
		struct list_head	    ops_inflight;
	};
	/**
	 * Thread that submitted this page for transfer. For debugging.
	 */
	struct task_struct	*ops_submitter;
	/**
	 * Submit time - the time when the page is starting RPC. For debugging.
	 */
	unsigned long	    ops_submit_time;

	/**
	 * A lock of which we hold a reference covers this page. Only used by
	 * read-ahead: for a readahead page, we hold it's covering lock to
	 * prevent it from being canceled during recovery.
	 *
	 * \see osc_lock::ols_pageref
	 * \see osc_page_addref_lock(), osc_page_putref_lock().
	 */
	struct cl_lock       *ops_lock;
};

extern struct kmem_cache *osc_lock_kmem;
extern struct kmem_cache *osc_object_kmem;
extern struct kmem_cache *osc_thread_kmem;
extern struct kmem_cache *osc_session_kmem;
extern struct kmem_cache *osc_req_kmem;
extern struct kmem_cache *osc_extent_kmem;

extern struct lu_device_type osc_device_type;
extern struct lu_context_key osc_key;
extern struct lu_context_key osc_session_key;

#define OSC_FLAGS (ASYNC_URGENT|ASYNC_READY)

int osc_lock_init(const struct lu_env *env,
		  struct cl_object *obj, struct cl_lock *lock,
		  const struct cl_io *io);
int osc_io_init  (const struct lu_env *env,
		  struct cl_object *obj, struct cl_io *io);
int osc_req_init (const struct lu_env *env, struct cl_device *dev,
		  struct cl_req *req);
struct lu_object *osc_object_alloc(const struct lu_env *env,
				   const struct lu_object_header *hdr,
				   struct lu_device *dev);
int osc_page_init(const struct lu_env *env, struct cl_object *obj,
		  struct cl_page *page, struct page *vmpage);

void osc_index2policy  (ldlm_policy_data_t *policy, const struct cl_object *obj,
			pgoff_t start, pgoff_t end);
int  osc_lvb_print     (const struct lu_env *env, void *cookie,
			lu_printer_t p, const struct ost_lvb *lvb);

void osc_page_submit(const struct lu_env *env, struct osc_page *opg,
		     enum cl_req_type crt, int brw_flags);
int osc_cancel_async_page(const struct lu_env *env, struct osc_page *ops);
int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg,
			u32 async_flags);
int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
			struct page *page, loff_t offset);
int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
		       struct osc_page *ops);
int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
			    struct osc_page *ops);
int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
			 struct osc_page *ops);
int osc_queue_sync_pages(const struct lu_env *env, struct osc_object *obj,
			 struct list_head *list, int cmd, int brw_flags);
int osc_cache_truncate_start(const struct lu_env *env, struct osc_io *oio,
			     struct osc_object *obj, __u64 size);
void osc_cache_truncate_end(const struct lu_env *env, struct osc_io *oio,
			    struct osc_object *obj);
int osc_cache_writeback_range(const struct lu_env *env, struct osc_object *obj,
			      pgoff_t start, pgoff_t end, int hp, int discard);
int osc_cache_wait_range(const struct lu_env *env, struct osc_object *obj,
			 pgoff_t start, pgoff_t end);
void osc_io_unplug(const struct lu_env *env, struct client_obd *cli,
		   struct osc_object *osc);

void osc_object_set_contended  (struct osc_object *obj);
void osc_object_clear_contended(struct osc_object *obj);
int  osc_object_is_contended   (struct osc_object *obj);

int  osc_lock_is_lockless      (const struct osc_lock *olck);

/*****************************************************************************
 *
 * Accessors.
 *
 */

static inline struct osc_thread_info *osc_env_info(const struct lu_env *env)
{
	struct osc_thread_info *info;

	info = lu_context_key_get(&env->le_ctx, &osc_key);
	LASSERT(info);
	return info;
}

static inline struct osc_session *osc_env_session(const struct lu_env *env)
{
	struct osc_session *ses;

	ses = lu_context_key_get(env->le_ses, &osc_session_key);
	LASSERT(ses);
	return ses;
}

static inline struct osc_io *osc_env_io(const struct lu_env *env)
{
	return &osc_env_session(env)->os_io;
}

static inline int osc_is_object(const struct lu_object *obj)
{
	return obj->lo_dev->ld_type == &osc_device_type;
}

static inline struct osc_device *lu2osc_dev(const struct lu_device *d)
{
	LINVRNT(d->ld_type == &osc_device_type);
	return container_of0(d, struct osc_device, od_cl.cd_lu_dev);
}

static inline struct obd_export *osc_export(const struct osc_object *obj)
{
	return lu2osc_dev(obj->oo_cl.co_lu.lo_dev)->od_exp;
}

static inline struct client_obd *osc_cli(const struct osc_object *obj)
{
	return &osc_export(obj)->exp_obd->u.cli;
}

static inline struct osc_object *cl2osc(const struct cl_object *obj)
{
	LINVRNT(osc_is_object(&obj->co_lu));
	return container_of0(obj, struct osc_object, oo_cl);
}

static inline struct cl_object *osc2cl(const struct osc_object *obj)
{
	return (struct cl_object *)&obj->oo_cl;
}

static inline enum ldlm_mode osc_cl_lock2ldlm(enum cl_lock_mode mode)
{
	LASSERT(mode == CLM_READ || mode == CLM_WRITE || mode == CLM_GROUP);
	if (mode == CLM_READ)
		return LCK_PR;
	else if (mode == CLM_WRITE)
		return LCK_PW;
	else
		return LCK_GROUP;
}

static inline enum cl_lock_mode osc_ldlm2cl_lock(enum ldlm_mode mode)
{
	LASSERT(mode == LCK_PR || mode == LCK_PW || mode == LCK_GROUP);
	if (mode == LCK_PR)
		return CLM_READ;
	else if (mode == LCK_PW)
		return CLM_WRITE;
	else
		return CLM_GROUP;
}

static inline struct osc_page *cl2osc_page(const struct cl_page_slice *slice)
{
	LINVRNT(osc_is_object(&slice->cpl_obj->co_lu));
	return container_of0(slice, struct osc_page, ops_cl);
}

static inline struct osc_page *oap2osc(struct osc_async_page *oap)
{
	return container_of0(oap, struct osc_page, ops_oap);
}

static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
{
	return oap2osc(oap)->ops_cl.cpl_page;
}

static inline struct osc_page *oap2osc_page(struct osc_async_page *oap)
{
	return (struct osc_page *)container_of(oap, struct osc_page, ops_oap);
}

static inline struct osc_lock *cl2osc_lock(const struct cl_lock_slice *slice)
{
	LINVRNT(osc_is_object(&slice->cls_obj->co_lu));
	return container_of0(slice, struct osc_lock, ols_cl);
}

static inline struct osc_lock *osc_lock_at(const struct cl_lock *lock)
{
	return cl2osc_lock(cl_lock_at(lock, &osc_device_type));
}

static inline int osc_io_srvlock(struct osc_io *oio)
{
	return (oio->oi_lockless && !oio->oi_cl.cis_io->ci_no_srvlock);
}

enum osc_extent_state {
	OES_INV       = 0, /** extent is just initialized or destroyed */
	OES_ACTIVE    = 1, /** process is using this extent */
	OES_CACHE     = 2, /** extent is ready for IO */
	OES_LOCKING   = 3, /** locking page to prepare IO */
	OES_LOCK_DONE = 4, /** locking finished, ready to send */
	OES_RPC       = 5, /** in RPC */
	OES_TRUNC     = 6, /** being truncated */
	OES_STATE_MAX
};

/**
 * osc_extent data to manage dirty pages.
 * osc_extent has the following attributes:
 * 1. all pages in the same must be in one RPC in write back;
 * 2. # of pages must be less than max_pages_per_rpc - implied by 1;
 * 3. must be covered by only 1 osc_lock;
 * 4. exclusive. It's impossible to have overlapped osc_extent.
 *
 * The lifetime of an extent is from when the 1st page is dirtied to when
 * all pages inside it are written out.
 *
 * LOCKING ORDER
 * =============
 * page lock -> client_obd_list_lock -> object lock(osc_object::oo_lock)
 */
struct osc_extent {
	/** red-black tree node */
	struct rb_node     oe_node;
	/** osc_object of this extent */
	struct osc_object *oe_obj;
	/** refcount, removed from red-black tree if reaches zero. */
	atomic_t       oe_refc;
	/** busy if non-zero */
	atomic_t       oe_users;
	/** link list of osc_object's oo_{hp|urgent|locking}_exts. */
	struct list_head	 oe_link;
	/** state of this extent */
	unsigned int       oe_state;
	/** flags for this extent. */
	unsigned int       oe_intree:1,
	/** 0 is write, 1 is read */
			   oe_rw:1,
			   oe_srvlock:1,
			   oe_memalloc:1,
	/** an ACTIVE extent is going to be truncated, so when this extent
	 * is released, it will turn into TRUNC state instead of CACHE.
	 */
			   oe_trunc_pending:1,
	/** this extent should be written asap and someone may wait for the
	 * write to finish. This bit is usually set along with urgent if
	 * the extent was CACHE state.
	 * fsync_wait extent can't be merged because new extent region may
	 * exceed fsync range.
	 */
			   oe_fsync_wait:1,
	/** covering lock is being canceled */
			   oe_hp:1,
	/** this extent should be written back asap. set if one of pages is
	 * called by page WB daemon, or sync write or reading requests.
	 */
			   oe_urgent:1;
	/** how many grants allocated for this extent.
	 *  Grant allocated for this extent. There is no grant allocated
	 *  for reading extents and sync write extents.
	 */
	unsigned int       oe_grants;
	/** # of dirty pages in this extent */
	unsigned int       oe_nr_pages;
	/** list of pending oap pages. Pages in this list are NOT sorted. */
	struct list_head	 oe_pages;
	/** Since an extent has to be written out in atomic, this is used to
	 * remember the next page need to be locked to write this extent out.
	 * Not used right now.
	 */
	struct osc_page   *oe_next_page;
	/** start and end index of this extent, include start and end
	 * themselves. Page offset here is the page index of osc_pages.
	 * oe_start is used as keyword for red-black tree.
	 */
	pgoff_t	    oe_start;
	pgoff_t	    oe_end;
	/** maximum ending index of this extent, this is limited by
	 * max_pages_per_rpc, lock extent and chunk size.
	 */
	pgoff_t	    oe_max_end;
	/** waitqueue - for those who want to be notified if this extent's
	 * state has changed.
	 */
	wait_queue_head_t	oe_waitq;
	/** lock covering this extent */
	struct cl_lock    *oe_osclock;
	/** terminator of this extent. Must be true if this extent is in IO. */
	struct task_struct	*oe_owner;
	/** return value of writeback. If somebody is waiting for this extent,
	 * this value can be known by outside world.
	 */
	int		oe_rc;
	/** max pages per rpc when this extent was created */
	unsigned int       oe_mppr;
};

int osc_extent_finish(const struct lu_env *env, struct osc_extent *ext,
		      int sent, int rc);
void osc_extent_release(const struct lu_env *env, struct osc_extent *ext);

/** @} osc */

#endif /* OSC_CL_INTERNAL_H */