patch-2.4.20 linux-2.4.20/fs/jfs/jfs_logmgr.h

Next file: linux-2.4.20/fs/jfs/jfs_metapage.c
Previous file: linux-2.4.20/fs/jfs/jfs_logmgr.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.19/fs/jfs/jfs_logmgr.h linux-2.4.20/fs/jfs/jfs_logmgr.h
@@ -0,0 +1,512 @@
+/*
+ *   Copyright (c) International Business Machines Corp., 2000-2002
+ *   Portions Copyright (c) Christoph Hellwig, 2001-2002
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or 
+ *   (at your option) any later version.
+ * 
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software 
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef	_H_JFS_LOGMGR
+#define _H_JFS_LOGMGR
+
+#include "jfs_filsys.h"
+#include "jfs_lock.h"
+
+/*
+ *	log manager configuration parameters
+ */
+
+/* log page size */
+#define	LOGPSIZE	4096
+#define	L2LOGPSIZE	12
+
+#define LOGPAGES	16	/* Log pages per mounted file system */
+
+/*
+ *	log logical volume
+ *
+ * a log is used to make the commit operation on journalled 
+ * files within the same logical volume group atomic.
+ * a log is implemented with a logical volume.
+ * there is one log per logical volume group. 
+ *
+ * block 0 of the log logical volume is not used (ipl etc).
+ * block 1 contains a log "superblock" and is used by logFormat(),
+ * lmLogInit(), lmLogShutdown(), and logRedo() to record status 
+ * of the log but is not otherwise used during normal processing. 
+ * blocks 2 - (N-1) are used to contain log records.
+ *
+ * when a volume group is varied-on-line, logRedo() must have 
+ * been executed before the file systems (logical volumes) in 
+ * the volume group can be mounted.
+ */
+/*
+ *	log superblock (block 1 of logical volume)
+ */
+#define	LOGSUPER_B	1
+#define	LOGSTART_B	2
+
+#define	LOGMAGIC	0x87654321
+#define	LOGVERSION	1
+
+#define MAX_ACTIVE	128	/* Max active file systems sharing log */
+
+struct logsuper {
+	u32 magic;		/* 4: log lv identifier */
+	s32 version;		/* 4: version number */
+	s32 serial;		/* 4: log open/mount counter */
+	s32 size;		/* 4: size in number of LOGPSIZE blocks */
+	s32 bsize;		/* 4: logical block size in byte */
+	s32 l2bsize;		/* 4: log2 of bsize */
+
+	u32 flag;		/* 4: option */
+	u32 state;		/* 4: state - see below */
+
+	s32 end;		/* 4: addr of last log record set by logredo */
+	char uuid[16];		/* 16: 128-bit journal uuid */
+	char label[16];		/* 16: journal label */
+	struct {
+		char uuid[16];
+	} active[MAX_ACTIVE];	/* 2048: active file systems list */
+};
+
+#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+
+/* log flag: commit option (see jfs_filsys.h) */
+
+/* log state */
+#define	LOGMOUNT	0	/* log mounted by lmLogInit() */
+#define LOGREDONE	1	/* log shutdown by lmLogShutdown().
+				 * log redo completed by logredo().
+				 */
+#define LOGWRAP		2	/* log wrapped */
+#define LOGREADERR	3	/* log read error detected in logredo() */
+
+
+/*
+ *	log logical page
+ *
+ * (this comment should be rewritten !)
+ * the header and trailer structures (h,t) will normally have 
+ * the same page and eor value.
+ * An exception to this occurs when a complete page write is not 
+ * accomplished on a power failure. Since the hardware may "split write"
+ * sectors in the page, any out of order sequence may occur during powerfail 
+ * and needs to be recognized during log replay.  The xor value is
+ * an "exclusive or" of all log words in the page up to eor.  This
+ * 32 bit eor is stored with the top 16 bits in the header and the
+ * bottom 16 bits in the trailer.  logredo can easily recognize pages
+ * that were not completed by reconstructing this eor and checking 
+ * the log page.
+ *
+ * Previous versions of the operating system did not allow split 
+ * writes and detected partially written records in logredo by 
+ * ordering the updates to the header, trailer, and the move of data 
+ * into the logdata area.  The order: (1) data is moved (2) header 
+ * is updated (3) trailer is updated.  In logredo, when the header 
+ * differed from the trailer, the header and trailer were reconciled 
+ * as follows: if h.page != t.page they were set to the smaller of 
+ * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) 
+ * h.eor != t.eor they were set to the smaller of their two values.
+ */
+struct logpage {
+	struct {		/* header */
+		s32 page;	/* 4: log sequence page number */
+		s16 rsrvd;	/* 2: */
+		s16 eor;	/* 2: end-of-log offset of lasrt record write */
+	} h;
+
+	s32 data[LOGPSIZE / 4 - 4];	/* log record area */
+
+	struct {		/* trailer */
+		s32 page;	/* 4: normally the same as h.page */
+		s16 rsrvd;	/* 2: */
+		s16 eor;	/* 2: normally the same as h.eor */
+	} t;
+};
+
+#define LOGPHDRSIZE	8	/* log page header size */
+#define LOGPTLRSIZE	8	/* log page trailer size */
+
+
+/*
+ *	log record
+ *
+ * (this comment should be rewritten !)
+ * jfs uses only "after" log records (only a single writer is allowed
+ * in a  page, pages are written to temporary paging space if
+ * if they must be written to disk before commit, and i/o is
+ * scheduled for modified pages to their home location after
+ * the log records containing the after values and the commit 
+ * record is written to the log on disk, undo discards the copy
+ * in main-memory.)
+ *
+ * a log record consists of a data area of variable length followed by 
+ * a descriptor of fixed size LOGRDSIZE bytes.
+ * the  data area is rounded up to an integral number of 4-bytes and 
+ * must be no longer than LOGPSIZE.
+ * the descriptor is of size of multiple of 4-bytes and aligned on a 
+ * 4-byte boundary. 
+ * records are packed one after the other in the data area of log pages.
+ * (sometimes a DUMMY record is inserted so that at least one record ends 
+ * on every page or the longest record is placed on at most two pages).
+ * the field eor in page header/trailer points to the byte following 
+ * the last record on a page.
+ */
+
+/* log record types */
+#define LOG_COMMIT		0x8000
+#define LOG_SYNCPT		0x4000
+#define LOG_MOUNT		0x2000
+#define LOG_REDOPAGE		0x0800
+#define LOG_NOREDOPAGE		0x0080
+#define LOG_NOREDOINOEXT	0x0040
+#define LOG_UPDATEMAP		0x0008
+#define LOG_NOREDOFILE		0x0001
+
+/* REDOPAGE/NOREDOPAGE log record data type */
+#define	LOG_INODE		0x0001
+#define	LOG_XTREE		0x0002
+#define	LOG_DTREE		0x0004
+#define	LOG_BTROOT		0x0010
+#define	LOG_EA			0x0020
+#define	LOG_ACL			0x0040
+#define	LOG_DATA		0x0080
+#define	LOG_NEW			0x0100
+#define	LOG_EXTEND		0x0200
+#define LOG_RELOCATE		0x0400
+#define LOG_DIR_XTREE		0x0800	/* Xtree is in directory inode */
+
+/* UPDATEMAP log record descriptor type */
+#define	LOG_ALLOCXADLIST	0x0080
+#define	LOG_ALLOCPXDLIST	0x0040
+#define	LOG_ALLOCXAD		0x0020
+#define	LOG_ALLOCPXD		0x0010
+#define	LOG_FREEXADLIST		0x0008
+#define	LOG_FREEPXDLIST		0x0004
+#define	LOG_FREEXAD		0x0002
+#define	LOG_FREEPXD		0x0001
+
+
+struct lrd {
+	/*
+	 * type independent area
+	 */
+	s32 logtid;		/* 4: log transaction identifier */
+	s32 backchain;		/* 4: ptr to prev record of same transaction */
+	u16 type;		/* 2: record type */
+	s16 length;		/* 2: length of data in record (in byte) */
+	u32 aggregate;		/* 4: file system lv/aggregate */
+	/* (16) */
+
+	/*
+	 * type dependent area (20)
+	 */
+	union {
+
+		/*
+		 *      COMMIT: commit
+		 *
+		 * transaction commit: no type-dependent information;
+		 */
+
+		/*
+		 *      REDOPAGE: after-image
+		 *
+		 * apply after-image;
+		 *
+		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+		 */
+		struct {
+			u32 fileset;	/* 4: fileset number */
+			u32 inode;	/* 4: inode number */
+			u16 type;	/* 2: REDOPAGE record type */
+			s16 l2linesize;	/* 2: log2 of line size */
+			pxd_t pxd;	/* 8: on-disk page pxd */
+		} redopage;	/* (20) */
+
+		/*
+		 *      NOREDOPAGE: the page is freed
+		 *
+		 * do not apply after-image records which precede this record
+		 * in the log with the same page block number to this page.
+		 *
+		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+		 */
+		struct {
+			s32 fileset;	/* 4: fileset number */
+			u32 inode;	/* 4: inode number */
+			u16 type;	/* 2: NOREDOPAGE record type */
+			s16 rsrvd;	/* 2: reserved */
+			pxd_t pxd;	/* 8: on-disk page pxd */
+		} noredopage;	/* (20) */
+
+		/*
+		 *      UPDATEMAP: update block allocation map
+		 *
+		 * either in-line PXD,
+		 * or     out-of-line  XADLIST;
+		 *
+		 * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+		 */
+		struct {
+			u32 fileset;	/* 4: fileset number */
+			u32 inode;	/* 4: inode number */
+			u16 type;	/* 2: UPDATEMAP record type */
+			s16 nxd;	/* 2: number of extents */
+			pxd_t pxd;	/* 8: pxd */
+		} updatemap;	/* (20) */
+
+		/*
+		 *      NOREDOINOEXT: the inode extent is freed
+		 *
+		 * do not apply after-image records which precede this 
+		 * record in the log with the any of the 4 page block 
+		 * numbers in this inode extent. 
+		 * 
+		 * NOTE: The fileset and pxd fields MUST remain in 
+		 *       the same fields in the REDOPAGE record format.
+		 *
+		 */
+		struct {
+			s32 fileset;	/* 4: fileset number */
+			s32 iagnum;	/* 4: IAG number     */
+			s32 inoext_idx;	/* 4: inode extent index */
+			pxd_t pxd;	/* 8: on-disk page pxd */
+		} noredoinoext;	/* (20) */
+
+		/*
+		 *      SYNCPT: log sync point
+		 *
+		 * replay log upto syncpt address specified;
+		 */
+		struct {
+			s32 sync;	/* 4: syncpt address (0 = here) */
+		} syncpt;
+
+		/*
+		 *      MOUNT: file system mount
+		 *
+		 * file system mount: no type-dependent information;
+		 */
+
+		/*
+		 *      ? FREEXTENT: free specified extent(s)
+		 *
+		 * free specified extent(s) from block allocation map
+		 * N.B.: nextents should be length of data/sizeof(xad_t)
+		 */
+		struct {
+			s32 type;	/* 4: FREEXTENT record type */
+			s32 nextent;	/* 4: number of extents */
+
+			/* data: PXD or XAD list */
+		} freextent;
+
+		/*
+		 *      ? NOREDOFILE: this file is freed
+		 *
+		 * do not apply records which precede this record in the log
+		 * with the same inode number.
+		 *
+		 * NOREDILE must be the first to be written at commit
+		 * (last to be read in logredo()) - it prevents
+		 * replay of preceding updates of all preceding generations
+		 * of the inumber esp. the on-disk inode itself, 
+		 * but does NOT prevent
+		 * replay of the 
+		 */
+		struct {
+			s32 fileset;	/* 4: fileset number */
+			u32 inode;	/* 4: inode number */
+		} noredofile;
+
+		/*
+		 *      ? NEWPAGE: 
+		 *
+		 * metadata type dependent
+		 */
+		struct {
+			s32 fileset;	/* 4: fileset number */
+			u32 inode;	/* 4: inode number */
+			s32 type;	/* 4: NEWPAGE record type */
+			pxd_t pxd;	/* 8: on-disk page pxd */
+		} newpage;
+
+		/*
+		 *      ? DUMMY: filler
+		 *
+		 * no type-dependent information
+		 */
+	} log;
+};					/* (36) */
+
+#define	LOGRDSIZE	(sizeof(struct lrd))
+
+/*
+ *	line vector descriptor
+ */
+struct lvd {
+	s16 offset;
+	s16 length;
+};
+
+
+/*
+ *	log logical volume
+ */
+struct jfs_log {
+
+	struct super_block *sb;	/* 4: This is used to sync metadata
+				 *    before writing syncpt.  Will
+				 *    need to be a list if we share
+				 *    the log between fs's
+				 */
+	struct block_device *bdev; /* 4: log lv pointer */
+	s32 serial;		/* 4: log mount serial number */
+
+	s64 base;		/* @8: log extent address (inline log ) */
+	int size;		/* 4: log size in log page (in page) */
+	int l2bsize;		/* 4: log2 of bsize */
+
+	long flag;		/* 4: flag */
+
+	struct lbuf *lbuf_free;	/* 4: free lbufs */
+	wait_queue_head_t free_wait;	/* 4: */
+
+	/* log write */
+	int logtid;		/* 4: log tid */
+	int page;		/* 4: page number of eol page */
+	int eor;		/* 4: eor of last record in eol page */
+	struct lbuf *bp;	/* 4: current log page buffer */
+
+	struct semaphore loglock;	/* 4: log write serialization lock */
+
+	/* syncpt */
+	int nextsync;		/* 4: bytes to write before next syncpt */
+	int active;		/* 4: */
+	wait_queue_head_t syncwait;	/* 4: */
+
+	/* commit */
+	uint cflag;		/* 4: */
+	struct {		/* 8: FIFO commit queue header */
+		struct tblock *head;
+		struct tblock *tail;
+	} cqueue;
+	int gcrtc;		/* 4: GC_READY transaction count */
+	struct tblock *gclrt;	/* 4: latest GC_READY transaction */
+	spinlock_t gclock;	/* 4: group commit lock */
+	int logsize;		/* 4: log data area size in byte */
+	int lsn;		/* 4: end-of-log */
+	int clsn;		/* 4: clsn */
+	int syncpt;		/* 4: addr of last syncpt record */
+	int sync;		/* 4: addr from last logsync() */
+	struct list_head synclist;	/* 8: logsynclist anchor */
+	spinlock_t synclock;	/* 4: synclist lock */
+	struct lbuf *wqueue;	/* 4: log pageout queue */
+	int count;		/* 4: count */
+	char uuid[16];		/* 16: 128-bit uuid of log device */
+};
+
+/*
+ * Log flag
+ */
+#define log_INLINELOG	1
+#define log_SYNCBARRIER	2
+#define log_QUIESCE	3
+
+/*
+ * group commit flag
+ */
+/* jfs_log */
+#define logGC_PAGEOUT	0x00000001
+
+/* tblock/lbuf */
+#define tblkGC_QUEUE		0x0001
+#define tblkGC_READY		0x0002
+#define tblkGC_COMMIT		0x0004
+#define tblkGC_COMMITTED	0x0008
+#define tblkGC_EOP		0x0010
+#define tblkGC_FREE		0x0020
+#define tblkGC_LEADER		0x0040
+#define tblkGC_ERROR		0x0080
+#define tblkGC_LAZY		0x0100	// D230860
+#define tblkGC_UNLOCKED		0x0200	// D230860
+
+/*
+ *		log cache buffer header
+ */
+struct lbuf {
+	struct buffer_head l_bh;	/* for doing I/O */
+	struct jfs_log *l_log;		/* 4: log associated with buffer */
+
+	/*
+	 * data buffer base area
+	 */
+	uint l_flag;		/* 4: pageout control flags */
+
+	struct lbuf *l_wqnext;	/* 4: write queue link */
+	struct lbuf *l_freelist;	/* 4: freelistlink */
+
+	int l_pn;		/* 4: log page number */
+	int l_eor;		/* 4: log record eor */
+	int l_ceor;		/* 4: committed log record eor */
+
+	s64 l_blkno;		/* 8: log page block number */
+	caddr_t l_ldata;	/* 4: data page */
+
+	wait_queue_head_t l_ioevent;	/* 4: i/o done event */
+	struct page *l_page;	/* The page itself */
+};
+
+/* Reuse l_freelist for redrive list */
+#define l_redrive_next l_freelist
+
+/*
+ *	logsynclist block
+ *
+ * common logsyncblk prefix for jbuf_t and tblock
+ */
+struct logsyncblk {
+	u16 xflag;		/* flags */
+	u16 flag;		/* only meaninful in tblock */
+	lid_t lid;		/* lock id */
+	s32 lsn;		/* log sequence number */
+	struct list_head synclist;	/* log sync list link */
+};
+
+/*
+ *	logsynclist serialization (per log)
+ */
+
+#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
+#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock)
+#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock)
+
+/* compute the difference in bytes of lsn from sync point */
+#define logdiff(diff, lsn, log)\
+{\
+	diff = (lsn) - (log)->syncpt;\
+	if (diff < 0)\
+		diff += (log)->logsize;\
+}
+
+extern int lmLogOpen(struct super_block *sb, struct jfs_log ** log);
+extern void lmLogWait(struct jfs_log * log);
+extern int lmLogClose(struct super_block *sb, struct jfs_log * log);
+extern int lmLogSync(struct jfs_log * log, int nosyncwait);
+extern int lmLogShutdown(struct jfs_log * log);
+extern int lmLogInit(struct jfs_log * log);
+extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
+
+#endif				/* _H_JFS_LOGMGR */

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)