[LogFS] Split large truncated into smaller chunks
[kernel.git] / fs / logfs / super.c
1 /*
2  * fs/logfs/super.c
3  *
4  * As should be obvious for Linux kernel code, license is GPLv2
5  *
6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7  *
8  * Generally contains mount/umount code and also serves as a dump area for
9  * any functions that don't fit elsewhere and neither justify a file of their
10  * own.
11  */
12 #include "logfs.h"
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/mtd/mtd.h>
16 #include <linux/statfs.h>
17 #include <linux/buffer_head.h>
18
/*
 * Single fallback page, allocated in logfs_init() and used by
 * emergency_read_begin() when the page cache yields no page.
 * emergency_mutex is held from emergency_read_begin() until the matching
 * emergency_read_end() while that page is in use.
 */
static struct page *emergency_page;
static DEFINE_MUTEX(emergency_mutex);
21
22 struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
23 {
24         filler_t *filler = (filler_t *)mapping->a_ops->readpage;
25         struct page *page;
26         int err;
27
28         page = read_cache_page(mapping, index, filler, NULL);
29         if (page)
30                 return page;
31
32         /* No more pages available, switch to emergency page */
33         printk(KERN_INFO"Logfs: Using emergency page\n");
34         mutex_lock(&emergency_mutex);
35         err = filler(NULL, emergency_page);
36         if (err) {
37                 mutex_unlock(&emergency_mutex);
38                 printk(KERN_EMERG"Logfs: Error reading emergency page\n");
39                 return ERR_PTR(err);
40         }
41         return emergency_page;
42 }
43
44 void emergency_read_end(struct page *page)
45 {
46         if (page == emergency_page)
47                 mutex_unlock(&emergency_mutex);
48         else
49                 page_cache_release(page);
50 }
51
52 static void dump_segfile(struct super_block *sb)
53 {
54         struct logfs_super *super = logfs_super(sb);
55         struct logfs_segment_entry se;
56         u32 segno;
57
58         for (segno = 0; segno < super->s_no_segs; segno++) {
59                 logfs_get_segment_entry(sb, segno, &se);
60                 printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
61                                 be32_to_cpu(se.valid));
62                 if (++segno < super->s_no_segs) {
63                         logfs_get_segment_entry(sb, segno, &se);
64                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
65                                         be32_to_cpu(se.valid));
66                 }
67                 if (++segno < super->s_no_segs) {
68                         logfs_get_segment_entry(sb, segno, &se);
69                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
70                                         be32_to_cpu(se.valid));
71                 }
72                 if (++segno < super->s_no_segs) {
73                         logfs_get_segment_entry(sb, segno, &se);
74                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
75                                         be32_to_cpu(se.valid));
76                 }
77                 printk("\n");
78         }
79 }
80
/*
 * logfs_crash_dump - dump debug information
 *
 * The LogFS superblock only occupies part of a segment, and the intent is to
 * write as much debug information as can be gathered into the spare space.
 * As implemented here, the function only prints the segment file through
 * dump_segfile().
 */
void logfs_crash_dump(struct super_block *sb)
{
	dump_segfile(sb);
}
91
/*
 * TODO: move to lib/string.c
 */
/**
 * memchr_inv - Find the first byte that differs from a given value.
 * @s: The memory area
 * @c: The byte value the area is expected to be filled with; only the low
 *     eight bits are used (it is converted to unsigned char)
 * @n: The size of the area in bytes.
 *
 * This is the inverse of memchr(): it does not search for @c, it searches
 * for anything that is not @c.
 *
 * returns the address of the first byte other than @c, or %NULL
 * if the whole buffer contains just @c (which includes @n == 0).
 */
void *memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *p = s;
	const unsigned char *end = p + n;
	const unsigned char want = (unsigned char)c;

	for (; p != end; p++)
		if (*p != want)
			return (void *)p;

	return NULL;
}
113
114 /*
115  * FIXME: There should be a reserve for root, similar to ext2.
116  */
117 int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
118 {
119         struct super_block *sb = dentry->d_sb;
120         struct logfs_super *super = logfs_super(sb);
121
122         stats->f_type           = LOGFS_MAGIC_U32;
123         stats->f_bsize          = sb->s_blocksize;
124         stats->f_blocks         = super->s_size >> LOGFS_BLOCK_BITS >> 3;
125         stats->f_bfree          = super->s_free_bytes >> sb->s_blocksize_bits;
126         stats->f_bavail         = super->s_free_bytes >> sb->s_blocksize_bits;
127         stats->f_files          = 0;
128         stats->f_ffree          = 0;
129         stats->f_namelen        = LOGFS_MAX_NAMELEN;
130         return 0;
131 }
132
133 static int logfs_sb_set(struct super_block *sb, void *_super)
134 {
135         struct logfs_super *super = _super;
136
137         sb->s_fs_info = super;
138         sb->s_mtd = super->s_mtd;
139         sb->s_bdev = super->s_bdev;
140         if (sb->s_bdev)
141                 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
142         if (sb->s_mtd)
143                 sb->s_bdi = sb->s_mtd->backing_dev_info;
144         return 0;
145 }
146
147 static int logfs_sb_test(struct super_block *sb, void *_super)
148 {
149         struct logfs_super *super = _super;
150         struct mtd_info *mtd = super->s_mtd;
151
152         if (mtd && sb->s_mtd == mtd)
153                 return 1;
154         if (super->s_bdev && sb->s_bdev == super->s_bdev)
155                 return 1;
156         return 0;
157 }
158
159 static void set_segment_header(struct logfs_segment_header *sh, u8 type,
160                 u8 level, u32 segno, u32 ec)
161 {
162         sh->pad = 0;
163         sh->type = type;
164         sh->level = level;
165         sh->segno = cpu_to_be32(segno);
166         sh->ec = cpu_to_be32(ec);
167         sh->gec = cpu_to_be64(segno);
168         sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
169 }
170
171 static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
172                 u32 segno, u32 ec)
173 {
174         struct logfs_super *super = logfs_super(sb);
175         struct logfs_segment_header *sh = &ds->ds_sh;
176         int i;
177
178         memset(ds, 0, sizeof(*ds));
179         set_segment_header(sh, SEG_SUPER, 0, segno, ec);
180
181         ds->ds_ifile_levels     = super->s_ifile_levels;
182         ds->ds_iblock_levels    = super->s_iblock_levels;
183         ds->ds_data_levels      = super->s_data_levels; /* XXX: Remove */
184         ds->ds_segment_shift    = super->s_segshift;
185         ds->ds_block_shift      = sb->s_blocksize_bits;
186         ds->ds_write_shift      = super->s_writeshift;
187         ds->ds_filesystem_size  = cpu_to_be64(super->s_size);
188         ds->ds_segment_size     = cpu_to_be32(super->s_segsize);
189         ds->ds_bad_seg_reserve  = cpu_to_be32(super->s_bad_seg_reserve);
190         ds->ds_feature_incompat = cpu_to_be64(super->s_feature_incompat);
191         ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
192         ds->ds_feature_compat   = cpu_to_be64(super->s_feature_compat);
193         ds->ds_feature_flags    = cpu_to_be64(super->s_feature_flags);
194         ds->ds_root_reserve     = cpu_to_be64(super->s_root_reserve);
195         ds->ds_speed_reserve    = cpu_to_be64(super->s_speed_reserve);
196         journal_for_each(i)
197                 ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
198         ds->ds_magic            = cpu_to_be64(LOGFS_MAGIC);
199         ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
200                         LOGFS_SEGMENT_HEADERSIZE + 12);
201 }
202
203 static int write_one_sb(struct super_block *sb,
204                 struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
205 {
206         struct logfs_super *super = logfs_super(sb);
207         struct logfs_disk_super *ds;
208         struct logfs_segment_entry se;
209         struct page *page;
210         u64 ofs;
211         u32 ec, segno;
212         int err;
213
214         page = find_sb(sb, &ofs);
215         if (!page)
216                 return -EIO;
217         ds = page_address(page);
218         segno = seg_no(sb, ofs);
219         logfs_get_segment_entry(sb, segno, &se);
220         ec = be32_to_cpu(se.ec_level) >> 4;
221         ec++;
222         logfs_set_segment_erased(sb, segno, ec, 0);
223         logfs_write_ds(sb, ds, segno, ec);
224         err = super->s_devops->write_sb(sb, page);
225         page_cache_release(page);
226         return err;
227 }
228
229 int logfs_write_sb(struct super_block *sb)
230 {
231         struct logfs_super *super = logfs_super(sb);
232         int err;
233
234         /* First superblock */
235         err = write_one_sb(sb, super->s_devops->find_first_sb);
236         if (err)
237                 return err;
238
239         /* Last superblock */
240         err = write_one_sb(sb, super->s_devops->find_last_sb);
241         if (err)
242                 return err;
243         return 0;
244 }
245
246 static int ds_cmp(const void *ds0, const void *ds1)
247 {
248         size_t len = sizeof(struct logfs_disk_super);
249
250         /* We know the segment headers differ, so ignore them */
251         len -= LOGFS_SEGMENT_HEADERSIZE;
252         ds0 += LOGFS_SEGMENT_HEADERSIZE;
253         ds1 += LOGFS_SEGMENT_HEADERSIZE;
254         return memcmp(ds0, ds1, len);
255 }
256
257 static int logfs_recover_sb(struct super_block *sb)
258 {
259         struct logfs_super *super = logfs_super(sb);
260         struct logfs_disk_super _ds0, *ds0 = &_ds0;
261         struct logfs_disk_super _ds1, *ds1 = &_ds1;
262         int err, valid0, valid1;
263
264         /* read first superblock */
265         err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
266         if (err)
267                 return err;
268         /* read last superblock */
269         err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
270         if (err)
271                 return err;
272         valid0 = logfs_check_ds(ds0) == 0;
273         valid1 = logfs_check_ds(ds1) == 0;
274
275         if (!valid0 && valid1) {
276                 printk(KERN_INFO"First superblock is invalid - fixing.\n");
277                 return write_one_sb(sb, super->s_devops->find_first_sb);
278         }
279         if (valid0 && !valid1) {
280                 printk(KERN_INFO"Last superblock is invalid - fixing.\n");
281                 return write_one_sb(sb, super->s_devops->find_last_sb);
282         }
283         if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
284                 printk(KERN_INFO"Superblocks don't match - fixing.\n");
285                 return logfs_write_sb(sb);
286         }
287         /* If neither is valid now, something's wrong.  Didn't we properly
288          * check them before?!? */
289         BUG_ON(!valid0 && !valid1);
290         return 0;
291 }
292
/*
 * logfs_make_writeable - prepare a freshly read filesystem for writing
 * @sb: superblock
 *
 * Steps, in order: open the segment file, repair any broken superblock
 * copies, check the areas for trailing unaccounted data, run one GC pass
 * before any data gets dirtied and finally replay the journal.
 *
 * Returns 0 on success or the error of the first step that failed; later
 * steps are not run after a failure.
 */
static int logfs_make_writeable(struct super_block *sb)
{
	int err;

	err = logfs_open_segfile(sb);
	if (!err)
		err = logfs_recover_sb(sb);	/* repair superblock copies */
	if (!err)
		err = logfs_check_areas(sb);	/* trailing unaccounted data */
	if (err)
		return err;

	/* Do one GC pass before any data gets dirtied */
	logfs_gc_pass(sb);

	/* after all initializations are done, replay the journal
	 * for rw-mounts, if necessary */
	return logfs_replay_journal(sb);
}
322
323 static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
324 {
325         struct logfs_super *super = logfs_super(sb);
326         struct inode *rootdir;
327         int err;
328
329         /* root dir */
330         rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
331         if (IS_ERR(rootdir))
332                 goto fail;
333
334         sb->s_root = d_alloc_root(rootdir);
335         if (!sb->s_root)
336                 goto fail2;
337
338         super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
339         if (!super->s_erase_page)
340                 goto fail2;
341         memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
342
343         /* FIXME: check for read-only mounts */
344         err = logfs_make_writeable(sb);
345         if (err)
346                 goto fail3;
347
348         log_super("LogFS: Finished mounting\n");
349         simple_set_mnt(mnt, sb);
350         return 0;
351
352 fail3:
353         __free_page(super->s_erase_page);
354 fail2:
355         iput(rootdir);
356 fail:
357         iput(logfs_super(sb)->s_master_inode);
358         return -EIO;
359 }
360
361 int logfs_check_ds(struct logfs_disk_super *ds)
362 {
363         struct logfs_segment_header *sh = &ds->ds_sh;
364
365         if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
366                 return -EINVAL;
367         if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
368                 return -EINVAL;
369         if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
370                                 LOGFS_SEGMENT_HEADERSIZE + 12))
371                 return -EINVAL;
372         return 0;
373 }
374
375 static struct page *find_super_block(struct super_block *sb)
376 {
377         struct logfs_super *super = logfs_super(sb);
378         struct page *first, *last;
379
380         first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
381         if (!first || IS_ERR(first))
382                 return NULL;
383         last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
384         if (!last || IS_ERR(first)) {
385                 page_cache_release(first);
386                 return NULL;
387         }
388
389         if (!logfs_check_ds(page_address(first))) {
390                 page_cache_release(last);
391                 return first;
392         }
393
394         /* First one didn't work, try the second superblock */
395         if (!logfs_check_ds(page_address(last))) {
396                 page_cache_release(first);
397                 return last;
398         }
399
400         /* Neither worked, sorry folks */
401         page_cache_release(first);
402         page_cache_release(last);
403         return NULL;
404 }
405
406 static int __logfs_read_sb(struct super_block *sb)
407 {
408         struct logfs_super *super = logfs_super(sb);
409         struct page *page;
410         struct logfs_disk_super *ds;
411         int i;
412
413         page = find_super_block(sb);
414         if (!page)
415                 return -EIO;
416
417         ds = page_address(page);
418         super->s_size = be64_to_cpu(ds->ds_filesystem_size);
419         super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
420         super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
421         super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
422         super->s_segsize = 1 << ds->ds_segment_shift;
423         super->s_segmask = (1 << ds->ds_segment_shift) - 1;
424         super->s_segshift = ds->ds_segment_shift;
425         sb->s_blocksize = 1 << ds->ds_block_shift;
426         sb->s_blocksize_bits = ds->ds_block_shift;
427         super->s_writesize = 1 << ds->ds_write_shift;
428         super->s_writeshift = ds->ds_write_shift;
429         super->s_no_segs = super->s_size >> super->s_segshift;
430         super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
431         super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
432         super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
433         super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
434         super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
435
436         journal_for_each(i)
437                 super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
438
439         super->s_ifile_levels = ds->ds_ifile_levels;
440         super->s_iblock_levels = ds->ds_iblock_levels;
441         super->s_data_levels = ds->ds_data_levels;
442         super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
443                 + super->s_data_levels;
444         page_cache_release(page);
445         return 0;
446 }
447
448 static int logfs_read_sb(struct super_block *sb, int read_only)
449 {
450         struct logfs_super *super = logfs_super(sb);
451         int ret;
452
453         super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
454         if (!super->s_btree_pool)
455                 return -ENOMEM;
456
457         btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
458         btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
459         btree_init_mempool32(&super->s_shadow_tree.segment_map,
460                         super->s_btree_pool);
461
462         ret = logfs_init_mapping(sb);
463         if (ret)
464                 return ret;
465
466         ret = __logfs_read_sb(sb);
467         if (ret)
468                 return ret;
469
470         if (super->s_feature_incompat & ~LOGFS_FEATURES_INCOMPAT)
471                 return -EIO;
472         if ((super->s_feature_ro_compat & ~LOGFS_FEATURES_RO_COMPAT) &&
473                         !read_only)
474                 return -EIO;
475
476         mutex_init(&super->s_dirop_mutex);
477         mutex_init(&super->s_object_alias_mutex);
478         INIT_LIST_HEAD(&super->s_freeing_list);
479
480         ret = logfs_init_rw(sb);
481         if (ret)
482                 return ret;
483
484         ret = logfs_init_areas(sb);
485         if (ret)
486                 return ret;
487
488         ret = logfs_init_gc(sb);
489         if (ret)
490                 return ret;
491
492         ret = logfs_init_journal(sb);
493         if (ret)
494                 return ret;
495
496         return 0;
497 }
498
499 static void logfs_kill_sb(struct super_block *sb)
500 {
501         struct logfs_super *super = logfs_super(sb);
502
503         log_super("LogFS: Start unmounting\n");
504         /* Alias entries slow down mount, so evict as many as possible */
505         sync_filesystem(sb);
506         logfs_write_anchor(sb);
507
508         /*
509          * From this point on alias entries are simply dropped - and any
510          * writes to the object store are considered bugs.
511          */
512         super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
513         log_super("LogFS: Now in shutdown\n");
514         generic_shutdown_super(sb);
515
516         BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
517
518         logfs_cleanup_gc(sb);
519         logfs_cleanup_journal(sb);
520         logfs_cleanup_areas(sb);
521         logfs_cleanup_rw(sb);
522         if (super->s_erase_page)
523                 __free_page(super->s_erase_page);
524         super->s_devops->put_device(sb);
525         logfs_mempool_destroy(super->s_btree_pool);
526         logfs_mempool_destroy(super->s_alias_pool);
527         kfree(super);
528         log_super("LogFS: Finished unmounting\n");
529 }
530
531 int logfs_get_sb_device(struct file_system_type *type, int flags,
532                 struct mtd_info *mtd, struct block_device *bdev,
533                 const struct logfs_device_ops *devops, struct vfsmount *mnt)
534 {
535         struct logfs_super *super;
536         struct super_block *sb;
537         int err = -ENOMEM;
538         static int mount_count;
539
540         log_super("LogFS: Start mount %x\n", mount_count++);
541         super = kzalloc(sizeof(*super), GFP_KERNEL);
542         if (!super)
543                 goto err0;
544
545         super->s_mtd    = mtd;
546         super->s_bdev   = bdev;
547         err = -EINVAL;
548         sb = sget(type, logfs_sb_test, logfs_sb_set, super);
549         if (IS_ERR(sb))
550                 goto err0;
551
552         if (sb->s_root) {
553                 /* Device is already in use */
554                 err = 0;
555                 simple_set_mnt(mnt, sb);
556                 goto err0;
557         }
558
559         super->s_devops = devops;
560
561         /*
562          * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
563          * only covers 16TB and the upper 8TB are used for indirect blocks.
564          * On 64bit system we could bump up the limit, but that would make
565          * the filesystem incompatible with 32bit systems.
566          */
567         sb->s_maxbytes  = (1ull << 43) - 1;
568         sb->s_op        = &logfs_super_operations;
569         sb->s_flags     = flags | MS_NOATIME;
570
571         err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
572         if (err)
573                 goto err1;
574
575         sb->s_flags |= MS_ACTIVE;
576         err = logfs_get_sb_final(sb, mnt);
577         if (err)
578                 goto err1;
579         return 0;
580
581 err1:
582         deactivate_locked_super(sb);
583         return err;
584 err0:
585         kfree(super);
586         //devops->put_device(sb);
587         return err;
588 }
589
590 static int logfs_get_sb(struct file_system_type *type, int flags,
591                 const char *devname, void *data, struct vfsmount *mnt)
592 {
593         ulong mtdnr;
594
595         if (!devname)
596                 return logfs_get_sb_bdev(type, flags, devname, mnt);
597         if (strncmp(devname, "mtd", 3))
598                 return logfs_get_sb_bdev(type, flags, devname, mnt);
599
600         {
601                 char *garbage;
602                 mtdnr = simple_strtoul(devname+3, &garbage, 0);
603                 if (*garbage)
604                         return -EINVAL;
605         }
606
607         return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
608 }
609
610 static struct file_system_type logfs_fs_type = {
611         .owner          = THIS_MODULE,
612         .name           = "logfs",
613         .get_sb         = logfs_get_sb,
614         .kill_sb        = logfs_kill_sb,
615         .fs_flags       = FS_REQUIRES_DEV,
616
617 };
618
619 static int __init logfs_init(void)
620 {
621         int ret;
622
623         emergency_page = alloc_pages(GFP_KERNEL, 0);
624         if (!emergency_page)
625                 return -ENOMEM;
626
627         ret = logfs_compr_init();
628         if (ret)
629                 goto out1;
630
631         ret = logfs_init_inode_cache();
632         if (ret)
633                 goto out2;
634
635         return register_filesystem(&logfs_fs_type);
636 out2:
637         logfs_compr_exit();
638 out1:
639         __free_pages(emergency_page, 0);
640         return ret;
641 }
642
643 static void __exit logfs_exit(void)
644 {
645         unregister_filesystem(&logfs_fs_type);
646         logfs_destroy_inode_cache();
647         logfs_compr_exit();
648         __free_pages(emergency_page, 0);
649 }
650
651 module_init(logfs_init);
652 module_exit(logfs_exit);
653
654 MODULE_LICENSE("GPL v2");
655 MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
656 MODULE_DESCRIPTION("scalable flash filesystem");