Universal File System
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
Re:Universal File System
[font=Lucida Console]
#define ENTRYTYPE_UNUSED 0x00
#define ENTRYTYPE_DIRECTORY 0x01
#define ENTRYTYPE_FILE 0x10
#define ENTRYTYPE_UNUSABLE 0x80
#define ENTRYTYPE_DELETEDDIR 0x81
#define ENTRYTYPE_DELETEDFILE 0x90
struct tMagic{
unsigned char m0;
unsigned short m1;
};
struct tSuperBlock{
unsigned char bootCode0[11];
unsigned char legacyBIOS[21];
unsigned char bootCode1[372];
__int64 timeStamp;
__int64 dataCount;
__int64 indexCount;
tMagic magic;
unsigned char version;
__int64 blockTotal;
unsigned int blockReserved;
unsigned char blockSize;
unsigned char checksum;
unsigned char compParitionTable[64];
unsigned char compBootSignature[2];
};
struct tIndexVolumeIndentiferEntry{
unsigned char type;
struct{
unsigned short reserved0;
unsigned char reserved1;
}reserved;
tTimeDate timeDate;
unsigned char name[52];
};
struct tIndexStandardEntry{
unsigned char type;
unsigned char flags;
unsigned short continuationCount;
__int64 timeDate;
__int64 blockStart;
__int64 blockEnd;
__int64 fileLength;
unsigned char name[20];
};
struct tIndexContinuationEntry{
unsigned char name[64];
};
struct tIndexStartingMarkerEntry{
unsigned char type;
unsigned char reserved[63];
};
unsigned int mmAllocPage(void); // allocates page
void mmAllocFreePage(unsigned int page); // free page
unsigned int fsReadBlock(__int64 blockIndex, unsigned int address); // read fs block
unsigned int fsWriteBlock(__int64 blockIndex, unsigned int address);// write fs block
unsigned int fsDefragment(tSuperBlock * superBlock){
tIndexStandardEntry * stdEntry;
__int64 dataBlockIndex = superBlock->blockReserved;
__int64 dataBlockClosest = 0;
__int64 dataBlockMove;
__int64 found;
unsigned int memData = mmAllocPage();
unsigned int tmpData = mmAllocPage();
__int64 bi;
for(;;){
found = superBlock->blockTotal; /* We use this to compute a between range of last defragment block, and the largest block. */
/* cycle through all blocks in the FS using the index area. */
for(bi = superBlock->blockTotal - superBlock->indexCount; bi < superBlock->blockTotal; bi++){
if(!fsReadBlock(bi, memData))
return 0; /* error reading block */
stdEntry = (tIndexStandardEntry*)memData;
/* Remove deleted things. This can corrupt the FS, after defrag. */
if((stdEntry->type == ENTRYTYPE_DELETEDDIR) | (stdEntry->type == ENTRYTYPE_DELETEDFILE)){
stdEntry->type = ENTRYTYPE_UNUSED;
if(!fsWriteBlock(bi, memData))
return 0;
}
/* make sure the entry is a directory or file */
if((stdEntry->type == ENTRYTYPE_DIRECTORY)|(stdEntry->type = ENTRYTYPE_FILE))
/* if we have previously found one, make sure this one is closer to dataBlockClosest than
our previous find. We assume that the index entrys are in ANY order!
*/
if((stdEntry->blockStart >= dataBlockClosest) & (stdEntry->blockStart < found))
found = stdEntry->blockStart;
/*
Jump past any continuation blocks.
*/
bi += stdEntry->continuationCount;
}
/*
This means that dataBlockClosest was so big. It exceeded any known file in the system. This means we
have defragmented every file possible.
*/
if(found == superBlock->blockTotal)
break; /* we are done. */
/*
Add one to jump out of the actual data block for a file, and into the next avaible block.
The next availible block could be used. It might not. Hopefully it points to some slacks pace
so the next cycle of for(BI.. will find it.
*/
dataBlockClosest = stdEntry->blockEnd + 1;
/* --------------- defragment found entry -------------------- */
/* How many blocks down should we shift? */
dataBlockMove = stdEntry->blockStart - dataBlockClosest;
/* None. The file's data was already contingious with the last. */
if(dataBlockMove > 0){
/* Lets move the data using dataBlockMove as the shift down count. */
for(__int64 mi = 0; mi < stdEntry->blockEnd - stdEntry->blockStart; mi++){
/* read it */
if(!fsReadBlock(bi + mi, tmpData))
return 0;
/* write it - shifted down #datablockMoves# number of times. */
if(!fsWriteBlock(bi + mi - dataBlockMove, tmpData))
return 0;
}
/* update the index block on what we did. */
dataBlockClosest -= dataBlockMove; /* dont forget we changed the disk structor */
stdEntry->blockStart -= dataBlockMove;
stdEntry->blockEnd -= dataBlockMove;
if(!fsWriteBlock(bi, memData)) /* update stdEntry */
return 0;
}
/* try another */
}
return 1; /* we are all done. */
}
[/font]
#define ENTRYTYPE_UNUSED 0x00
#define ENTRYTYPE_DIRECTORY 0x01
#define ENTRYTYPE_FILE 0x10
#define ENTRYTYPE_UNUSABLE 0x80
#define ENTRYTYPE_DELETEDDIR 0x81
#define ENTRYTYPE_DELETEDFILE 0x90
struct tMagic{
unsigned char m0;
unsigned short m1;
};
struct tSuperBlock{
unsigned char bootCode0[11];
unsigned char legacyBIOS[21];
unsigned char bootCode1[372];
__int64 timeStamp;
__int64 dataCount;
__int64 indexCount;
tMagic magic;
unsigned char version;
__int64 blockTotal;
unsigned int blockReserved;
unsigned char blockSize;
unsigned char checksum;
unsigned char compParitionTable[64];
unsigned char compBootSignature[2];
};
struct tIndexVolumeIndentiferEntry{
unsigned char type;
struct{
unsigned short reserved0;
unsigned char reserved1;
}reserved;
tTimeDate timeDate;
unsigned char name[52];
};
struct tIndexStandardEntry{
unsigned char type;
unsigned char flags;
unsigned short continuationCount;
__int64 timeDate;
__int64 blockStart;
__int64 blockEnd;
__int64 fileLength;
unsigned char name[20];
};
struct tIndexContinuationEntry{
unsigned char name[64];
};
struct tIndexStartingMarkerEntry{
unsigned char type;
unsigned char reserved[63];
};
unsigned int mmAllocPage(void); // allocates page
void mmAllocFreePage(unsigned int page); // free page
unsigned int fsReadBlock(__int64 blockIndex, unsigned int address); // read fs block
unsigned int fsWriteBlock(__int64 blockIndex, unsigned int address);// write fs block
unsigned int fsDefragment(tSuperBlock * superBlock){
tIndexStandardEntry * stdEntry;
__int64 dataBlockIndex = superBlock->blockReserved;
__int64 dataBlockClosest = 0;
__int64 dataBlockMove;
__int64 found;
unsigned int memData = mmAllocPage();
unsigned int tmpData = mmAllocPage();
__int64 bi;
for(;;){
found = superBlock->blockTotal; /* We use this to compute a between range of last defragment block, and the largest block. */
/* cycle through all blocks in the FS using the index area. */
for(bi = superBlock->blockTotal - superBlock->indexCount; bi < superBlock->blockTotal; bi++){
if(!fsReadBlock(bi, memData))
return 0; /* error reading block */
stdEntry = (tIndexStandardEntry*)memData;
/* Remove deleted things. This can corrupt the FS, after defrag. */
if((stdEntry->type == ENTRYTYPE_DELETEDDIR) | (stdEntry->type == ENTRYTYPE_DELETEDFILE)){
stdEntry->type = ENTRYTYPE_UNUSED;
if(!fsWriteBlock(bi, memData))
return 0;
}
/* make sure the entry is a directory or file */
if((stdEntry->type == ENTRYTYPE_DIRECTORY)|(stdEntry->type = ENTRYTYPE_FILE))
/* if we have previously found one, make sure this one is closer to dataBlockClosest than
our previous find. We assume that the index entrys are in ANY order!
*/
if((stdEntry->blockStart >= dataBlockClosest) & (stdEntry->blockStart < found))
found = stdEntry->blockStart;
/*
Jump past any continuation blocks.
*/
bi += stdEntry->continuationCount;
}
/*
This means that dataBlockClosest was so big. It exceeded any known file in the system. This means we
have defragmented every file possible.
*/
if(found == superBlock->blockTotal)
break; /* we are done. */
/*
Add one to jump out of the actual data block for a file, and into the next avaible block.
The next availible block could be used. It might not. Hopefully it points to some slacks pace
so the next cycle of for(BI.. will find it.
*/
dataBlockClosest = stdEntry->blockEnd + 1;
/* --------------- defragment found entry -------------------- */
/* How many blocks down should we shift? */
dataBlockMove = stdEntry->blockStart - dataBlockClosest;
/* None. The file's data was already contingious with the last. */
if(dataBlockMove > 0){
/* Lets move the data using dataBlockMove as the shift down count. */
for(__int64 mi = 0; mi < stdEntry->blockEnd - stdEntry->blockStart; mi++){
/* read it */
if(!fsReadBlock(bi + mi, tmpData))
return 0;
/* write it - shifted down #datablockMoves# number of times. */
if(!fsWriteBlock(bi + mi - dataBlockMove, tmpData))
return 0;
}
/* update the index block on what we did. */
dataBlockClosest -= dataBlockMove; /* dont forget we changed the disk structor */
stdEntry->blockStart -= dataBlockMove;
stdEntry->blockEnd -= dataBlockMove;
if(!fsWriteBlock(bi, memData)) /* update stdEntry */
return 0;
}
/* try another */
}
return 1; /* we are all done. */
}
[/font]
- Kevin McGuire
- Member
- Posts: 843
- Joined: Tue Nov 09, 2004 12:00 am
- Location: United States
- Contact:
Re:Universal File System
I also wanted to note:
I used the design of reading and writting one block at a time, to show the arithogram working using the smallest amount of memory. However, the entire image could be read into memory and manipulated much faster. Im sorry. I should have done it using the entire image loaded into memory. That would have simplified it alot.
I used the design of reading and writting one block at a time, to show the arithogram working using the smallest amount of memory. However, the entire image could be read into memory and manipulated much faster. Im sorry. I should have done it using the entire image loaded into memory. That would have simplified it alot.
Re:Universal File System
My "thoughts" on the new proposal:
1. index entries are actually not 64 bytes long but 56 in
the current implementation. So entry name needs to grow
to 28 bytes! However, I'm wondering the following:
0x00 - 1 - Entry type
0x01 - 1 - Number of continuation entries following this entry
0x02 - 8 - Time stamp
0x0A - 8 - Starting block number in the data area
0x12 - 8 - Ending block number in the data area
0x1A - 8 - File length in bytes
0x22 - 30 - Entry name in UTF8
0x00 Unused entry
0x01 for the Volume Identifier
0x02 Directory entry
0x03 Starting marker entry
0x10 File entry
0x80 Unusable entry
0x82 Deleted directory entry
0x90 Deleted file
This gets us two extra bytes for the entry name and
a total of 30 + (255 * 64) = 16350 which is imho more
than enough. However, if you truly want unlimited
path names why not simply stop at the continuation
entry that has the \0 terminator in it? No real reason
to store the count in that case. If you decide to go
down that road the second byte could be turned into
a reserved byte (0 for now), that could in the future
incorporate perhaps something else (like attribute flags?).
2. when I create a file in windows it says the following characters are not allowed (from what I can generate on my keyboard): / \ : * ? " < > |
so I can make a file "=;,+"! Which would not fit in this filesystem. Why aren't those characters allowed?
personally I actually use comma and plus sign sometimes
3. just to be sure, did you see my previous remark on a timestamp for directories? This so a rename updates the timestamp on the directory as well. Or a tool could touch it to let you know the contents has been updated. Since the bytes are there anyway....
4. the continuation entries most be consecutive, right? That isn't mentioned
5. the idea (I assume) is that the index area can grow down
when you are running out of room?
6. I'm thinking the starting marker should be a required thing
to implement. Here's why. If the superblock would be corrupt
or unreadable you don't know if there is a starting marker.
So recovery software could find a starting marker where there
might not be one (this may lead to corruption). It should be
a relative easy feature to implement. Just put it in the first
entry, and if you need to grow the index fill that entry with
normal data and move the marker to the new place (you need to
update the super block anyway)
7. "where every file has one entry" should read "where every file
has one or more entries"
I'm off to bed
1. index entries are actually not 64 bytes long but 56 in
the current implementation. So entry name needs to grow
to 28 bytes! However, I'm wondering the following:
0x00 - 1 - Entry type
0x01 - 1 - Number of continuation entries following this entry
0x02 - 8 - Time stamp
0x0A - 8 - Starting block number in the data area
0x12 - 8 - Ending block number in the data area
0x1A - 8 - File length in bytes
0x22 - 30 - Entry name in UTF8
0x00 Unused entry
0x01 for the Volume Identifier
0x02 Directory entry
0x03 Starting marker entry
0x10 File entry
0x80 Unusable entry
0x82 Deleted directory entry
0x90 Deleted file
This gets us two extra bytes for the entry name and
a total of 30 + (255 * 64) = 16350 which is imho more
than enough. However, if you truly want unlimited
path names why not simply stop at the continuation
entry that has the \0 terminator in it? No real reason
to store the count in that case. If you decide to go
down that road the second byte could be turned into
a reserved byte (0 for now), that could in the future
incorporate perhaps something else (like attribute flags?).
2. when I create a file in windows it says the following characters are not allowed (from what I can generate on my keyboard): / \ : * ? " < > |
so I can make a file "=;,+"! Which would not fit in this filesystem. Why aren't those characters allowed?
personally I actually use comma and plus sign sometimes
3. just to be sure, did you see my previous remark on a timestamp for directories? This so a rename updates the timestamp on the directory as well. Or a tool could touch it to let you know the contents has been updated. Since the bytes are there anyway....
4. the continuation entries most be consecutive, right? That isn't mentioned
5. the idea (I assume) is that the index area can grow down
when you are running out of room?
6. I'm thinking the starting marker should be a required thing
to implement. Here's why. If the superblock would be corrupt
or unreadable you don't know if there is a starting marker.
So recovery software could find a starting marker where there
might not be one (this may lead to corruption). It should be
a relative easy feature to implement. Just put it in the first
entry, and if you need to grow the index fill that entry with
normal data and move the marker to the new place (you need to
update the super block anyway)
7. "where every file has one entry" should read "where every file
has one or more entries"
I'm off to bed
- Pype.Clicker
- Member
- Posts: 5964
- Joined: Wed Oct 18, 2006 2:31 am
- Location: In a galaxy, far, far away
- Contact:
Re:Universal File System
considering the repeated concerns with the file names stuff, i'd tend to suggest to follow the kind of approach that one can find in an executable file, that is
- the 'file entries' are compact. They only contain 'fixed size' stuff such as timestamps, file size, sectors information, etc.
- all the file names are collated together into the "names section" (that could be, for instance, another "special area" or just a specific file entry such as the first one, or the file entry with type==X).
A file entry would therefore not carry the filename but instead the offset of its name in the "names area". That makes reading from the camel ultra-simple: load all the "names file", browse the files entries using names[entry->fname] rather than entry->fname for comparison.
Extending the idea, you could have a couple of "names" rather than a single one: the "prefix" name (reused by all the files in the same directory) and the "own" name (e.g. the filename "per se").
For systems short on RAM, the offset within the name area can be directly translated into a single block to be loaded.
anyone interested ?
- the 'file entries' are compact. They only contain 'fixed size' stuff such as timestamps, file size, sectors information, etc.
- all the file names are collated together into the "names section" (that could be, for instance, another "special area" or just a specific file entry such as the first one, or the file entry with type==X).
A file entry would therefore not carry the filename but instead the offset of its name in the "names area". That makes reading from the camel ultra-simple: load all the "names file", browse the files entries using names[entry->fname] rather than entry->fname for comparison.
Extending the idea, you could have a couple of "names" rather than a single one: the "prefix" name (reused by all the files in the same directory) and the "own" name (e.g. the filename "per se").
For systems short on RAM, the offset within the name area can be directly translated into a single block to be loaded.
anyone interested ?
Re:Universal File System
Before we continue, could we set up a list of requirements for this file system? I expect that would allow us to cancel out a few discussions directly, such as supporting fragmentation versus not supporting it, variable inode count or static, fixed file name length or variable, stuff like that.
Re:Universal File System
Hi,
The previous version of the specification prevented this by making sure that the first byte of any entry (except for continuation entries) is a value that isn't possible in a valid file name. Due to the way UTF8 encoding works, this can be any value between 0x01 and 0x1F (inclusive).
However, you are correct in that the entry type and the flags for standard entries could be combined into a single byte (as long as the only values used are between 0x01 and 0x1F), which would give an extra 2 bytes for entry names (the "number of continuation entries" was only really 2 bytes for alignment).
I'm not so sure this was a good idea now, as an old OS like DOS will have huge problems with UTF8 and file name lengths anyway. Linux, OS X and modern versions of Windows can handle these characters...
Still, there isn't really any reason not to put a starting marker or to make the starting marker a requirement.
Thanks,
Brendan
The problem here is detecting if an entry is a continuation entry or not (if you're unlucky, some file names would cause a continuation entry to look like a standard entry). For example, the UTF character code 0x00C2 (Latin capital letter 'A' with circumflex) is encoded as 2 bytes, 0xC3, 0x82. If the second byte of the character happens to be the first byte in a continuation entry, then software could interpret it as the type byte for a deleted directory entry. In the same way the UTF character code 0x00D0 (Latin capital letter ETH) could look like an entry for a deleted file.Rob wrote:This gets us two extra bytes for the entry name and
a total of 30 + (255 * 64) = 16350 which is imho more
than enough. However, if you truly want unlimited
path names why not simply stop at the continuation
entry that has the \0 terminator in it? No real reason
to store the count in that case. If you decide to go
down that road the second byte could be turned into
a reserved byte (0 for now), that could in the future
incorporate perhaps something else (like attribute flags?).
The previous version of the specification prevented this by making sure that the first byte of any entry (except for continuation entries) is a value that isn't possible in a valid file name. Due to the way UTF8 encoding works, this can be any value between 0x01 and 0x1F (inclusive).
However, you are correct in that the entry type and the flags for standard entries could be combined into a single byte (as long as the only values used are between 0x01 and 0x1F), which would give an extra 2 bytes for entry names (the "number of continuation entries" was only really 2 bytes for alignment).
Originally I thought that it'd be better to limit the characters when a file is created, rather than allowing characters that some OSs (like DOS) would have problems with. For example, in DOS the command "copy foo.txt+bar.txt new.txt" will combine both files ("foo.txt" and "bar.txt") into a single file ("new.txt").Rob wrote:2. when I create a file in windows it says the following characters are not allowed (from what I can generate on my keyboard): / \ : * ? " < > |
so I can make a file "=;,+"! Which would not fit in this filesystem. Why aren't those characters allowed?
I'm not so sure this was a good idea now, as an old OS like DOS will have huge problems with UTF8 and file name lengths anyway. Linux, OS X and modern versions of Windows can handle these characters...
I'm not sure the benefits of this would justify the extra code it'd take or the extra writes it'd need to implement. Can you think of a situation where this would be useful?Rob wrote:3. just to be sure, did you see my previous remark on a timestamp for directories? This so a rename updates the timestamp on the directory as well. Or a tool could touch it to let you know the contents has been updated. Since the bytes are there anyway....
Yes and yes - I'll clarify this for the next version of the specification.Rob wrote:4. the continuation entries most be consecutive, right? That isn't mentioned
5. the idea (I assume) is that the index area can grow down
when you are running out of room?
File recovery isn't something that would actually happen often. For e.g. most people would throw the floppy in the bin and just copy the orginal files to a different floppy and try again. When file recovery is done the recovery utility will need to handle read errors from all over the media - for example, the super-block, several sectors in the index area and several sectors in the data area might be unreadable. In this case the sector containing the starting marker may be unreadable.Rob wrote:6. I'm thinking the starting marker should be a required thing
to implement. Here's why. If the superblock would be corrupt
or unreadable you don't know if there is a starting marker.
So recovery software could find a starting marker where there
might not be one (this may lead to corruption). It should be
a relative easy feature to implement. Just put it in the first
entry, and if you need to grow the index fill that entry with
normal data and move the marker to the new place (you need to
update the super block anyway)
Still, there isn't really any reason not to put a starting marker or to make the starting marker a requirement.
Being fixed!Rob wrote:7. "where every file has one entry" should read "where every file has one or more entries"
Thanks,
Brendan
For all things; perfection is, and will always remain, impossible to achieve in practice. However; by striving for perfection we create things that are as perfect as practically possible. Let the pursuit of perfection be our guide.
Re:Universal File System
That came to my mind too, but that means that you'll have to calculate the string length (iterate until the NULL-byte is reached) everytime you iterate over an index entry. This could become a serious performance hog, so during a simple optimization you'll end up calculating the name length once and store it in memory - which is quite exactly what we are doing here in the index entry itself.Rob wrote: However, if you truly want unlimited
path names why not simply stop at the continuation
entry that has the \0 terminator in it? No real reason
to store the count in that case. If you decide to go
down that road the second byte could be turned into
a reserved byte (0 for now), that could in the future
incorporate perhaps something else (like attribute flags?).
cheers Joe
Re:Universal File System
Hi,
The easiest way to implement three variable sized areas (without over-complicating the code necessary to manage them) would be to find some media with 3 ends. Instead of having a long line of blocks you'd want a triangle, so that each variable sized area can grow from it's corner towards the middle, and the middle would contain the free space.
I'd assume the requirements would be derived from the purpose of the file system. The purpose of the file system would be to provide a modern common format that allows files to be transfered between different "systems" (where a "system" is a computer with an OS and a pile of resources, or an embedded device without an OS and very limited resources, or anything else that deals with files). It is not in any way intended to be a replacement for any OS's "native" file system (e.g. NTFS, ext3, reiserFS, etc).
IMHO the requirements would be:
Not sure if any of this helps though....
Cheers,
Brendan
Typical disk media is like a long line of blocks (or sectors), with 2 ends. With 2 variable sized areas this works well - e.g. a variable size "data area" at one end with a variable sized "index area" at the other, with free space in the middle to allow both to grow.Pype.Clicker wrote: considering the repeated concerns with the file names stuff, i'd tend to suggest to follow the kind of approach that one can find in an executable file, that is
- the 'file entries' are compact. They only contain 'fixed size' stuff such as timestamps, file size, sectors information, etc.
- all the file names are collated together into the "names section" (that could be, for instance, another "special area" or just a specific file entry such as the first one, or the file entry with type==X).
A file entry would therefore not carry the filename but instead the offset of its name in the "names area". That makes reading from the camel ultra-simple: load all the "names file", browse the files entries using names[entry->fname] rather than entry->fname for comparison.
Extending the idea, you could have a couple of "names" rather than a single one: the "prefix" name (reused by all the files in the same directory) and the "own" name (e.g. the filename "per se").
For systems short on RAM, the offset within the name area can be directly translated into a single block to be loaded.
anyone interested ?
The easiest way to implement three variable sized areas (without over-complicating the code necessary to manage them) would be to find some media with 3 ends. Instead of having a long line of blocks you'd want a triangle, so that each variable sized area can grow from it's corner towards the middle, and the middle would contain the free space.
Candy wrote:Before we continue, could we set up a list of requirements for this file system? I expect that would allow us to cancel out a few discussions directly, such as supporting fragmentation versus not supporting it, variable inode count or static, fixed file name length or variable, stuff like that.
I'd assume the requirements would be derived from the purpose of the file system. The purpose of the file system would be to provide a modern common format that allows files to be transfered between different "systems" (where a "system" is a computer with an OS and a pile of resources, or an embedded device without an OS and very limited resources, or anything else that deals with files). It is not in any way intended to be a replacement for any OS's "native" file system (e.g. NTFS, ext3, reiserFS, etc).
IMHO the requirements would be:
- - Simplicity of both file system design and the code needed to support the file system. This is partly because of those embedded devices with very limited resources.
- International. The number of people in the world who use English is smaller than the number of people who don't.
- Free. This means no patents, no copyrights, no restrictions, etc. It also means that a developer working by themselves should be able to implement it without costing them too much time (something that costs too much to implement or support is not "free to use" for all people, as only those who can afford to implement and support it would be able to use it).
- "System" independance. The file system shouldn't need any facility that isn't present in all "systems". I can't think of an example here, but it'd be a bit silly if it needed something that could only be found in OS/2 or something.
- Future proof. This means support for large media that doesn't currently exist, time stamps that can be used for more than 32 years, etc.
- - Security. If you're worried about security then don't put your files on a file system designed for everything to read, or alternatively make sure no-one else can get hold of the media once you have.
- "Advanced" features. Things like supporting sparse files, journalling, indexing, meta-data, soft links and hard links (or shortcuts), special file types (named pipes, devices, etc), file encryption, file compression, etc aren't needed. If you do need these things then you should be using someting like NTFS, ext3, ReiserFS, etc.
Not sure if any of this helps though....
Cheers,
Brendan
For all things; perfection is, and will always remain, impossible to achieve in practice. However; by striving for perfection we create things that are as perfect as practically possible. Let the pursuit of perfection be our guide.
- Pype.Clicker
- Member
- Posts: 5964
- Joined: Wed Oct 18, 2006 2:31 am
- Location: In a galaxy, far, far away
- Contact:
Re:Universal File System
Okay. I better see it now. i guess that makes your design decisions clearer to understand.Brendan wrote: The easiest way to implement three variable sized areas (without over-complicating the code necessary to manage them) would be to find some media with 3 ends. Instead of having a long line of blocks you'd want a triangle, so that each variable sized area can grow from it's corner towards the middle, and the middle would contain the free space.
Oh, btw, i can give you a media with 3 ends if you want: area 'A' grows from sector 0 and extends upwards only on even sectors, area 'B' grows from sector 1 and extends upwards only on odd sectors while area 'X' grows from the last sector downwards using both odd and even sectors
It would probably not be so practical to use, however ... don't take it seriously ;D
Re:Universal File System
It's asking for a 1-dimensional objects with three limits. There are only a maximum and a minimum, and that's all you get.
Re:Universal File System
Hi,
I've updated the draft specification again to take into account the recent discussions.
I still haven't done anything in the "algorithms" part yet - I think I'll wait until the main part of the specification stops changing..
It's available from my web site at:
http://bcos.hopto.org/sfs.html
Cheers,
Brendan
I've updated the draft specification again to take into account the recent discussions.
I still haven't done anything in the "algorithms" part yet - I think I'll wait until the main part of the specification stops changing..
It's available from my web site at:
http://bcos.hopto.org/sfs.html
Cheers,
Brendan
For all things; perfection is, and will always remain, impossible to achieve in practice. However; by striving for perfection we create things that are as perfect as practically possible. Let the pursuit of perfection be our guide.
Re:Universal File System
I'll wait for making the driver until the specification itself is set. At least, until it's somewhat certain.Brendan wrote: I still haven't done anything in the "algorithms" part yet - I think I'll wait until the main part of the specification stops changing..
Re:Universal File System
ive been too busy lately, so i'm a bit late
i'd base it like minix.
this is not designed for large disk FS replacement, for your 256TB rack... im thinking more floppies, flash etc. it set to handle >4gb but not 2^64... max file size is 2gb (for 1kb blocks)... bad block bitmap, good for bad flash etc...
layout;
superblock
none / some reserved space
bad block bitmap
used block bitmap
inodes
root
the superblock has LBA pointers to all things so it can be in any order in any part of disk, thus you could resize inode data etc.
minix does inodes = blocks/3 thing, which is pretty general and seems to work well. if its not enough you can always grow it (as long as its contiguous)
superblock
- int64 - magic
- int64 - number of blocks on disk
- int64 - reserved blocks following first block
- int64 logical block address (LBA) to bitmap of used blocks
- int64 LBA to bitmap of bad blocks
- int64 LBA to inodes
- int16 - block size / 512 (1=512, 2=1k, 8=4k, 16=16k)
32 bytes - utf8 volume name
byte - flags (0x01 - dirty)
id rather have volumename in the superblock than waste an attribute bit just to store it in a directory as a single use 1 off item.
inode
int64 - filesize
int32- last mod datetime stamp (seconds since.. 1900)
1 byte - attributes
int64[3] - direct blocks LBA
int64 - single indirect block LBA
int64 - double indirect block LBA
int64 - triple indirect block LBA
reserved 3 bytes (rounds out to 64)
attributes
0x01 - directory
0x02 - exec
0x04 - read
0x08 - write
0x10 - na
0x20 - na
0x40 - hidden
0x80 - system
directory entry
int32 - inode number
62 bytes - utf8 filename
the first inode (0) is always root directory inode.
the first directory entry is always a backlink to parent directory inode the ".."
i'd base it like minix.
this is not designed for large disk FS replacement, for your 256TB rack... im thinking more floppies, flash etc. it set to handle >4gb but not 2^64... max file size is 2gb (for 1kb blocks)... bad block bitmap, good for bad flash etc...
layout;
superblock
none / some reserved space
bad block bitmap
used block bitmap
inodes
root
the superblock has LBA pointers to all things so it can be in any order in any part of disk, thus you could resize inode data etc.
minix does inodes = blocks/3 thing, which is pretty general and seems to work well. if its not enough you can always grow it (as long as its contiguous)
superblock
- int64 - magic
- int64 - number of blocks on disk
- int64 - reserved blocks following first block
- int64 logical block address (LBA) to bitmap of used blocks
- int64 LBA to bitmap of bad blocks
- int64 LBA to inodes
- int16 - block size / 512 (1=512, 2=1k, 8=4k, 16=16k)
32 bytes - utf8 volume name
byte - flags (0x01 - dirty)
id rather have volumename in the superblock than waste an attribute bit just to store it in a directory as a single use 1 off item.
inode
int64 - filesize
int32- last mod datetime stamp (seconds since.. 1900)
1 byte - attributes
int64[3] - direct blocks LBA
int64 - single indirect block LBA
int64 - double indirect block LBA
int64 - triple indirect block LBA
reserved 3 bytes (rounds out to 64)
attributes
0x01 - directory
0x02 - exec
0x04 - read
0x08 - write
0x10 - na
0x20 - na
0x40 - hidden
0x80 - system
directory entry
int32 - inode number
62 bytes - utf8 filename
the first inode (0) is always root directory inode.
the first directory entry is always a backlink to parent directory inode the ".."
-- Stu --
Re:Universal File System
I totally agree with pype, an I-node based fs will be better it will allow hard links & symbolic links.Pype.Clicker wrote: considering the repeated concerns with the file names stuff, i'd tend to suggest to follow the kind of approach that one can find in an executable file, that is
- the 'file entries' are compact. They only contain 'fixed size' stuff such as timestamps, file size, sectors information, etc.
- all the file names are collated together into the "names section" (that could be, for instance, another "special area" or just a specific file entry such as the first one, or the file entry with type==X).
A file entry would therefore not carry the filename but instead the offset of its name in the "names area". That makes reading from the camel ultra-simple: load all the "names file", browse the files entries using names[entry->fname] rather than entry->fname for comparison.
Extending the idea, you could have a couple of "names" rather than a single one: the "prefix" name (reused by all the files in the same directory) and the "own" name (e.g. the filename "per se").
For systems short on RAM, the offset within the name area can be directly translated into a single block to be loaded.
anyone interested ?
And adding a new block for filename will allow for better naming schemes we will not tied up for 8 bytes for name or so...
This will also make it easy for UTF-8 or any other encoded filenames.
Assembler,
Re:Universal File System
I would prefer that soft-links (that's what windows uses for shortcuts right?) were supported. Though doesn't windows just store them as a file that tells it where to go anyway?