APPENDIX D –  Interchange File Format (IFF) 1.0.

The Interchange File Format (IFF) is a binary file format with an AmigaOS-style chunk structure (like IFF-ILBM, AIFF, etc.). All chunks are optional (with the exception of the first) and the structure is fully expandable.

Conventions for numeric formats:

Type

Size (bit)

Description

BYTE

8

Byte integer

UBYTE

8

Unsigned byte integer

WORD

16

Two byte integer in little endian format

UWORD

16

Two byte unsigned integer in little endian format

LONG

32

Four byte integer in little endian format

ULONG

32

Four byte unsigned integer in little endian format

FLOAT

32

Single precision IEEE float (4 bytes) in little endian format

DOUBLE

64

Double precision IEEE float (8 bytes) in little endian format

If your hardware doesn't use the little endian numeric format, you must convert each multi-byte value by reversing the order of its bytes.
A chunk is a data block with an 8 byte header.

Chunk {
    HEADER (8 bytes)
    Data
    …
}

The first 4 bytes are a string that identifies the chunk type and the last 4 bytes (a ULONG) are the size of the Data block, as shown in the following C structure:

/* Header found at the start of every chunk: 4 byte identifier + 4 byte size. */
typedef struct {
    UBYTE Hdr[4];   /* chunk type identifier, e.g. "FORM"; exactly 4 bytes, so no terminating NUL */
    ULONG Size;     /* size of the Data block; stored little endian in the file */
} HEADER;

 

IFF file structure:

IFF_File {
    HEADER {
        "FORM",
        8 + sizeof(Chunk_1) + sizeof(Chunk_2) + … + sizeof(Chunk_N)
    }

    HEADER {
        "MOLE",
        sizeof(Chunk_1) + sizeof(Chunk_2) + … + sizeof(Chunk_N)
    }

    Chunk_1
    Chunk_2
    …
    Chunk_N

}

All IFF files are a sequence of chunks, each with a standard 8 byte header. The recognition string (Hdr field) of the first header is FORM and its Size field contains the size of all chunks plus the size of the subformat recognition header (8 bytes). IFF is a family of binary file formats that can store a variety of data such as audio, images, video and molecules. To recognize the subformat you must read the next 8 byte header. This second header has the sub-recognition string MOLE and a Size field holding the size of all data chunks.

 

ATOM – Atom name chunk:

ATOM {
    HEADER {
        "ATOM",
        2 * TotAtm
    }
    ULONG TotAtm;
    UBYTE Element[2][TotAtm];
}

This is the only non-optional chunk. TotAtm is the total number of atoms stored in the file. Element is a two byte matrix that contains the chemical element symbol for each atom. If the element symbol is a single character (e.g. H, C, O, S, etc.), the second byte must be a space. For the chlorobenzene molecule (C6H5Cl, 12 atoms) the ATOM chunk must be:

ATOM {
    HEADER {
        "ATOM",
        24
    }
    "C ", "C ", "C ", "C ", "C ", "C ",
    "H ", "H ", "H ", "H ", "H ", "Cl"
}

 

XYZ1 – Single precision cartesian coordinate chunk:

XYZ1 {
    HEADER {
        "XYZ1",
        TotAtm * 12
    }
    XYZ[TotAtm] {
        FLOAT x;
        FLOAT y;
        FLOAT z;
    }
}

This chunk reports the cartesian coordinates of each atom in single precision floating-point format. For benzene (C6H6) this chunk can be:

XYZ1 {
    HEADER {
        "XYZ1",
        144
    }
    XYZ {
        { 0.695,  1.203,  0.000 },
        {-0.695,  1.203, -0.002 },
        {-1.389,  0.000, -0.006 },
        {-0.695, -1.203, -0.007 },
        { 0.695, -1.203, -0.006 },
        { 1.389,  0.000, -0.002 },
        { 1.235,  2.139,  0.003 },
        {-1.235,  2.139, -0.001 },
        {-2.470,  0.000, -0.007 },
        {-1.235, -2.139, -0.010 },
        { 1.235, -2.139, -0.007 },
        { 2.470,  0.000, -0.001 }
    }
}

 

XYZ2 – Double precision cartesian coordinate chunk:

XYZ2 {
    HEADER {
        "XYZ2",
        TotAtm * 24
    }
    XYZ[TotAtm] {
        DOUBLE x;
        DOUBLE y;
        DOUBLE z;
    }
}

This chunk is very similar to XYZ1, but the coordinates are stored in double precision floating point format.

 

CONX – Connectivity chunk:

CONX {
    HEADER {
        "CONX",
        TotBond * 9
    }
    ULONG TotBond;
    CONN[TotBond] {
        ULONG Atom1;
        ULONG Atom2;
        UBYTE BondOrder;
    }
}

This chunk describes the connectivity between pairs of atoms. TotBond is the number of bonds, Atom1 and Atom2 are the two connected atoms and BondOrder specifies the bond order.

 

IIUB – IUPAC IUB atom name chunk:

IIUB {
    HEADER {
        "IIUB",
        TotAtm * IUB_Len
    }
    UBYTE IUB_Len;
    UBYTE Name[IUB_Len][TotAtm];
}

In this chunk you can find the IUPAC IUB atom names. IUB_Len is the length of Name record.

 

CALC – Potential and atomic charges chunk:

CALC {
    HEADER {
        "CALC",
        18 + sizeof(ForceFieldName[ ]) + TotAtm * (4 + ATPY_Len)
    }
    UBYTE ForceFieldName[ ];
    HEADER {
        "CHRG",
        TotAtm * 4
    }
    FLOAT AtmCharge[TotAtm];

    HEADER {
        "ATYP",
        TotAtm * ATPY_Len
    }
    UBYTE ATPY_Len;
    UBYTE AtmType[ATPY_Len][TotAtm];
}

 

RSNU – Residue number chunk:

RSNU {
    HEADER {
        "RSNU",
        TotAtm * 4
    }
    UBYTE ResNum[4][TotAtm]
}

It's the residue number for each atom. The number ResNum is a four character string, because in this way you can include the chain indicator (e.g. "99 A").

 

RSNA – Residue name chunk:

RSNA {
    HEADER {
        "RSNA",
        TotRes * 8
    }
    UWORD TotRes;
    RESNAME[TotRes] {
        UBYTE ResNum[4];
        UBYTE ResName[4];
    }
}

This chunk is used to translate the residue number ResNum into the residue name ResName. TotRes is the total number of residues in the file.

 

MOLN – Name of the molecules chunk:

MOLN {
    HEADER {
        "MOLN",
        sizeof(MOLNAME[ ])
    }
    UWORD TotMol;
    MOLNAME[TotMol] {
        ULONG AtmStart;
        ULONG AtmNum;
        UBYTE MolName[ ];
    }
}

The IFF file format can include more than one molecule. TotMol is the total number of molecules, AtmStart is the first atom number of the molecule, AtmNum is the number of atoms in the selected molecule and MolName is a c-string (null terminated) for the molecule name.

 

COMM – Remark chunk:

COMM {
    HEADER {
        "COMM",
        1 + sizeof(Remark[ ])
    }
    UBYTE Remark[ ];
}

In this chunk, you can include any remark.

 

C subroutines and definitions:

To simplify C programming, some useful definitions and subroutines are reported below:

 

/**** Definitions ****/

/*
 * Numeric types with the sizes required by the IFF chunks.
 *
 * The mapping assumes a compiler where short is 16 bits, int is 32 bits,
 * float is 32 bits and double is 64 bits. The C language does not
 * guarantee these widths, so the checks after the typedefs stop the build
 * on any compiler where they do not hold, instead of letting the program
 * read or write chunks with the wrong layout.
 *
 * NOTE(review): plain char may be signed or unsigned depending on the
 * compiler, so BYTE is not guaranteed to hold negative values.
 */
typedef char             BYTE;      /*  8 bit integer           */
typedef unsigned char    UBYTE;     /*  8 bit unsigned integer  */
typedef int              LONG;      /* 32 bit integer           */
typedef unsigned int     ULONG;     /* 32 bit unsigned integer  */
typedef short            WORD;      /* 16 bit integer           */
typedef unsigned short   UWORD;     /* 16 bit unsigned integer  */
typedef float            FLOAT;     /* 32 bit floating point    */
typedef double           DOUBLE;    /* 64 bit floating point    */

/*
 * Compile-time size checks. An array type with a negative length is
 * rejected by the compiler, so a wrong size becomes a build error.
 */
typedef char IFF_CheckWordSize  [(sizeof(WORD)   == 2) ? 1 : -1];
typedef char IFF_CheckUWordSize [(sizeof(UWORD)  == 2) ? 1 : -1];
typedef char IFF_CheckLongSize  [(sizeof(LONG)   == 4) ? 1 : -1];
typedef char IFF_CheckULongSize [(sizeof(ULONG)  == 4) ? 1 : -1];
typedef char IFF_CheckFloatSize [(sizeof(FLOAT)  == 4) ? 1 : -1];
typedef char IFF_CheckDoubleSize[(sizeof(DOUBLE) == 8) ? 1 : -1];

/*** IFF Chunk header ****/

/*
 * The 8 byte header that precedes the data of every chunk: a 4 byte
 * identifier followed by a 4 byte size.
 *
 * NOTE(review): reading a header straight from the file into this struct
 * assumes sizeof(IFFHDR) == 8 with no padding - confirm for the target
 * compiler.
 */
typedef struct {
    char Hdr[4];    /* chunk identifier, e.g. "FORM"; exactly 4 bytes, so not NUL terminated */
    ULONG Size;     /* size of the chunk data; little endian in the file, so SwapL() it on other hosts */
} IFFHDR;

/**** Prototypes ****/

/*
 * Byte-order helpers. Each one reverses, in place, the bytes of the value
 * whose address is passed in Val.
 */
void SwapW(void *Val);      /* 2 bytes: WORD, UWORD         */
void SwapL(void *Val);      /* 4 bytes: LONG, ULONG, FLOAT  */
void SwapD(void *Val);      /* 8 bytes: DOUBLE              */

/**** Change the endian for WORD and UWORD ****/

void SwapW(register void *Val)
{
    register UBYTE T;

    T = ((UBYTE *)Val)[0];
    ((UBYTE *)Val)[0] = ((UBYTE *)Val)[1];
    ((UBYTE *)Val)[1] = T;
}

/**** Change endian for LONG, ULONG and FLOAT ****/

void SwapL(register void *Val)
{
    register UBYTE T;

    T = ((UBYTE *)Val)[0];
    ((UBYTE *)Val)[0] = ((UBYTE *)Val)[3];
    ((UBYTE *)Val)[3] = T;
    T = ((UBYTE *)Val)[1];
    ((UBYTE *)Val)[1] = ((UBYTE *)Val)[2];
    ((UBYTE *)Val)[2] = T;
}

/**** Change the endian for DOUBLE ****/

void SwapD(void *Val)
{
    register UBYTE T;

    T = ((UBYTE *)Val)[0];
    ((UBYTE *)Val)[0] = ((UBYTE *)Val)[7];
    ((UBYTE *)Val)[7] = T;
    T = ((UBYTE *)Val)[1];
    ((UBYTE *)Val)[1] = ((UBYTE *)Val)[6];
    ((UBYTE *)Val)[6] = T;
    T = ((UBYTE *)Val)[2];
    ((UBYTE *)Val)[2] = ((UBYTE *)Val)[5];
    ((UBYTE *)Val)[5] = T;
    T = ((UBYTE *)Val)[3];
    ((UBYTE *)Val)[3] = ((UBYTE *)Val)[4];
    ((UBYTE *)Val)[4] = T;
}