Changes between Version 15 and Version 16 of FileCompression


Ignore:
Timestamp:
Aug 2, 2011, 12:17:09 AM (13 years ago)
Author:
davea
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • FileCompression

    v15 v16  
    11[[PageOutline]]
    22= File compression =
    3 == Compression of output files == #compress-output
    4 
    5 If you include the `<gzip_when_done>` tag in an [XmlFormat#Files output file description], the file will be gzip-compressed after it has been generated.
    6 
    7 The gzip_when_done is only supported in client version 5.8 (version # needs to be confirmed) and more recently.  If you will receive files from clients that do not support the gzip_when_done flag then you should open the files with a function similar to this to your validator/assimilator:
     3
     4== BOINC-supplied compression ==
     5
     6=== Compression of input files === #compress-input
     7
     8Starting with version 5.4,
     9the BOINC client is able to handle HTTP `Content-Encoding` types
     10'deflate' (zlib algorithm) and 'gzip' (gzip algorithm).
     11The client decompresses these files 'on the fly'
     12and stores them on disk in uncompressed form.
     13This can be used in the following two ways.
     14
     15Both methods store files uncompressed on the client.
     16If you need compression on the client,
     17you must do it at the application level (see below).
     18
     19==== gzip encoding ====
     20
     21To use this method, gzip your downloadable files,
     22giving them a filename suffix such as '.gz'.
     23(The name used in your `<file_info>` elements,
     24however, is the original filename without '.gz').
     25
     26Include the following line in `httpd.conf`:
     27{{{
     28AddEncoding x-gzip .gz
     29}}}
     30and restart apache.
     31
     32This method has the advantage of reducing server disk usage and server CPU load,
     33but it will only work with 5.4+ clients.
     34BOINC clients older than 5.4 won't be able to download files.
     35Use the 'min_core_client_version' entry in config.xml to enforce this.
     36
     37==== Apache mod_deflate ====
     38
     39You can use the Apache 2.0 mod_deflate module to automatically compress files on the fly.
     40See http://httpd.apache.org/docs/2.0/mod/mod_deflate.html.
     41This method will work with all BOINC clients,
     42but it will do compression only for 5.4+ clients.
     43
     44You can use this in conjunction with gzip encoding because the mod_deflate module
     45allows you to exempt certain filetypes from on-the-fly compression.
     46
     47This method increases CPU load on the web server,
     48but this is typically not significant.
     49
     50You'll need to modify your `httpd.conf` file; example:
     51{{{
     52# Enable module
     53LoadModule deflate_module modules/mod_deflate.so
     54
     55# Log file compression
     56DeflateFilterNote Input instream
     57DeflateFilterNote Output outstream
     58DeflateFilterNote Ratio ratio
     59
     60LogFormat '"%r" %{outstream}n/%{instream}n (%{ratio}n%%)' deflate
     61CustomLog logs/deflate_log deflate
     62
     63# Use low settings for compression to make sure impact on server is low
     64DeflateMemLevel 2
     65DeflateCompressionLevel 2
     66
     67Alias /boinc/download /path/to/files/download
     68
     69<Directory /path/to/files/download>
     70SetOutputFilter DEFLATE
     71SetEnvIfNoCase Request_URI \.(?:gz|gif|jpg|jpeg|png)$ no-gzip dont-vary
     72</Directory>
     73}}}
     74
     75This configuration tells Apache to compress all files served from
     76the download directory, except for files whose names end with `gz`, `gif`, `jpg`, `jpeg` or `png`.
     77An alternate way to specify the files is the following:
     78{{{
     79Alias /boinc/download /path/to/files/download
     80
     81<Directory /path/to/files/download>
     82AddOutputFilter DEFLATE .faa .mask
     83</Directory>
     84}}}
     85This configuration tells Apache to compress only the file types
     86`.faa` and `.mask` served from the download directory.
     87
     88=== Compression of output files === #compress-output
     89
     90If you include the `<gzip_when_done>` tag in an [XmlFormat#Files output file description],
     91the file will be gzip-compressed after it has been generated.
     92
     93The `gzip_when_done` tag is supported only in client version 5.8+.
     94If you receive files from clients that do not support the `gzip_when_done` tag,
     95then your validator/assimilator should open the files
     96with a function similar to this:
    897{{{
    998#!cpp
     
    12101    char cmd[512];
    13102    char buf[4096];
     103
    14104    result->erase();
    15 
    16     //build the command
    17     cmd[0]='\0';
    18     strcat(cmd, "gzip -dcf ");
    19     strcat(cmd, file.c_str());
    20 
     105    sprintf(cmd, "gzip -dcf %s", file.c_str());
    21106    infile = popen(cmd, "r");
    22107    if (infile == NULL) {
     
    29114    result->append("\0");
    30115    if (pclose(infile) != 0) {
    31             fprintf(stderr, "%s: pclose failed\n", file.c_str());
    32             return 1;
     116        fprintf(stderr, "%s: pclose failed\n", file.c_str());
     117        return 1;
    33118    }
    34119    return 0;
     
    36121}}}
    37122
    38 This will automatically uncompress the file if it is compressed or it will open it without modification if it is not compressed.
    39 
    40 == Compression of input files == #compress-input
    41 
    42 Starting with version 5.4, the BOINC client is able to handle HTTP `Content-Encoding` types 'deflate' (zlib algorithm) and 'gzip' (gzip algorithm). The client decompresses these files 'on the fly' and stores them on disk in uncompressed form.
    43 
    44 You can use this in two ways:
    45 
    46     * Use the Apache 2.0 mod_deflate module to automatically compress files on the fly. This method will work with all BOINC clients, but it will do compression only for 5.4+ clients. See [#mod_deflate Using mod_deflate].
    47     * Compress files and give them a filename suffix such as '.gz'. The name used in your `<file_info>` elements, however, is the original filename without '.gz'. BOINC clients older than 5.4 won't be able to download files.
    48 
    49 Include the following line in `httpd.conf`:
    50 
    51 {{{
    52 AddEncoding x-gzip .gz
    53 }}}
    54 
    55 and restart apache.
    56 This will add the content encoding to the header so that the client will decompress the file automatically.
    57 This method has the advantage of reducing server disk usage and server CPU load,
    58 but it will only work with 5.4+ clients.
    59 Use the 'min_core_version' field of the app_version table to enforce this.
    60 You can use this in conjunction because the mod_deflate module
    61 allows you to exempt certain filetypes from on-the-fly compression.
    62 
    63 Both methods store files uncompressed on the client. If you need compression on the client, you must do it at the application level. The BOINC source distribution includes a version of the zip library designed for use by BOINC applications on any platform (see below).
    64 
    65 
    66 === Using mod_deflate === #mod_deflate
    67 
    68 Apache 2.0 includes a module called mod_deflate.
    69 You can read about it here:
    70 http://httpd.apache.org/docs/2.0/mod/mod_deflate.html
    71 
    72 This module allows you to specify that certain files will be
    73 compressed dynamically when it is being sent to clients that specify
    74 that they can handle it.
    75 The BOINC client 5.4 and higher includes the ability to
    76 decompress compressed files as they are downloaded.
    77 If a BOINC client 5.2 or earlier requests work,
    78 then the server will simply not compress the file so that
    79 the client can handle the file.
    80 We were expecting to only compress a few key files due to
    81 the expected load on the server.
    82 However, it turns out that the load on the server
    83 is actually quite small so we are compressing most of the files
    84 downloaded from our servers.
    85 Adding the compression on the fly only added about 5%
    86 to the system CPU utilization (obviously it will vary
    87 based on the power of your servers).
    88 
    89 You need to read the Apache 2.0 documentation about this
    90 module to make sure you understand it.
    91 However, our `httpd.conf` file for these changes includes the following:
    92 {{{
    93 # Enable module
    94 LoadModule deflate_module modules/mod_deflate.so
    95 
    96 # Log file compression
    97 DeflateFilterNote Input instream
    98 DeflateFilterNote Output outstream
    99 DeflateFilterNote Ratio ratio
    100 
    101 LogFormat '"%r" %{outstream}n/%{instream}n (%{ratio}n%%)' deflate
    102 CustomLog logs/deflate_log deflate
    103 
    104 # Use low settings for compression to make sure impact on server is low
    105 DeflateMemLevel 2
    106 DeflateCompressionLevel 2
    107 
    108 Alias /boinc/download /path/to/files/download
    109 
    110 <Directory /path/to/files/download>
    111 SetOutputFilter DEFLATE
    112 SetEnvIfNoCase Request_URI \.(?:gz|gif|jpg|jpeg|png)$ no-gzip dont-vary
    113 </Directory>
    114 }}}
    115 
    116 This configuration tells Apache to compress all files served from
    117 the download directory, except for files whose names end with `gz`, `gif`, `jpg`, `jpeg` or `png`.
    118 An alternate way to specify the files is the following:
    119 {{{
    120 Alias /boinc/download /path/to/files/download
    121 
    122 <Directory /path/to/files/download>
    123 AddOutputFilter DEFLATE .faa .mask
    124 </Directory>
    125 }}}
    126 This configuration tells Apache to compress only the file types
    127 `.faa` and `.mask` served from the download directory.
    128 
    129 == Using boinc_zip == #boinc-zip
     123This will uncompress the file if it is compressed or will read it
     124without modification if it is not compressed.
     125
     126== Application-level compression ==
     127
     128=== Using boinc_zip === #boinc-zip
    130129
    131130You can also do compression in your application.
    132131To assist this, BOINC provides a library
    133 boinc_zip, based on the [http://www.info-zip.org Info-Zip] libraries, but combines both zip & unzip
     132boinc_zip, based on the [http://www.info-zip.org Info-Zip] libraries,
     133but combines both zip & unzip
    134134functionality in one library.
    135135Please email any questions or comments to Carl Christensen (carlgt1 at yahoo dot com)
     
    142142distributing `zip` & `unzip` executable binaries for different platforms).
    143143
    144 === Limitations === #boinc-zip-limitations
     144==== Limitations ==== #boinc-zip-limitations
    145145The "unzip" functionality is there, that is you can unzip
    146146a file and it will create all directories & files in the zip file. 
     
    150150function which will be explained below.
    151151
    152 === Building === #boinc-zip-building
     152==== Building ==== #boinc-zip-building
    153153
    154154For Windows, you can just add the project "boinc_zip" to your
     
    165165to build properly for your platform.
    166166
    167 Also, please note that boinc_zip relies on some BOINC functions that you will need (and will most likely be in your app already since they are handy) -- namely `boinc/lib/filesys.C` and `boinc/lib/util.C`.
    168 
    169 === Using === #boinc-zip-using
     167Also, please note that boinc_zip relies on some BOINC functions that you will need
     168(and will most likely be in your app already since they are handy) --
     169namely `boinc/lib/filesys.C` and `boinc/lib/util.C`.
     170
     171==== Using ==== #boinc-zip-using
    170172Basically, you will need to `#include "boinc_zip.h"` in your app (of course
    171173your compiler will need to know where it is, i.e. -I../boinc/zip).
     
    198200`ZipFileList` instance, and then pass this into `boinc_filelist` as follows:
    199201{{{
    200 bool boinc_filelist(const std::string directory,
    201                   const std::string pattern,
    202                   ZipFileList* pList,
    203                   const unsigned char ucSort = SORT_NAME | SORT_DESCENDING,
    204                   const bool bClear = true);
     202bool boinc_filelist(
     203        const std::string directory,
     204    const std::string pattern,
     205    ZipFileList* pList,
     206        const unsigned char ucSort = SORT_NAME | SORT_DESCENDING,
     207        const bool bClear = true
     208);
    205209}}}
    206210if you want to zip up all text (.txt) files in a directory, just pass in:
     
    220224described above).
    221225
    222 === Getting boinc_zip === #boinc-zip-getting
    223 
    224 boinc_zip is no longer in the main boinc subversion "trunk" but resides in this "depends" branch:
     226==== Getting boinc_zip ==== #boinc-zip-getting
     227
     228boinc_zip is no longer in the main boinc subversion "trunk"
     229but resides in this "depends" branch:
    225230
    226231svn co http://boinc.berkeley.edu/svn/trunk/depends_projects/zip
    227232
    228 Note for Linux/Mac:  To build along with the other boinc libraries, you will need to add the following lines to the bottom of the '''configure.ac''' file (where the various Makefiles are listed):
     233Note for Linux/Mac:  To build along with the other boinc libraries,
     234you will need to add the following lines to the bottom of the '''configure.ac''' file
     235(where the various Makefiles are listed):
    229236
    230237{{{
     
    235242
    236243
    237 Similarly for the '''Makefile.am''' file -- add zip, zip/zip and zip/unzip to the library subdirs:
     244Similarly for the '''Makefile.am''' file -- add zip, zip/zip and zip/unzip
     245to the library subdirs:
    238246
    239247{{{
     
    244252
    245253
    246 == Client and Server Compression and Decompression using gzip (zlib) == #gzip
    247 
    248 These basic routines may be useful if you want to compress/decompress a file using the zlib library (usually called "libz.a" and available for most platforms).  Include the header file below (qcn_gzip.h) in your program, and link against libz, and you will gain two simple to use functions for gzip'ing or gunzip'ing a file.  This is for simple single file or file-by-file compression or decompression (i.e. one file that is to be compressed into a .gz or decompressed back to it's original uncompressed state).  You can check for boinc client status if you want the ability to quit inside an operation etc.
     254=== Using gzip (zlib) === #gzip
     255
     256These basic routines may be useful if you want to compress/decompress a file
     257using the zlib library (usually called "libz.a" and available for most platforms).
     258Include the header file below (qcn_gzip.h) in your program, and link against libz,
     259and you will gain two simple-to-use functions for gzip'ing or gunzip'ing a file.
     260This is for simple single file or file-by-file compression or decompression
     261(i.e. one file that is to be compressed into a .gz or decompressed back to
     262its original uncompressed state).
     263You can check for boinc client status if you want the ability to quit
     264inside an operation etc.
    249265
    250266qcn_gzip.h:
     
    270286#include "qcn_gzip.h"
    271287
    272 int do_gzip(const char* strGZ, const char* strInput)
    273 {
    274         // take an input file (strInput) and turn it into a compressed file (strGZ)
    275         // get rid of the input file after
    276         FILE* fIn = boinc_fopen(strInput, "rb");
    277         if (!fIn)  return 1; //error
    278         gzFile fOut = gzopen(strGZ, "wb");
    279         if (!fOut) return 1; //error
    280         fseek(fIn, 0, SEEK_SET);  // go to the top of the files
    281         gzseek(fOut, 0, SEEK_SET);
    282         unsigned char buf[1024];
    283         long lRead = 0, lWrite = 0;
    284         while (!feof(fIn)) { // read 1KB at a time until end of file
    285                 memset(buf, 0x00, 1024);
    286                 lRead = 0;
    287                 lRead = (long) fread(buf, 1, 1024, fIn);
    288                 lWrite = (long) gzwrite(fOut, buf, lRead);
    289                 if (lRead != lWrite) break;
    290         }
    291         gzclose(fOut);
    292         fclose(fIn);
    293         if (lRead != lWrite) return 1;  //error -- read bytes != written bytes
    294         // if we made it here, it compressed OK, can erase strInput and leave
    295         boinc_delete_file(strInput);
    296         return 0;
     288int do_gzip(const char* strGZ, const char* strInput) {
     289    // take an input file (strInput) and turn it into a compressed file (strGZ)
     290    // get rid of the input file after
     291    FILE* fIn = boinc_fopen(strInput, "rb");
     292    if (!fIn)  return 1; //error
     293    gzFile fOut = gzopen(strGZ, "wb");
     294    if (!fOut) return 1; //error
     295    fseek(fIn, 0, SEEK_SET);  // go to the top of the files
     296    gzseek(fOut, 0, SEEK_SET);
     297    unsigned char buf[1024];
     298    long lRead = 0, lWrite = 0;
     299    while (!feof(fIn)) { // read 1KB at a time until end of file
     300        memset(buf, 0x00, 1024);
     301        lRead = 0;
     302        lRead = (long) fread(buf, 1, 1024, fIn);
     303        lWrite = (long) gzwrite(fOut, buf, lRead);
     304        if (lRead != lWrite) break;
     305    }
     306    gzclose(fOut);
     307    fclose(fIn);
     308    if (lRead != lWrite) return 1;  //error -- read bytes != written bytes
     309    // if we made it here, it compressed OK, can erase strInput and leave
     310    boinc_delete_file(strInput);
     311    return 0;
    297312}
    298313
    299314// CMC - commented out status calls, are they too paranoid?
    300 //      if needed use sm->statusBOINC instead (for quit_request etc)
    301 
    302 int do_gunzip(const char* strGZ, const char* strInput, bool bKeep)
    303 {
    304         // take an input file (strInput) and turn it into a compressed file (strGZ)
    305         // get rid of the input file after
    306         //s.quit_request = 0;
    307         //checkBOINCStatus();
    308         FILE* fIn = boinc_fopen(strInput, "wb");
    309         if (!fIn)  return 1; //error
    310         gzFile fOut = gzopen(strGZ, "rb");
    311         if (!fOut) return 1; //error
    312         fseek(fIn, 0, SEEK_SET);  // go to the top of the files
    313         gzseek(fOut, 0, SEEK_SET);
    314         unsigned char buf[1024];
    315         long lRead = 0, lWrite = 0;
    316         while (!gzeof(fOut)) { // read 1KB at a time until end of file
    317                 memset(buf, 0x00, 1024);
    318                 lRead = 0;
    319                 lRead = (long) gzread(fOut,buf,1024);
    320                 lWrite = (long) fwrite(buf, 1, 1024, fIn);
    321                 if (lRead != lWrite) break;
    322                 //boinc_get_status(&s);
    323                 //if (s.quit_request || s.abort_request || s.no_heartbeat) break;
    324         }
    325         gzclose(fOut);
    326         fclose(fIn);
    327         //checkBOINCStatus();
    328         if (lRead != lWrite) return 1;  //error -- read bytes != written bytes
    329         // if we made it here, it compressed OK, can erase strInput and leave
    330         if (!bKeep) boinc_delete_file(strGZ);
    331         return 0;
     315//      if needed use sm->statusBOINC instead (for quit_request etc)
     316
     317int do_gunzip(const char* strGZ, const char* strInput, bool bKeep) {
     318        // take a compressed file (strGZ) and decompress it into an output file (strInput)
     319        // get rid of the compressed file after, unless bKeep is set
     320        //s.quit_request = 0;
     321        //checkBOINCStatus();
     322        FILE* fIn = boinc_fopen(strInput, "wb");
     323        if (!fIn)  return 1; //error
     324        gzFile fOut = gzopen(strGZ, "rb");
     325        if (!fOut) return 1; //error
     326        fseek(fIn, 0, SEEK_SET);  // go to the top of the files
     327        gzseek(fOut, 0, SEEK_SET);
     328        unsigned char buf[1024];
     329        long lRead = 0, lWrite = 0;
     330        while (!gzeof(fOut)) { // read 1KB at a time until end of file
     331                memset(buf, 0x00, 1024);
     332                lRead = 0;
     333                lRead = (long) gzread(fOut,buf,1024);
     334                lWrite = (long) fwrite(buf, 1, 1024, fIn);
     335                if (lRead != lWrite) break;
     336                //boinc_get_status(&s);
     337                //if (s.quit_request || s.abort_request || s.no_heartbeat) break;
     338        }
     339        gzclose(fOut);
     340        fclose(fIn);
     341        //checkBOINCStatus();
     342        if (lRead != lWrite) return 1;  //error -- read bytes != written bytes
     343        // if we made it here, it decompressed OK, can erase strGZ (unless bKeep) and leave
     344        if (!bKeep) boinc_delete_file(strGZ);
     345        return 0;
    332346}
    333347
    334348}}}
     349