Class: Mongo::GridIO

Inherits:
Object show all
Includes:
WriteConcern
Defined in:
lib/mongo/gridfs/grid_io.rb

Overview

GridIO objects represent files in the GridFS specification. This class manages the reading and writing of file chunks and metadata.

Constant Summary

DEFAULT_CHUNK_SIZE =
256 * 1024
DEFAULT_CONTENT_TYPE =
'binary/octet-stream'
PROTECTED_ATTRS =
[:files_id, :file_length, :client_md5, :server_md5]

Instance Attribute Summary (collapse)

Attributes included from WriteConcern

#legacy_write_concern

Instance Method Summary (collapse)

Methods included from WriteConcern

#get_write_concern, gle?, #write_concern_from_legacy

Constructor Details

- (GridIO) initialize(files, chunks, filename, mode, opts = {})

Create a new GridIO object. Note that most users will not need to use this class directly; the Grid and GridFileSystem classes will instantiate this class.

Parameters:

  • files (Mongo::Collection)

    a collection for storing file metadata.

  • chunks (Mongo::Collection)

    a collection for storing file chunks.

  • filename (String)

    the name of the file to open or write.

  • mode (String)

    ‘r’ or ‘w’ for reading or creating a file.

  • opts (Hash) (defaults to: {})

    a customizable set of options

Options Hash (opts):

  • :query (Hash)

    a query selector used when opening the file in ‘r’ mode.

  • :query_opts (Hash)

    any query options to be used when opening the file in ‘r’ mode.

  • :fs_name (String)

    the file system prefix.

  • :chunk_size (Integer) — default: 262144

    size of file chunks in bytes.

  • :metadata (Hash) — default: {}

    any additional data to store with the file.

  • :_id (ObjectId) — default: ObjectId

    a unique id for the file to be used in lieu of an automatically generated one.

  • :content_type (String) — default: 'binary/octet-stream'

    If no content type is specified, the content type may be inferred from the filename extension if the mime-types gem can be loaded. Otherwise, the content type ‘binary/octet-stream’ will be used.

  • :w (String, Integer, Symbol) — default: 1

    Set the write concern

    Notes on write concern:

    
      When :w > 0, the chunks sent to the server
      will be validated using an md5 hash. If validation fails, an exception will be raised.
    


54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/mongo/gridfs/grid_io.rb', line 54

# Set up a GridIO over the given collections and open it for reading or
# writing.
#
# files    - collection holding file metadata documents.
# chunks   - collection holding file chunks.
# filename - name of the file to open or write.
# mode     - 'r' to read an existing file, 'w' to create a new one.
# opts     - options hash; :query, :query_opts and :fs_name are consumed
#            here, the remainder is handed to get_write_concern and, in
#            write mode, to init_write. The caller's hash is not modified.
#
# Raises GridError when mode is neither 'r' nor 'w'.
def initialize(files, chunks, filename, mode, opts={})
  # Work on a copy so the deletes below never touch the caller's hash.
  opts = opts.dup

  @files, @chunks  = files, chunks
  @filename, @mode = filename, mode

  @query      = opts.delete(:query) || {}
  @query_opts = opts.delete(:query_opts) || {}
  @fs_name    = opts.delete(:fs_name) || Grid::DEFAULT_FS_NAME

  @write_concern = get_write_concern(opts)
  # A client-side digest is only kept when writes are acknowledged.
  if Mongo::WriteConcern.gle?(@write_concern)
    @local_md5 = Digest::MD5.new
  end
  @custom_attrs = {}

  case @mode
  when 'r'
    init_read
  when 'w'
    init_write(opts)
  else
    raise GridError, "Invalid file mode #{@mode}. Mode should be 'r' or 'w'."
  end
end

Instance Attribute Details

- (Object) chunk_size (readonly)

Returns the value of attribute chunk_size



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @chunk_size. init_read loads it from the file document's
# 'chunkSize' field; init_write takes it from opts[:chunk_size], falling
# back to DEFAULT_CHUNK_SIZE.
def chunk_size
  @chunk_size
end

- (Object) client_md5 (readonly)

Returns the value of attribute client_md5



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @client_md5. get_md5 sets it to the hex digest of the locally
# accumulated MD5, and only when the write concern is acknowledged; it is
# nil otherwise.
def client_md5
  @client_md5
end

- (Object) content_type (readonly)

Returns the value of attribute content_type



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @content_type. init_read loads it from the file document's
# 'contentType' field; init_write uses opts[:content_type], then a type
# guessed from the filename when MIME is defined, then DEFAULT_CONTENT_TYPE.
def content_type
  @content_type
end

- (Object) file_length (readonly)

Returns the value of attribute file_length



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @file_length. init_read loads it from the file document's
# 'length' field; init_write initialises it to 0.
def file_length
  @file_length
end

- (Object) file_position (readonly)

Returns the value of attribute file_position



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @file_position. init_read and init_write start it at 0 and
# seek moves it to the requested target position.
def file_position
  @file_position
end

- (Object) filename (readonly)

Returns the value of attribute filename



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @filename. The constructor stores the name it was given; in
# read mode init_read replaces it with the document's 'filename' field.
def filename
  @filename
end

- (Object) files_id (readonly)

Returns the value of attribute files_id



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @files_id. init_read loads it from the document's '_id';
# init_write uses opts[:_id] or a freshly generated BSON::ObjectId.
def files_id
  @files_id
end

- (Object) metadata (readonly)

Returns the value of attribute metadata



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @metadata, the user-supplied metadata stored with the file.
# init_read loads it from the document's 'metadata' field; init_write
# takes it from opts[:metadata].
def metadata
  @metadata
end

- (Object) server_md5 (readonly)

Returns the value of attribute server_md5



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @server_md5. get_md5 sets it to the 'md5' value returned by
# the server's filemd5 command.
def server_md5
  @server_md5
end

- (Object) upload_date (readonly)

Returns the value of attribute upload_date



28
29
30
# File 'lib/mongo/gridfs/grid_io.rb', line 28

# Reader for @upload_date. init_read loads it from the document's
# 'uploadDate' field; close sets it to the current UTC time in write mode.
def upload_date
  @upload_date
end

Instance Method Details

- (Object) [](key)



75
76
77
# File 'lib/mongo/gridfs/grid_io.rb', line 75

# Fetch an attribute by name.
#
# A truthy entry in @custom_attrs wins; otherwise the instance variable
# named after the key is returned (nil when it was never set).
def [](key)
  custom = @custom_attrs[key]
  return custom if custom

  instance_variable_get("@#{key}")
end

- (Object) []=(key, value)



79
80
81
82
83
84
85
86
# File 'lib/mongo/gridfs/grid_io.rb', line 79

# Store a custom attribute under key.
#
# Keys listed in PROTECTED_ATTRS cannot be overwritten: a warning is
# printed and nil is returned without storing anything. Otherwise the
# value is saved in @custom_attrs and returned.
def []=(key, value)
  unless PROTECTED_ATTRS.include?(key.to_sym)
    return @custom_attrs[key] = value
  end

  warn "Attempting to overwrite protected value."
  nil
end

- (Object) cache_chunk_data



372
373
374
375
376
377
378
# File 'lib/mongo/gridfs/grid_io.rb', line 372

# Cache the current chunk's payload as a binary-encoded string in
# @current_chunk_data and remember its length in @chunk_data_length.
def cache_chunk_data
  payload = @current_chunk['data']
  @current_chunk_data = payload.to_s
  # Strings only carry an encoding on Ruby 1.9+, hence the respond_to? check.
  @current_chunk_data.force_encoding("binary") if @current_chunk_data.respond_to?(:force_encoding)
  @chunk_data_length = payload.length
end

- (Object) check_existing_file



438
439
440
441
442
# File 'lib/mongo/gridfs/grid_io.rb', line 438

# Refuse to overwrite: raises GridError when the files collection already
# holds a document whose _id equals @files_id. Returns nil otherwise.
def check_existing_file
  existing = @files.find_one('_id' => @files_id)
  raise GridError, "Attempting to overwrite with Grid#put. You must delete the file first." if existing
end

- (BSON::ObjectId) close

Creates or updates the document from the files collection that stores the chunks’ metadata. The file becomes available only after this method has been called.

This method will be invoked automatically when GridIO#open is passed a block. Otherwise, it must be called manually.

Returns:



233
234
235
236
237
238
239
240
241
242
# File 'lib/mongo/gridfs/grid_io.rb', line 233

# Finish a write by inserting the file's metadata document.
#
# In write mode this stamps @upload_date with the current UTC time, inserts
# the result of to_mongo_object into the files collection and returns
# whatever that insert returns. A warning is printed when nothing has been
# written. In any other mode it does nothing and returns nil.
def close
  return nil unless @mode[0] == ?w

  nothing_written = @current_chunk['n'].zero? && @chunk_position.zero?
  warn "Warning: Storing a file with zero length." if nothing_written

  @upload_date = Time.now.utc
  @files.insert(to_mongo_object)
end

- (Mongo::GridIO) each { ... }

Read a chunk of the data from the file and yield it to the given block.

Note that this method reads from the current file position.

Yields:

  • Yields one chunk per iteration as defined by this file’s chunk size.

Returns:



253
254
255
256
257
258
259
260
# File 'lib/mongo/gridfs/grid_io.rb', line 253

# Iterate over the file in chunk_size pieces, starting at the current
# position.
#
# Without a block the result of read_all is returned instead. With a block,
# every piece returned by read is yielded until read returns a falsy value
# or an empty piece (the empty piece itself is still yielded). Returns self.
def each
  return read_all unless block_given?

  piece = read(chunk_size)
  while piece
    yield piece
    break if piece.empty?
    piece = read(chunk_size)
  end
  self
end

- (Boolean) eof Also known as: eof?

Return a boolean indicating whether the position pointer is at the end of the file.

Returns:

  • (Boolean)

Raises:



187
188
189
190
# File 'lib/mongo/gridfs/grid_io.rb', line 187

# True when the file position has reached or passed the file length.
#
# Raises GridError unless the file was opened for reading.
def eof
  if @mode[0] != ?r
    raise GridError, "file not opened for read #{@mode}"
  end

  @file_position >= @file_length
end

- (Object) get_content_type

Determine the content type based on the filename.



478
479
480
481
482
483
484
# File 'lib/mongo/gridfs/grid_io.rb', line 478

# Guess a content type from @filename via MIME::Types.
#
# Returns the simplified name of the first matching type, or nil when there
# is no filename or no type matches.
def get_content_type
  return nil unless @filename

  candidates = MIME::Types.type_for(@filename)
  return nil unless candidates

  candidates.empty? ? nil : candidates.first.simplified
end

- (Object) get_md5

Get a server-side md5 and validate against the client if running with acknowledged writes



460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'lib/mongo/gridfs/grid_io.rb', line 460

# Ask the server for the file's md5 and, for acknowledged writes, verify it
# against the digest accumulated on the client.
#
# Stores the server value in @server_md5 and, when verifying, the client
# hex digest in @client_md5. Returns the server md5.
#
# Raises GridMD5Failure when the two digests differ.
def get_md5
  # filemd5 must be the first key, hence the ordered hash.
  command            = BSON::OrderedHash.new
  command['filemd5'] = @files_id
  command['root']    = @fs_name
  @server_md5 = @files.db.command(command)['md5']

  return @server_md5 unless Mongo::WriteConcern.gle?(@write_concern)

  @client_md5 = @local_md5.hexdigest
  # Comparing a Digest with a String checks the digest's hex value.
  unless @local_md5 == @server_md5
    raise GridMD5Failure, "File on server failed MD5 check"
  end
  @server_md5
end

- (String) getc

Return the next byte from the GridFS file.

Returns:



221
222
223
# File 'lib/mongo/gridfs/grid_io.rb', line 221

# Read a single byte by delegating to read_length(1) and return its result.
# NOTE(review): read_length is defined outside this view; its behaviour at
# end of file should be confirmed there.
def getc
  read_length(1)
end

- (String) gets(separator = "\n", length = nil)

Return the next line from a GridFS file. This probably makes sense only if you’re storing plain text. This method has a somewhat tricky API, which it inherits from Ruby’s StringIO#gets.

Parameters:

  • separator (String, Integer) (defaults to: "\n")

    or length. If a separator, read up to the separator. If a length, read the length number of bytes. If nil, read the entire file.

  • length (Integer) (defaults to: nil)

    If a separator is provided, then read until either finding the separator or passing over the length number of bytes.

Returns:



206
207
208
209
210
211
212
213
214
215
216
# File 'lib/mongo/gridfs/grid_io.rb', line 206

# Read the next "line" from the file, dispatching on the kind of separator.
#
# separator - nil reads everything (read_all); an Integer reads that many
#             bytes (read_length); a multi-character string is handled by
#             read_to_string; anything else by read_to_character.
# length    - optional limit forwarded to read_to_string / read_to_character.
def gets(separator="\n", length=nil)
  case separator
  when nil
    read_all
  when Integer
    read_length(separator)
  else
    if separator.length > 1
      read_to_string(separator, length)
    else
      read_to_character(separator, length)
    end
  end
end

- (Object) init_read

Initialize the class for reading a file.

Raises:



403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/mongo/gridfs/grid_io.rb', line 403

# Prepare the object for reading: load the first files-collection document
# matching @query / @query_opts, copy its fields into instance variables,
# fetch chunk 0 and reset the file position to 0.
#
# The whole document is also kept as @custom_attrs so that extra fields
# remain reachable through #[].
#
# Raises GridFileNotFound when no document matches.
def init_read
  doc = @files.find(@query, @query_opts).next_document
  raise GridFileNotFound, "Could not open file matching #{@query.inspect} #{@query_opts.inspect}" unless doc

  @files_id     = doc['_id']
  @content_type = doc['contentType']
  @chunk_size   = doc['chunkSize']
  @upload_date  = doc['uploadDate']
  @aliases      = doc['aliases']
  @file_length  = doc['length']
  @metadata     = doc['metadata']
  @md5          = doc['md5']
  @filename     = doc['filename']
  @custom_attrs = doc

  @current_chunk = get_chunk(0)
  @file_position = 0
end

- (Object) init_write(opts)

Initialize the class for writing a file.



423
424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'lib/mongo/gridfs/grid_io.rb', line 423

# Prepare the object for writing a new file.
#
# opts - options hash (copied, never mutated). Recognised keys are :_id,
#        :content_type, :chunk_size, :metadata and :aliases; every other
#        entry is stored as a custom attribute through #[]=.
#
# When the write concern is acknowledged, check_existing_file is called so
# an existing file with the same _id is not overwritten.
def init_write(opts)
  opts           = opts.dup
  @files_id      = opts.delete(:_id) || BSON::ObjectId.new
  # Precedence: explicit option, then a MIME-based guess (only when the MIME
  # constant is loaded), then the default.
  @content_type  = opts.delete(:content_type) || (defined? MIME) && get_content_type || DEFAULT_CONTENT_TYPE
  @chunk_size    = opts.delete(:chunk_size) || DEFAULT_CHUNK_SIZE
  @metadata      = opts.delete(:metadata)
  @aliases       = opts.delete(:aliases)
  @file_length   = 0
  opts.each {|k, v| self[k] = v}
  check_existing_file if Mongo::WriteConcern.gle?(@write_concern)

  @current_chunk = create_chunk(0)
  @file_position = 0
end

- (Object) inspect



262
263
264
# File 'lib/mongo/gridfs/grid_io.rb', line 262

# Short representation that shows only the file's _id (@files_id).
def inspect
  "#<GridIO _id: #{@files_id}>"
end

- (String) read(length = nil) Also known as: data

Read the data from the file. If a length is specified, will read from the current file position.

Parameters:

  • length (Integer) (defaults to: nil)

Returns:

  • (String)

    the data in the file



95
96
97
98
99
100
101
102
103
104
# File 'lib/mongo/gridfs/grid_io.rb', line 95

# Read data from the file.
#
# length - number of bytes to read; nil (the default) means "no limit".
#
# Returns '' for an empty file or a zero length. With no length and the
# position at the start, the result of read_all is returned; every other
# case is delegated to read_length(length).
def read(length=nil)
  return '' if @file_length.zero? || length == 0
  return read_all if length.nil? && @file_position.zero?

  read_length(length)
end

- (Integer) rewind

Rewind the file. This is equivalent to seeking to the zeroth position.

Returns:

  • (Integer)

    the position of the file after rewinding (always zero).

Raises:



178
179
180
181
# File 'lib/mongo/gridfs/grid_io.rb', line 178

# Move back to the start of the file; returns whatever seek(0) returns.
#
# Raises GridError unless the file was opened for reading.
def rewind
  if @mode[0] == ?r
    seek(0)
  else
    raise GridError, "file not opened for read"
  end
end

- (Integer) seek(pos, whence = IO::SEEK_SET)

Position the file pointer at the provided location.

Parameters:

  • pos (Integer)

    the number of bytes to advance the file pointer. this can be a negative number.

  • whence (Integer) (defaults to: IO::SEEK_SET)

    one of IO::SEEK_CUR, IO::SEEK_END, or IO::SEEK_SET

Returns:

  • (Integer)

    the new file position

Raises:



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/mongo/gridfs/grid_io.rb', line 146

# Position the file pointer.
#
# pos    - byte offset, interpreted relative to whence; may be negative.
# whence - IO::SEEK_SET (absolute, the default), IO::SEEK_CUR (relative to
#          the current position) or IO::SEEK_END (relative to file length).
#
# Loads the chunk containing the target position when it differs from the
# current one, updates @file_position and @chunk_position, and returns the
# new file position.
#
# Raises GridError when the file is not open in read mode or when whence is
# not one of the three IO::SEEK_* constants.
def seek(pos, whence=IO::SEEK_SET)
  raise GridError, "Seek is only allowed in read mode." unless @mode == 'r'
  target_pos = case whence
               when IO::SEEK_CUR
                 @file_position + pos
               when IO::SEEK_END
                 @file_length + pos
               when IO::SEEK_SET
                 pos
               else
                 # Previously this fell through with nil and blew up below
                 # with an unrelated NoMethodError.
                 raise GridError, "Invalid whence #{whence.inspect}. Expected IO::SEEK_SET, IO::SEEK_CUR or IO::SEEK_END."
               end

  new_chunk_number = (target_pos / @chunk_size).to_i
  # No chunk needs saving here: the guard above restricts seek to read mode.
  if new_chunk_number != @current_chunk['n']
    @current_chunk = get_chunk(new_chunk_number)
  end
  @file_position  = target_pos
  @chunk_position = @file_position % @chunk_size
  @file_position
end

- (Integer) tell Also known as: pos

The current position of the file.

Returns:

  • (Integer)


170
171
172
# File 'lib/mongo/gridfs/grid_io.rb', line 170

# Current position in the file: returns @file_position, which
# init_read/init_write start at 0 and seek updates.
def tell
  @file_position
end

- (Object) to_mongo_object



444
445
446
447
448
449
450
451
452
453
454
455
456
457
# File 'lib/mongo/gridfs/grid_io.rb', line 444

# Build the files-collection document describing this file.
#
# 'filename', 'aliases' and 'metadata' are only included when set. 'length'
# is derived from the current chunk number and the position inside it.
# 'md5' comes from get_md5, so building the document issues a server
# command and may raise GridMD5Failure. Custom attributes are merged in
# last and therefore override same-named standard keys.
def to_mongo_object
  h                = BSON::OrderedHash.new
  h['_id']         = @files_id
  h['filename']    = @filename if @filename
  h['contentType'] = @content_type
  h['length']      = @current_chunk ? @current_chunk['n'] * @chunk_size + @chunk_position : 0
  h['chunkSize']   = @chunk_size
  h['uploadDate']  = @upload_date
  h['aliases']     = @aliases if @aliases
  h['metadata']    = @metadata if @metadata
  h['md5']         = get_md5
  h.merge!(@custom_attrs)
  h
end

- (Integer) write(io)

Write the given string (binary) data to the file.

Parameters:

  • io (String, IO)

    the data to write: either a String, or an IO-like object that responds to read.

Returns:

  • (Integer)

    the number of bytes written.

Raises:



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/mongo/gridfs/grid_io.rb', line 114

# Write data to the file.
#
# io - a String, or any object whose read(n) returns successive pieces and
#      a falsy value when exhausted.
#
# With an acknowledged write concern every piece is also fed into the
# client-side MD5. Returns the number of bytes written as reported by
# write_string.
#
# Raises GridError unless the file was opened for writing.
def write(io)
  raise GridError, "file not opened for write" unless @mode[0] == ?w

  track_md5 = Mongo::WriteConcern.gle?(@write_concern)

  if io.is_a?(String)
    @local_md5.update(io) if track_md5
    return write_string(io)
  end

  total = 0
  while (piece = io.read(@chunk_size))
    @local_md5.update(piece) if track_md5
    total += write_string(piece)
  end
  total
end

- (Object) write_string(string)



380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
# File 'lib/mongo/gridfs/grid_io.rb', line 380

# Append a string to the file, spilling into new chunks as each one fills.
#
# The string is switched to binary encoding in place (on Rubies that have
# encodings) so that lengths and slices count bytes rather than characters.
# Every step appends as much as fits in the current chunk and saves that
# chunk. Returns the number of bytes written.
def write_string(string)
  string.force_encoding("binary") if string.respond_to?(:force_encoding)

  remaining = string.length
  until remaining <= 0
    # The current chunk is full: start the next one.
    if @current_chunk && @chunk_position == @chunk_size
      @current_chunk = create_chunk(@current_chunk['n'] + 1)
    end

    room = @chunk_size - @chunk_position
    step = [remaining, room].min
    # A negative start index addresses the not-yet-written tail.
    appended = @current_chunk['data'].to_s << string[-remaining, step]
    @current_chunk['data'] = BSON::Binary.new(appended.unpack("c*"))

    @chunk_position += step
    remaining -= step
    save_chunk(@current_chunk)
  end
  string.length - remaining
end

  # Initialize the class for reading a file.
  # Loads the first files-collection document matching @query / @query_opts,
  # copies its fields into instance variables, fetches chunk 0 and resets
  # the file position to 0. The whole document is also kept as
  # @custom_attrs so extra fields remain reachable through #[].
  #
  # Raises GridFileNotFound when no document matches.
  def init_read
    doc = @files.find(@query, @query_opts).next_document
    raise GridFileNotFound, "Could not open file matching #{@query.inspect} #{@query_opts.inspect}" unless doc

    @files_id     = doc['_id']
    @content_type = doc['contentType']
    @chunk_size   = doc['chunkSize']
    @upload_date  = doc['uploadDate']
    @aliases      = doc['aliases']
    @file_length  = doc['length']
    @metadata     = doc['metadata']
    @md5          = doc['md5']
    @filename     = doc['filename']
    @custom_attrs = doc

    @current_chunk = get_chunk(0)
    @file_position = 0
  end

  # Initialize the class for writing a file.
  # opts is copied, never mutated. Recognised keys are :_id, :content_type,
  # :chunk_size, :metadata and :aliases; every other entry is stored as a
  # custom attribute through #[]=. With an acknowledged write concern,
  # check_existing_file is called so an existing _id is not overwritten.
  def init_write(opts)
    opts           = opts.dup
    @files_id      = opts.delete(:_id) || BSON::ObjectId.new
    # Precedence: explicit option, then a MIME-based guess (only when the
    # MIME constant is loaded), then the default.
    @content_type  = opts.delete(:content_type) || (defined? MIME) && get_content_type || DEFAULT_CONTENT_TYPE
    @chunk_size    = opts.delete(:chunk_size) || DEFAULT_CHUNK_SIZE
    @metadata      = opts.delete(:metadata)
    @aliases       = opts.delete(:aliases)
    @file_length   = 0
    opts.each {|k, v| self[k] = v}
    check_existing_file if Mongo::WriteConcern.gle?(@write_concern)

    @current_chunk = create_chunk(0)
    @file_position = 0
  end

  # Refuse to overwrite: raises GridError when the files collection already
  # holds a document whose _id equals @files_id. Returns nil otherwise.
  def check_existing_file
    existing = @files.find_one('_id' => @files_id)
    raise GridError, "Attempting to overwrite with Grid#put. You must delete the file first." if existing
  end

  # Build the files-collection document describing this file.
  #
  # 'filename', 'aliases' and 'metadata' are only included when set.
  # 'length' is derived from the current chunk number and the position
  # inside it. 'md5' comes from get_md5, so building the document issues a
  # server command and may raise GridMD5Failure. Custom attributes are
  # merged in last and therefore override same-named standard keys.
  def to_mongo_object
    h                = BSON::OrderedHash.new
    h['_id']         = @files_id
    h['filename']    = @filename if @filename
    h['contentType'] = @content_type
    h['length']      = @current_chunk ? @current_chunk['n'] * @chunk_size + @chunk_position : 0
    h['chunkSize']   = @chunk_size
    h['uploadDate']  = @upload_date
    h['aliases']     = @aliases if @aliases
    h['metadata']    = @metadata if @metadata
    h['md5']         = get_md5
    h.merge!(@custom_attrs)
    h
  end

  # Get a server-side md5 and validate against the client if running with acknowledged writes
  # Stores the server value in @server_md5 and, when verifying, the client
  # hex digest in @client_md5. Returns the server md5.
  #
  # Raises GridMD5Failure when the two digests differ.
  def get_md5
    # filemd5 must be the first key, hence the ordered hash.
    command            = BSON::OrderedHash.new
    command['filemd5'] = @files_id
    command['root']    = @fs_name
    @server_md5 = @files.db.command(command)['md5']

    return @server_md5 unless Mongo::WriteConcern.gle?(@write_concern)

    @client_md5 = @local_md5.hexdigest
    # Comparing a Digest with a String checks the digest's hex value.
    unless @local_md5 == @server_md5
      raise GridMD5Failure, "File on server failed MD5 check"
    end
    @server_md5
  end

  # Determine the content type based on the filename.
  # Returns the simplified name of the first type MIME::Types reports for
  # @filename, or nil when there is no filename or no type matches.
  def get_content_type
    return nil unless @filename

    candidates = MIME::Types.type_for(@filename)
    return nil unless candidates

    candidates.empty? ? nil : candidates.first.simplified
  end
end