bes  Updated for version 3.17.4
BESDapFunctionResponseCache.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of Hyrax, a C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2016 OPeNDAP, Inc.
7 // Author: Nathan David Potter <ndp@opendap.org>
8 // James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 //#define DODS_DEBUG
29 
30 #include <cstdio>
31 #include <unistd.h>
32 #include <sys/stat.h>
33 
34 #include <iostream>
35 #include <string>
36 #include <fstream>
37 #include <sstream>
38 
39 #ifdef HAVE_TR1_FUNCTIONAL
40 #include <tr1/functional>
41 #endif
42 
43 #include <DDS.h>
44 #include <ConstraintEvaluator.h>
45 #include <DDXParserSAX2.h>
46 
47 #include <XDRStreamMarshaller.h>
48 #include <XDRStreamUnMarshaller.h>
49 #include <XDRFileUnMarshaller.h>
50 
51 #include <D4StreamMarshaller.h>
52 #include <D4StreamUnMarshaller.h>
53 
54 #include <Sequence.h> // We have to special-case these; see read_data_ddx()
55 
56 #include <debug.h>
57 #include <mime_util.h> // for last_modified_time() and rfc_822_date()
58 #include <util.h>
59 
60 #include "CacheTypeFactory.h"
61 #include "CacheMarshaller.h"
62 #include "CacheUnMarshaller.h"
63 
64 #include "BESDapFunctionResponseCache.h"
65 #include "BESDapResponseBuilder.h"
66 #include "BESInternalError.h"
67 
68 #include "BESUtil.h"
69 #include "TheBESKeys.h"
70 #include "BESLog.h"
71 #include "BESDebug.h"
72 
73 #define DEBUG_KEY "response_cache"
74 
75 #ifdef HAVE_TR1_FUNCTIONAL
76 #define HASH_OBJ std::tr1::hash
77 #else
78 #define HASH_OBJ std::hash
79 #endif
80 
81 using namespace std;
82 using namespace libdap;
83 
84 const string DATA_MARK = "--DATA:";
85 
86 // If the size of the constraint is larger then this value, don't cache the response.
87 const unsigned int max_cacheable_ce_len = 4096;
88 const unsigned int max_collisions = 50; // It's hard to believe this could happen
89 
90 const unsigned int default_cache_size = 20; // 20 GB
91 const string default_cache_prefix = "rc";
92 const string default_cache_dir = ""; // I'm making the default empty so that no key == no caching. jhrg 9.26.16
93 
94 const string BESDapFunctionResponseCache::PATH_KEY = "DAP.FunctionResponseCache.path";
95 const string BESDapFunctionResponseCache::PREFIX_KEY = "DAP.FunctionResponseCache.prefix";
96 const string BESDapFunctionResponseCache::SIZE_KEY = "DAP.FunctionResponseCache.size";
97 
98 BESDapFunctionResponseCache *BESDapFunctionResponseCache::d_instance = 0;
99 
100 unsigned long BESDapFunctionResponseCache::get_cache_size_from_config()
101 {
102  bool found;
103  string size;
104  unsigned long size_in_megabytes = default_cache_size;
105  TheBESKeys::TheKeys()->get_value(SIZE_KEY, size, found);
106  if (found) {
107  BESDEBUG(DEBUG_KEY,
108  "BESDapFunctionResponseCache::getCacheSizeFromConfig(): Located BES key " << SIZE_KEY<< "=" << size << endl);
109  istringstream iss(size);
110  iss >> size_in_megabytes;
111  }
112 
113  return size_in_megabytes;
114 }
115 
116 string BESDapFunctionResponseCache::get_cache_prefix_from_config()
117 {
118  bool found;
119  string prefix = default_cache_prefix;
120  TheBESKeys::TheKeys()->get_value(PREFIX_KEY, prefix, found);
121  if (found) {
122  BESDEBUG(DEBUG_KEY,
123  "BESDapFunctionResponseCache::getCachePrefixFromConfig(): Located BES key " << PREFIX_KEY<< "=" << prefix << endl);
124  prefix = BESUtil::lowercase(prefix);
125  }
126 
127  return prefix;
128 }
129 
130 // If the cache prefix is the empty string, the cache is turned off.
131 string BESDapFunctionResponseCache::get_cache_dir_from_config()
132 {
133  bool found;
134 
135  string cacheDir = default_cache_dir;
136  TheBESKeys::TheKeys()->get_value(PATH_KEY, cacheDir, found);
137  if (found) {
138  BESDEBUG(DEBUG_KEY,
139  "BESDapFunctionResponseCache::getCacheDirFromConfig(): Located BES key " << PATH_KEY<< "=" << cacheDir << endl);
140  }
141 
142  return cacheDir;
143 }
144 
163 BESDapFunctionResponseCache::get_instance(const string &cache_dir, const string &prefix, unsigned long long size)
164 {
165  if (d_instance == 0) {
166  if (!cache_dir.empty() && dir_exists(cache_dir)) {
167  d_instance = new BESDapFunctionResponseCache(cache_dir, prefix, size);
168 #ifdef HAVE_ATEXIT
169  atexit(delete_instance);
170 #endif
171  }
172  }
173 
174  BESDEBUG(DEBUG_KEY,
175  "BESDapFunctionResponseCache::get_instance(dir,prefix,size) - d_instance: " << d_instance << endl);
176 
177  return d_instance;
178 }
179 
181 BESDapFunctionResponseCache::get_instance()
182 {
183  if (d_instance == 0) {
184  string cache_dir = get_cache_dir_from_config();
185  if (!cache_dir.empty() && dir_exists(cache_dir)) {
186  d_instance = new BESDapFunctionResponseCache(get_cache_dir_from_config(), get_cache_prefix_from_config(),
187  get_cache_size_from_config());
188 #ifdef HAVE_ATEXIT
189  atexit(delete_instance);
190 #endif
191  }
192  }
193 
194  BESDEBUG(DEBUG_KEY, "BESDapFunctionResponseCache::get_instance() - d_instance: " << d_instance << endl);
195 
196  return d_instance;
197 }
199 
209 bool BESDapFunctionResponseCache::is_valid(const string &cache_file_name, const string &dataset)
210 {
211  // If the cached response is zero bytes in size, it's not valid. This is true
212  // because a DAP data object, even if it has no data still has a metadata part.
213  // jhrg 10/20/15
214 
215  off_t entry_size = 0;
216  time_t entry_time = 0;
217  struct stat buf;
218  if (stat(cache_file_name.c_str(), &buf) == 0) {
219  entry_size = buf.st_size;
220  entry_time = buf.st_mtime;
221  }
222  else {
223  return false;
224  }
225 
226  if (entry_size == 0) return false;
227 
228  time_t dataset_time = entry_time;
229  if (stat(dataset.c_str(), &buf) == 0) {
230  dataset_time = buf.st_mtime;
231  }
232 
233  // Trick: if the d_dataset is not a file, stat() returns error and
234  // the times stay equal and the code uses the cache entry.
235 
236  // TODO Fix this so that the code can get a LMT from the correct handler.
237  if (dataset_time > entry_time) return false;
238 
239  return true;
240 }
241 
242 string BESDapFunctionResponseCache::get_resource_id(DDS *dds, const string &constraint)
243 {
244  return dds->filename() + "#" + constraint;
245 }
246 
247 bool BESDapFunctionResponseCache::can_be_cached(DDS *dds, const string &constraint)
248 {
249  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " constraint + dds->filename() length: "
250  << constraint.length() + dds->filename().size() << endl);
251 
252  return (constraint.length() + dds->filename().size() <= max_cacheable_ce_len);
253 }
254 
262 string BESDapFunctionResponseCache::get_hash_basename(const string &resource_id)
263 {
264  // Get a hash function for strings
265  HASH_OBJ<string> str_hash;
266  size_t hashValue = str_hash(resource_id);
267  stringstream hashed_id;
268  hashed_id << hashValue;
269  string cache_file_name = getCacheDirectory();
270  cache_file_name.append("/").append(getCacheFilePrefix()).append(hashed_id.str());
271 
272  return cache_file_name;
273 }
274 
296 DDS *
297 BESDapFunctionResponseCache::get_or_cache_dataset(DDS *dds, const string &constraint)
298 {
299  // Build the response_id. Since the response content is a function of both the dataset AND the constraint,
300  // glue them together to get a unique id for the response.
301  string resourceId = dds->filename() + "#" + constraint;
302 
303  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " resourceId: '" << resourceId << "'" << endl);
304 
305  // Get a hash function for strings
306  HASH_OBJ<string> str_hash;
307 
308  // Use the hash function to hash the resourceId.
309  size_t hashValue = str_hash(resourceId);
310  stringstream hashed_id;
311  hashed_id << hashValue;
312 
313  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " hashed_id: '" << hashed_id.str() << "'" << endl);
314 
315  // Use the parent class's get_cache_file_name() method and its associated machinery to get the file system path for the cache file.
316  // We store it in a variable called basename because the value is later extended as part of the collision avoidance code.
317  string cache_file_name = BESFileLockingCache::get_cache_file_name(hashed_id.str(), false);
318 
319  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " cache_file_name: '" << cache_file_name << "'" << endl);
320 
321  // Does the cached dataset exist? if yes, ret_dds points to it. If no,
322  // cache_file_name is updated to be the correct name for write_dataset_
323  // to_cache().
324  DDS *ret_dds = 0;
325  if ((ret_dds = load_from_cache(resourceId, cache_file_name))) {
326  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data loaded from cache file: " << cache_file_name << endl);
327  ret_dds->filename(dds->filename());
328  }
329  else if ((ret_dds = write_dataset_to_cache(dds, resourceId, constraint, cache_file_name))) {
330  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data written to cache file: " << cache_file_name << endl);
331  }
332  // get_read_lock() returns immediately if the file does not exist,
333  // but blocks waiting to get a shared lock if the file does exist.
334  else if ((ret_dds = load_from_cache(resourceId, cache_file_name))) {
335  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Data loaded from cache file (2nd try): " << cache_file_name << endl);
336  ret_dds->filename(dds->filename());
337  }
338 
339  BESDEBUG(DEBUG_KEY,__FUNCTION__ << " Used cache_file_name: " << cache_file_name << " for resource ID: " << resourceId << endl);
340 
341  return ret_dds;
342 }
343 
360 DDS *
361 BESDapFunctionResponseCache::load_from_cache(const string &resource_id, string &cache_file_name)
362 {
363  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " resource_id: " << resource_id << endl);
364 
365  DDS *cached_dds = 0; // nullptr
366 
367  unsigned long suffix_counter = 0;
368  bool keep_looking = true;
369  do {
370  if (suffix_counter > max_collisions) {
371  stringstream ss;
372  ss << "Cache error! There are " << suffix_counter << " hash collisions for the resource '" << resource_id
373  << "' And that is a bad bad thing.";
374  throw BESInternalError(ss.str(), __FILE__, __LINE__);
375  }
376 
377  // Build cache_file_name and cache_id_file_name from baseName
378  stringstream cfname;
379  cfname << cache_file_name << "_" << suffix_counter++;
380 
381  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " candidate cache_file_name: " << cfname.str() << endl);
382 
383  int fd; // unused
384  if (!get_read_lock(cfname.str(), fd)) {
385  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " !get_read_lock(cfname.str(), fd): " << fd << endl);
386  // If get_read_lock() returns false, that means the cache file doesn't exist.
387  // Set keep_looking to false and exit the loop.
388  keep_looking = false;
389  // Set the cache file name to the current value of cfname.str() - this is
390  // the name that does not exist and should be used by write_dataset_to_cache()
391  cache_file_name = cfname.str();
392  }
393  else {
394  // If get_read_lock() returns true, the cache file exists; look and see if
395  // it's the correct one. If so, cached_dds will be true and we exit.
396 
397  // Read the first line from the cache file and see if it matches the resource id
398  ifstream cache_file_istream(cfname.str().c_str());
399  char line[max_cacheable_ce_len];
400  cache_file_istream.getline(line, max_cacheable_ce_len);
401  string cached_resource_id;
402  cached_resource_id.assign(line);
403 
404  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " cached_resource_id: " << cached_resource_id << endl);
405 
406  if (cached_resource_id.compare(resource_id) == 0) {
407  // WooHoo Cache Hit!
408  BESDEBUG(DEBUG_KEY, "BESDapFunctionResponseCache::load_from_cache() - Cache Hit!" << endl);
409 
410  // non-null value value for cached_dds will exit the loop
411  cached_dds = read_cached_data(cache_file_istream);
412  }
413 
414  unlock_and_close(cfname.str());
415  }
416  } while (!cached_dds && keep_looking);
417 
418  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " Cache " << (cached_dds!=0?"HIT":"MISS") << " for: " << cache_file_name << endl);
419 
420  return cached_dds;
421 }
422 
427 DDS *
428 BESDapFunctionResponseCache::read_cached_data(istream &cached_data)
429 {
430  // Build a CachedSequence; all other types are as BaseTypeFactory builds
431  CacheTypeFactory factory;
432  DDS *fdds = new DDS(&factory);
433 
434  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " - BEGIN" << endl);
435 
436  // Parse the DDX; throw an exception on error.
437  DDXParser ddx_parser(fdds->get_factory());
438 
439  // Parse the DDX, reading up to and including the next boundary.
440  // Return the CID for the matching data part
441  string data_cid; // Not used. jhrg 5/5/16
442  try {
443  ddx_parser.intern_stream(cached_data, fdds, data_cid, DATA_MARK);
444  }
445  catch (Error &e) { // Catch the libdap::Error and throw BESInternalError
446  throw BESInternalError(e.get_error_message(), __FILE__, __LINE__);
447  }
448 
449  CacheUnMarshaller um(cached_data);
450 
451  for (DDS::Vars_iter i = fdds->var_begin(), e = fdds->var_end(); i != e; ++i) {
452  (*i)->deserialize(um, fdds);
453  }
454 
455  // mark everything as read. And 'to send.' That is, make sure that when a response
456  // is retrieved from the cache, all of the variables are marked as 'to be sent.'
457  for (DDS::Vars_iter i = fdds->var_begin(), e = fdds->var_end(); i != e; ++i) {
458  (*i)->set_read_p(true);
459  (*i)->set_send_p(true);
460 
461  // For Sequences, deserialize() will update the 'current row number,' which
462  // is the correct behavior but which will also confuse serialize(). Reset the
463  // current row number here so serialize() can start working from row 0. jhrg 5/13/16
464  // Note: Now uses the recursive version of reset_row_number. jhrg 5/16/16
465  if ((*i)->type() == dods_sequence_c) {
466  static_cast<Sequence*>(*i)->reset_row_number(true);
467  }
468  }
469 
470  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " - END." << endl);
471 
472  fdds->set_factory(0); // Make sure there is no left-over cruft in the returned DDS
473 
474  return fdds;
475 }
476 
492 DDS *
493 BESDapFunctionResponseCache::write_dataset_to_cache(DDS *dds, const string &resource_id, const string &func_ce,
494  const string &cache_file_name)
495 {
496  BESDEBUG(DEBUG_KEY, __FUNCTION__ << " BEGIN " << resource_id << ": "
497  << func_ce << ": " << cache_file_name << endl);
498 
499  DDS *fdds = 0; // will hold the return value
500 
501  int fd;
502  if (create_and_lock(cache_file_name, fd)) {
503  // If here, the cache_file_name could not be locked for read access;
504  // try to build it. First make an empty files and get an exclusive lock on them.
505  BESDEBUG(DEBUG_KEY,__FUNCTION__ << " Caching " << resource_id << ", func_ce: " << func_ce << endl);
506 
507  // Get an output stream directed at the locked cache file
508  ofstream cache_file_ostream(cache_file_name.c_str(), ios::out|ios::app|ios::binary);
509  if (!cache_file_ostream.is_open())
510  throw BESInternalError("Could not open '" + cache_file_name + "' to write cached response.", __FILE__, __LINE__);
511 
512  try {
513  // Write the resource_id to the first line of the cache file
514  cache_file_ostream << resource_id << endl;
515 
516  // Evaluate the function
517  ConstraintEvaluator func_eval;
518  func_eval.parse_constraint(func_ce, *dds);
519  fdds = func_eval.eval_function_clauses(*dds);
520 
521  fdds->print_xml_writer(cache_file_ostream, true, "");
522 
523  cache_file_ostream << DATA_MARK << endl;
524 
525  // Define the scope of the StreamMarshaller because for some types it will use
526  // a child thread to send data and it's dtor will wait for that thread to complete.
527  // We want that before we close the output stream (cache_file_stream) jhrg 5/6/16
528  {
529  ConstraintEvaluator new_ce;
530  CacheMarshaller m(cache_file_ostream);
531 
532  for (DDS::Vars_iter i = fdds->var_begin(); i != fdds->var_end(); i++) {
533  if ((*i)->send_p()) {
534  (*i)->serialize(new_ce, *fdds, m, false);
535  }
536  }
537  }
538 
539  // Change the exclusive locks on the new file to a shared lock. This keeps
540  // other processes from purging the new file and ensures that the reading
541  // process can use it.
542  exclusive_to_shared_lock(fd);
543 
544  // Now update the total cache size info and purge if needed. The new file's
545  // name is passed into the purge method because this process cannot detect its
546  // own lock on the file.
547  unsigned long long size = update_cache_info(cache_file_name);
548  if (cache_too_big(size)) update_and_purge(cache_file_name);
549 
550  unlock_and_close(cache_file_name);
551  }
552  catch (...) {
553  // Bummer. There was a problem doing The Stuff. Now we gotta clean up.
554  cache_file_ostream.close();
555  this->purge_file(cache_file_name);
556  unlock_and_close(cache_file_name);
557  throw;
558  }
559  }
560 
561  return fdds;
562 }
563 
Marshaller that knows how to serialize DAP data objects to a disk cache. This class can be used with libd...
exception thrown if internal error encountered
static string lowercase(const string &s)
Definition: BESUtil.cc:186
STL namespace.
virtual libdap::DDS * get_or_cache_dataset(libdap::DDS *dds, const std::string &constraint)
Return a DDS loaded with data that can be serialized back to a client.
virtual string get_cache_file_name(const string &src, bool mangle=true)
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:483
static BESKeys * TheKeys()
Definition: TheBESKeys.cc:43
Cache the results from server functions.
UnMarshaller that knows how to deserialize dap objects.