OpenDNSSEC-signer  2.1.12
signertasks.c
/*
 * Copyright (c) 2009 NLNet Labs. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <time.h> /* time() */

#include "daemon/engine.h"
#include "scheduler/worker.h"
#include "scheduler/schedule.h"
#include "signertasks.h"
#include "duration.h"
#include "hsm.h"
#include "locks.h"
#include "util.h"
#include "log.h"
#include "status.h"
#include "signer/tools.h"
#include "signer/zone.h"

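/**
 * Queue a single RRset for signing by pushing it onto the sign queue;
 * blocks while the queue is full and counts the queued item in *nsubtasks.
 */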
static void
worker_queue_rrset(struct worker_context* context, fifoq_type* q, rrset_type* rrset, long* nsubtasks)
{
    ods_status status = ODS_STATUS_UNCHANGED;
    int tries = 0;
    ods_log_assert(q);
    ods_log_assert(rrset);

    pthread_mutex_lock(&q->q_lock);
    status = fifoq_push(q, (void*) rrset, context, &tries);
    while (status == ODS_STATUS_UNCHANGED) {
        tries++;
        if (context->worker->need_to_exit) {
            pthread_mutex_unlock(&q->q_lock);
            return;
        }
        ods_thread_wait(&q->q_nonfull, &q->q_lock, 5);
        status = fifoq_push(q, (void*) rrset, context, &tries);
    }
    pthread_mutex_unlock(&q->q_lock);

    ods_log_assert(status == ODS_STATUS_OK);
    *nsubtasks += 1;
}

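/**
 * Queue all RRsets of a domain for signing, including the RRset attached
 * to its denial-of-existence (NSEC/NSEC3) record, if present.
 */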
static void
worker_queue_domain(struct worker_context* context, fifoq_type* q, domain_type* domain, long* nsubtasks)
{
    rrset_type* rrset = NULL;
    denial_type* denial = NULL;
    ods_log_assert(context);
    ods_log_assert(q);
    ods_log_assert(domain);
    rrset = domain->rrsets;
    while (rrset) {
        worker_queue_rrset(context, q, rrset, nsubtasks);
        rrset = rrset->next;
    }
    denial = (denial_type*) domain->denial;
    if (denial && denial->rrset) {
        worker_queue_rrset(context, q, denial->rrset, nsubtasks);
    }
}

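/**
 * Queue an entire zone for signing by walking the domain tree and
 * queueing every domain it contains.
 */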
static void
worker_queue_zone(struct worker_context* context, fifoq_type* q, zone_type* zone, long* nsubtasks)
{
    ldns_rbnode_t* node = LDNS_RBTREE_NULL;
    domain_type* domain = NULL;
    ods_log_assert(context);
    ods_log_assert(q);
    ods_log_assert(zone);
    if (!zone->db || !zone->db->domains) {
        return;
    }
    if (zone->db->domains->root != LDNS_RBTREE_NULL) {
        node = ldns_rbtree_first(zone->db->domains);
    }
    while (node && node != LDNS_RBTREE_NULL) {
        domain = (domain_type*) node->data;
        worker_queue_domain(context, q, domain, nsubtasks);
        node = ldns_rbtree_next(node);
    }
}

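/**
 * Check the outcome of the queued signing jobs: report failed RRsets and
 * bail out if the worker has been asked to exit.
 */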
static ods_status
worker_check_jobs(worker_type* worker, task_type* task, int ntasks, long ntasksfailed)
{
    ods_log_assert(worker);
    ods_log_assert(task);
    if (ntasksfailed) {
        ods_log_error("[%s] sign zone %s failed: %ld RRsets failed",
            worker->name, task->owner, ntasksfailed);
        return ODS_STATUS_ERR;
    } else if (worker->need_to_exit) {
        ods_log_error("[%s] sign zone %s failed: worker needs to exit",
            worker->name, task->owner);
        return ODS_STATUS_ERR;
    }
    return ODS_STATUS_OK;
}

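/**
 * Drudger main loop: pop RRsets from the sign queue, sign them with a
 * per-thread HSM context and report the result back to the queueing worker.
 */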
void
drudge(worker_type* worker)
{
    rrset_type* rrset;
    ods_status status;
    struct worker_context* superior;
    hsm_ctx_t* ctx = NULL;
    engine_type* engine = NULL;
    fifoq_type* signq = worker->taskq->signq;

    while (worker->need_to_exit == 0) {
        ods_log_deeebug("[%s] report for duty", worker->name);
        pthread_mutex_lock(&signq->q_lock);
        superior = NULL;
        rrset = (rrset_type*) fifoq_pop(signq, (void**)&superior);
        if (!rrset) {
            ods_log_deeebug("[%s] nothing to do, wait", worker->name);
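            /* the sign queue is empty; wait on the queue's threshold
             * condition until new work is pushed, then try to pop again */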
            pthread_cond_wait(&signq->q_threshold, &signq->q_lock);
            if (worker->need_to_exit == 0)
                rrset = (rrset_type*) fifoq_pop(signq, (void**)&superior);
        }
        pthread_mutex_unlock(&signq->q_lock);
        /* do some work */
        if (rrset) {
            ods_log_assert(superior);
            if (!ctx) {
                ods_log_debug("[%s] create hsm context", worker->name);
                ctx = hsm_create_context();
            }
            if (!ctx) {
                engine = superior->engine;
                ods_log_crit("[%s] error creating libhsm context", worker->name);
                engine->need_to_reload = 1;
                pthread_mutex_lock(&engine->signal_lock);
                pthread_cond_signal(&engine->signal_cond);
                pthread_mutex_unlock(&engine->signal_lock);
                ods_log_error("signer instructed to reload due to hsm reset while signing");
                status = ODS_STATUS_HSM_ERR;
            } else {
                status = rrset_sign(ctx, rrset, superior->clock_in);
            }
            fifoq_report(signq, superior->worker, status);
        }
        /* done work */
    }
    /* cleanup open HSM sessions */
    if (ctx) {
        hsm_destroy_context(ctx);
    }
}

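/**
 * Task callback: (re)load the signer configuration for a zone and, on
 * success, schedule a prompt read of the unsigned zone.
 */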
time_t
do_readsignconf(task_type* task, const char* zonename, void* zonearg, void *contextarg)
{
    struct worker_context* context = contextarg;
    engine_type* engine = context->engine;
    zone_type* zone = zonearg;
    ods_status status;
    status = tools_signconf(zone);
    if (status == ODS_STATUS_UNCHANGED && !zone->signconf->last_modified) {
        ods_log_debug("No signconf.xml for zone %s yet", task->owner);
        status = ODS_STATUS_ERR;
        zone->zoneconfigvalid = 0;
    }
    if (status == ODS_STATUS_OK || status == ODS_STATUS_UNCHANGED) {
        /* status unchanged not really possible */
        schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
        schedule_scheduletask(engine->taskq, TASK_READ, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
        zone->zoneconfigvalid = 1;
        return schedule_SUCCESS;
    } else {
        zone->zoneconfigvalid = 0;
        if (!zone->signconf->last_modified) {
            ods_log_warning("WARNING: unable to sign zone %s, signconf is not ready", task->owner);
        } else {
            ods_log_crit("CRITICAL: failed to sign zone %s: %s", task->owner, ods_status2str(status));
        }
        return schedule_DEFER;
    }
}

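/**
 * Task callback for a forced signconf reload: if the configuration
 * changed, drop all pending tasks for the zone and restart from a read.
 */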
time_t
do_forcereadsignconf(task_type* task, const char* zonename, void* zonearg, void *contextarg)
{
    struct worker_context* context = contextarg;
    engine_type* engine = context->engine;
    zone_type* zone = zonearg;
    ods_status status;
    /* perform 'load signconf' task */
    status = tools_signconf(zone);
    if (status == ODS_STATUS_UNCHANGED) {
        schedule_unscheduletask(engine->taskq, TASK_SIGNCONF, zone->name);
        if (!zone->zoneconfigvalid) {
            zone->zoneconfigvalid = 1;
            schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
            schedule_scheduletask(engine->taskq, TASK_READ, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
        }
        return schedule_SUCCESS;
    } else if (status == ODS_STATUS_OK) {
        schedule_unscheduletask(engine->taskq, TASK_SIGNCONF, zone->name);
        schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
        schedule_unscheduletask(engine->taskq, TASK_SIGN, zone->name);
        schedule_unscheduletask(engine->taskq, TASK_WRITE, zone->name);
        schedule_scheduletask(engine->taskq, TASK_READ, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
        return schedule_SUCCESS;
    } else {
        return schedule_SUCCESS;
    }
}

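/**
 * Task callback: sign a zone. Bumps the serial, queues all RRsets onto
 * the sign queue for the drudgers, waits for them to finish and then
 * schedules the write task.
 */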
time_t
do_signzone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
{
    struct worker_context* context = contextarg;
    engine_type* engine = context->engine;
    worker_type* worker = context->worker;
    zone_type* zone = zonearg;
    ods_status status;
    time_t start = 0;
    time_t end = 0;
    long nsubtasks = 0;
    long nsubtasksfailed = 0;
    context->clock_in = time_now();
    status = zone_update_serial(zone);
    if (status != ODS_STATUS_OK) {
        ods_log_error("[%s] unable to sign zone %s: failed to increment serial", worker->name, task->owner);
        ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s",
            worker->name, task->owner, ods_status2str(status));
        return schedule_DEFER; /* backoff */
    }
    /* start timer */
    start = time(NULL);
    if (zone->stats) {
        pthread_mutex_lock(&zone->stats->stats_lock);
        if (!zone->stats->start_time) {
            zone->stats->start_time = start;
        }
        zone->stats->sig_count = 0;
        zone->stats->sig_soa_count = 0;
        zone->stats->sig_reuse = 0;
        zone->stats->sig_time = 0;
        pthread_mutex_unlock(&zone->stats->stats_lock);
    }
    /* check the HSM connection before queuing sign operations */
    if (hsm_check_context()) {
        ods_log_error("signer instructed to reload due to hsm reset in sign task");
        engine->need_to_reload = 1;
        pthread_mutex_lock(&engine->signal_lock);
        pthread_cond_signal(&engine->signal_cond);
        pthread_mutex_unlock(&engine->signal_lock);
        ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s",
            worker->name, task->owner, ods_status2str(status));
        return schedule_DEFER; /* backoff */
    }
    /* prepare keys */
    status = zone_prepare_keys(zone);
    if (status == ODS_STATUS_OK) {
        /* queue menial, hard signing work */
        worker_queue_zone(context, worker->taskq->signq, zone, &nsubtasks);
        ods_log_deeebug("[%s] wait until drudgers are finished "
            "signing zone %s", worker->name, task->owner);
        /* sleep until work is done */
        fifoq_waitfor(context->signq, worker, nsubtasks, &nsubtasksfailed);
    }
    /* stop timer */
    end = time(NULL);
    /* check status and jobs */
    if (status == ODS_STATUS_OK) {
        status = worker_check_jobs(worker, task, nsubtasks, nsubtasksfailed);
    }
    if (status == ODS_STATUS_OK && zone->stats) {
        pthread_mutex_lock(&zone->stats->stats_lock);
        zone->stats->sig_time = (end - start);
        pthread_mutex_unlock(&zone->stats->stats_lock);
    }
    if (status != ODS_STATUS_OK) {
        ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s",
            worker->name, task->owner, ods_status2str(status));
        return schedule_DEFER; /* backoff */
    }

    schedule_scheduletask(engine->taskq, TASK_WRITE, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
    return schedule_SUCCESS;
}

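/**
 * Task callback: read the unsigned zone through the input adapter and,
 * on success, schedule the sign task.
 */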
time_t
do_readzone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
{
    ods_status status = ODS_STATUS_OK;
    struct worker_context* context = contextarg;
    engine_type* engine = context->engine;
    zone_type* zone = zonearg;
    /* perform 'read input adapter' task */
    if (!zone->signconf->last_modified) {
        ods_log_debug("no signconf.xml for zone %s yet", task->owner);
        status = ODS_STATUS_ERR;
    }
    if (status == ODS_STATUS_OK) {
        status = tools_input(zone);
        if (status == ODS_STATUS_UNCHANGED) {
            ods_log_verbose("zone %s unsigned data not changed, continue", task->owner);
            status = ODS_STATUS_OK;
        }
    }
    if (status != ODS_STATUS_OK) {
        if (!zone->signconf->last_modified) {
            ods_log_warning("WARNING: unable to sign zone %s, signconf is not ready", task->owner);
        } else if (status != ODS_STATUS_XFR_NOT_READY) {
            /* other statuses are critical, and we know it is not ODS_STATUS_OK */
            ods_log_crit("CRITICAL: failed to sign zone %s: %s", task->owner, ods_status2str(status));
        }
        return schedule_DEFER;
    } else {
        /* Unscheduling an existing sign task should not be necessary. After a read (this action)
         * the logical next step is a sign. No other regular procedure that does not explicitly
         * remove a sign task could create a sign task for this zone. So here we should be able
         * to assume there is no sign task. However, it does occur. The original code before
         * refactoring also removed sign tasks. My premise is that this is caused by the locking
         * code: a task can start executing even though the zone is still being processed by
         * another task. For instance, a forced signconf issued just before a read task starts
         * can lead to the read task starting to execute while the signconf task is still running.
         * The forced signconf task cannot remove the read task (it is no longer queued), but
         * will schedule a sign task. The read task can then continue, finding the just created
         * sign task in its path.
         */
        schedule_unscheduletask(engine->taskq, TASK_SIGN, zone->name);
        schedule_scheduletask(engine->taskq, TASK_SIGN, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
        return schedule_SUCCESS;
    }
}

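/**
 * Task callback for a forced zone read: read the unsigned zone, drop all
 * pending tasks for the zone and schedule a fresh sign task.
 */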
time_t
do_forcereadzone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
{
    ods_status status = ODS_STATUS_OK;
    struct worker_context* context = contextarg;
    engine_type* engine = context->engine;
    zone_type* zone = zonearg;
    /* perform 'read input adapter' task */
    if (!zone->signconf->last_modified) {
        ods_log_debug("no signconf.xml for zone %s yet", task->owner);
        status = ODS_STATUS_ERR;
    }
    if (status == ODS_STATUS_OK) {
        status = tools_input(zone);
        if (status == ODS_STATUS_UNCHANGED) {
            ods_log_verbose("zone %s unsigned data not changed, continue", task->owner);
            status = ODS_STATUS_OK;
        }
    }
    if (status != ODS_STATUS_OK) {
        if (!zone->signconf->last_modified) {
            ods_log_warning("WARNING: unable to sign zone %s, signconf is not ready", task->owner);
        } else if (status != ODS_STATUS_XFR_NOT_READY) {
            /* other statuses are critical, and we know it is not ODS_STATUS_OK */
            ods_log_crit("CRITICAL: failed to sign zone %s: %s", task->owner, ods_status2str(status));
        }
        return schedule_SUCCESS;
    } else {
        schedule_unscheduletask(engine->taskq, TASK_SIGNCONF, zone->name);
        schedule_unscheduletask(engine->taskq, TASK_FORCEREAD, zone->name);
        schedule_unscheduletask(engine->taskq, TASK_READ, zone->name);
        schedule_unscheduletask(engine->taskq, TASK_SIGN, zone->name);
        schedule_unscheduletask(engine->taskq, TASK_WRITE, zone->name);
        schedule_scheduletask(engine->taskq, TASK_SIGN, zone->name, zone, &zone->zone_lock, schedule_PROMPTLY);
        return schedule_SUCCESS;
    }
}

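/**
 * Task callback: write the signed zone through the output adapter, back up
 * the zone state and schedule the next sign run at the resign time.
 */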
time_t
do_writezone(task_type* task, const char* zonename, void* zonearg, void *contextarg)
{
    struct worker_context* context = contextarg;
    engine_type* engine = context->engine;
    worker_type* worker = context->worker;
    zone_type* zone = zonearg;
    ods_status status;
    time_t resign;
    context->clock_in = time_now(); /* TODO this means something different */
    /* perform write to output adapter task */
    status = tools_output(zone, engine);
    if (status != ODS_STATUS_OK) {
        ods_log_crit("[%s] CRITICAL: failed to sign zone %s: %s",
            worker->name, task->owner, ods_status2str(status));
        return schedule_DEFER;
    }
    if (zone->signconf &&
        duration2time(zone->signconf->sig_resign_interval)) {
        resign = context->clock_in +
            duration2time(zone->signconf->sig_resign_interval);
    } else {
        ods_log_error("[%s] unable to retrieve resign interval "
            "for zone %s: duration2time() failed",
            worker->name, task->owner);
        ods_log_info("[%s] defaulting to 1H resign interval for "
            "zone %s", worker->name, task->owner);
        resign = context->clock_in + 3600;
    }
    /* backup the last successful run */
    status = zone_backup2(zone, resign);
    if (status != ODS_STATUS_OK) {
        ods_log_warning("[%s] unable to backup zone %s: %s",
            worker->name, task->owner, ods_status2str(status));
        /* just a warning */
        status = ODS_STATUS_OK;
    }
    schedule_scheduletask(engine->taskq, TASK_SIGN, zone->name, zone, &zone->zone_lock, resign);
    return schedule_SUCCESS;
}