1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 """
39 Implements the standard 'collect' action.
40 @sort: executeCollect
41 @author: Kenneth J. Pronovici <pronovic@ieee.org>
42 """
43
44
45
46
47
48
49
50 import os
51 import logging
52 import pickle
53
54
55 from CedarBackup3.filesystem import BackupFileList, FilesystemList
56 from CedarBackup3.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath
57 from CedarBackup3.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR
58 from CedarBackup3.actions.util import writeIndicatorFile
59
60
61
62
63
64
65 logger = logging.getLogger("CedarBackup3.log.actions.collect")
66
67
68
69
70
71
72
73
74
75
def executeCollect(configPath, options, config):
    """
    Executes the collect backup action.

    Each configured collect file and collect directory is examined in turn.
    An item is collected today when a full backup was requested, when its
    collect mode is C{daily} or C{incr}, or when its collect mode is
    C{weekly} and today is the configured start of the week.  Items that do
    not meet these criteria are skipped (and the skip is logged).

    @note: When the collect action is complete, we will write a collect
    indicator to the collect directory, so it's obvious that the collect action
    has completed.  The stage process uses this indicator to decide whether a
    peer is ready to be staged.

    @param configPath: Path to configuration file on disk (not referenced in the body).
    @type configPath: String representing a path on disk.

    @param options: Program command-line options.
    @type options: Options object.

    @param config: Program configuration.
    @type config: Config object.

    @raise ValueError: Under many generic error conditions
    @raise TarError: If there is a problem creating a tar file
    """
    logger.debug("Executing the 'collect' action.")
    if config.options is None or config.collect is None:
        raise ValueError("Collect configuration is not properly filled in.")

    collectFiles = config.collect.collectFiles
    collectDirs = config.collect.collectDirs
    haveFiles = collectFiles is not None and len(collectFiles) >= 1
    haveDirs = collectDirs is not None and len(collectDirs) >= 1
    if not (haveFiles or haveDirs):
        raise ValueError("There must be at least one collect file or collect directory.")

    fullBackup = options.full
    logger.debug("Full backup flag is [%s]", fullBackup)
    todayIsStart = isStartOfWeek(config.options.startingDay)
    resetDigest = fullBackup or todayIsStart
    logger.debug("Reset digest flag is [%s]", resetDigest)

    def isDueToday(mode):
        # Schedule test shared by files and directories.
        if fullBackup or mode in ('daily', 'incr'):
            return True
        return mode == 'weekly' and todayIsStart

    for collectFile in (collectFiles if collectFiles is not None else ()):
        filePath = collectFile.absolutePath
        logger.debug("Working with collect file [%s]", filePath)
        # Settings are resolved before the schedule test, whether or not the
        # file ends up being collected today.
        collectMode = _getCollectMode(config, collectFile)
        archiveMode = _getArchiveMode(config, collectFile)
        digestPath = _getDigestPath(config, filePath)
        tarfilePath = _getTarfilePath(config, filePath, archiveMode)
        if not isDueToday(collectMode):
            logger.debug("File will not be backed up, per collect mode.")
        else:
            logger.debug("File meets criteria to be backed up today.")
            _collectFile(config, filePath, tarfilePath,
                         collectMode, archiveMode, resetDigest, digestPath)
        logger.info("Completed collecting file [%s]", filePath)

    for collectDir in (collectDirs if collectDirs is not None else ()):
        dirPath = collectDir.absolutePath
        logger.debug("Working with collect directory [%s]", dirPath)
        collectMode = _getCollectMode(config, collectDir)
        archiveMode = _getArchiveMode(config, collectDir)
        ignoreFile = _getIgnoreFile(config, collectDir)
        linkDepth = _getLinkDepth(collectDir)
        dereference = _getDereference(collectDir)
        recursionLevel = _getRecursionLevel(collectDir)
        (excludePaths, excludePatterns) = _getExclusions(config, collectDir)
        if not isDueToday(collectMode):
            logger.debug("Directory will not be backed up, per collect mode.")
        else:
            logger.debug("Directory meets criteria to be backed up today.")
            _collectDirectory(config, dirPath,
                              collectMode, archiveMode, ignoreFile, linkDepth, dereference,
                              resetDigest, excludePaths, excludePatterns, recursionLevel)
        logger.info("Completed collecting directory [%s]", dirPath)

    # The indicator file marks the collect directory as complete.
    writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                       config.options.backupUser, config.options.backupGroup)
    logger.info("Executed the 'collect' action successfully.")
143
144
145
146
147
148
149
150
151
152
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
    """
    Collects a single configured collect file.

    A backup list containing only the indicated file is built and handed to
    L{_executeBackup}, which writes the tarfile and (for incremental
    collection) manages the digest at the indicated digest path according to
    the reset digest flag.

    The caller is responsible for resolving the collect and archive modes,
    since either may be set on the collect configuration or on the collect
    file itself.

    @param config: Config object.
    @param absolutePath: Absolute path of file to collect.
    @param tarfilePath: Path to tarfile that should be created.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param resetDigest: Reset digest flag.
    @param digestPath: Path to digest file on disk, if needed.
    """
    singleFileList = BackupFileList()
    singleFileList.addFile(absolutePath)
    _executeBackup(
        config=config,
        backupList=singleFileList,
        absolutePath=absolutePath,
        tarfilePath=tarfilePath,
        collectMode=collectMode,
        archiveMode=archiveMode,
        resetDigest=resetDigest,
        digestPath=digestPath,
    )
177
178
179
180
181
182
def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
    """
    Collects a configured collect directory.

    With a recursion level of zero, the directory's contents are gathered
    into a single backup list (honoring the ignore file, exclusions, link
    depth and dereference flag) and handed to L{_executeBackup}.  The tarfile
    and digest paths are derived from the directory's absolute path.

    With a non-zero recursion level, each immediate subdirectory (files and
    links are not considered) is first collected on its own with the
    recursion level reduced by one.  Each subdirectory handled this way is
    then added to the exclusions, and finally the directory itself is
    collected at recursion level zero so that whatever remains is picked up.

    The caller must decide what the collect and archive modes are, since they
    can be on both the collect configuration and the collect directory itself.

    @note: The caller's C{excludePaths} list is never modified; the
    exclusions accumulated while recursing are kept in a private copy.

    @param config: Config object.
    @param absolutePath: Absolute path of directory to collect.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param ignoreFile: Ignore file to use.
    @param linkDepth: Link depth value to use.
    @param dereference: Dereference flag to use.
    @param resetDigest: Reset digest flag.
    @param excludePaths: List of absolute paths to exclude.
    @param excludePatterns: List of patterns to exclude.
    @param recursionLevel: Recursion level (zero for no recursion)
    """
    if recursionLevel == 0:
        # Collect the actual directory because we're at recursion level 0
        logger.info("Collecting directory [%s]", absolutePath)
        tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
        digestPath = _getDigestPath(config, absolutePath)

        backupList = BackupFileList()
        backupList.ignoreFile = ignoreFile
        backupList.excludePaths = excludePaths
        backupList.excludePatterns = excludePatterns
        backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

        _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
        return

    # Find all of the immediate subdirectories
    subdirs = FilesystemList()
    subdirs.excludeFiles = True
    subdirs.excludeLinks = True
    subdirs.excludePaths = excludePaths
    subdirs.excludePatterns = excludePatterns
    subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

    # Accumulate exclusions in a private copy so the list object owned by the
    # caller is left untouched.
    accumulatedExcludes = [] if excludePaths is None else list(excludePaths)

    # Back up the subdirectories separately, excluding each one once handled
    for subdir in subdirs:
        _collectDirectory(config, subdir, collectMode, archiveMode,
                          ignoreFile, linkDepth, dereference, resetDigest,
                          accumulatedExcludes, excludePatterns, recursionLevel - 1)
        accumulatedExcludes.append(subdir)

    # Back up everything that hasn't previously been backed up
    _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      accumulatedExcludes, excludePatterns, 0)
243
244
245
246
247
248
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
    """
    Execute the backup process for the indicated backup list.

    This function consolidates functionality shared by L{_collectFile} and
    L{_collectDirectory}.  Those functions build the backup list; this
    function writes the tarfile and manages the digest file on disk.

    When the collect mode is C{incr}, unchanged files are removed from the
    backup list based on the old digest (an empty digest if the reset digest
    flag is set, otherwise the one loaded from disk), and the newly captured
    digest is always written back to disk.  For any other collect mode no
    digest is used.  A tarfile is only generated when the backup list is not
    empty, and its ownership is changed to the configured backup user and
    group.

    For collect files, the digest file will always just contain the single
    file that is being backed up.  This might be a little wasteful in terms
    of the number of files that we keep around, but it's consistent and easy
    to understand.

    @param config: Config object.
    @param backupList: List to execute backup for
    @param absolutePath: Absolute path of directory or file to collect.
    @param tarfilePath: Path to tarfile that should be created.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param resetDigest: Reset digest flag.
    @param digestPath: Path to digest file on disk, if needed.
    """
    incremental = collectMode == 'incr'
    newDigest = None

    if incremental:
        if resetDigest:
            logger.debug("Based on resetDigest flag, digest will be cleared.")
            oldDigest = {}
        else:
            logger.debug("Based on resetDigest flag, digest will loaded from disk.")
            oldDigest = _loadDigest(digestPath)
        (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
        logger.debug("Removed %d unchanged files based on digest values.", removed)
    else:
        logger.debug("Collect mode is [%s]; no digest will be used.", collectMode)

    # A list holding exactly the collected path itself is a single-file backup.
    isSingleFile = len(backupList) == 1 and backupList[0] == absolutePath
    if isSingleFile:
        logger.info("Backing up file [%s] (%s).", absolutePath, displayBytes(backupList.totalSize()))
    else:
        logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, displayBytes(backupList.totalSize()))

    if len(backupList) > 0:
        backupList.generateTarfile(tarfilePath, archiveMode, True)
        changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)

    # The digest is rewritten even when nothing needed to be backed up.
    if incremental:
        _writeDigest(config, newDigest, digestPath)
298
299
300
301
302
303
def _loadDigest(digestPath):
    """
    Loads the indicated digest path from disk into a dictionary.

    This is a best-effort operation: if the digest file does not exist, or
    it cannot be read or unpickled for any reason, an empty dictionary is
    returned and the condition is logged rather than raised.

    @param digestPath: Path to the digest file on disk.

    @return: Dictionary representing contents of digest path.
    """
    if not os.path.isfile(digestPath):
        logger.debug("Digest [%s] does not exist on disk.", digestPath)
        return {}
    try:
        # NOTE(review): pickle.load can execute arbitrary code from a crafted
        # file; this presumably only ever reads digests written by
        # _writeDigest into the working directory -- confirm its permissions.
        with open(digestPath, "rb") as digestFile:
            loaded = pickle.load(digestFile, fix_imports=True)
        logger.debug("Loaded digest [%s] from disk: %d entries.", digestPath, len(loaded))
        return loaded
    except Exception as error:
        logger.error("Failed loading digest [%s] from disk: %s", digestPath, error)
        return {}
328
329
330
331
332
333
def _writeDigest(config, digest, digestPath):
    """
    Writes the digest dictionary to the indicated digest path on disk.

    The digest is pickled using protocol 0, and the file's ownership is then
    changed to the configured backup user and group.  This is a best-effort
    operation: any failure is logged, but no exception is raised.

    @param config: Config object.
    @param digest: Digest dictionary to write to disk.
    @param digestPath: Path to the digest file on disk.
    """
    try:
        with open(digestPath, "wb") as digestFile:
            pickle.dump(digest, digestFile, protocol=0, fix_imports=True)
        owner = config.options.backupUser
        group = config.options.backupGroup
        changeOwnership(digestPath, owner, group)
        logger.debug("Wrote new digest [%s] to disk: %d entries.", digestPath, len(digest))
    except Exception as error:
        logger.error("Failed to write digest [%s] to disk: %s", digestPath, error)
352
353
354
355
356
357
358
359
360
361
def _getCollectMode(config, item):
    """
    Gets the collect mode that should be used for a collect directory or file.
    A mode set on the item itself wins; when the item has none, the mode from
    the collect section of the configuration is used.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Collect mode to use.
    """
    collectMode = config.collect.collectMode if item.collectMode is None else item.collectMode
    logger.debug("Collect mode is [%s]", collectMode)
    return collectMode
376
377
378
379
380
381
def _getArchiveMode(config, item):
    """
    Gets the archive mode that should be used for a collect directory or file.
    A mode set on the item itself wins; when the item has none, the mode from
    the collect section of the configuration is used.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Archive mode to use.
    """
    archiveMode = config.collect.archiveMode if item.archiveMode is None else item.archiveMode
    logger.debug("Archive mode is [%s]", archiveMode)
    return archiveMode
396
397
398
399
400
401
def _getIgnoreFile(config, item):
    """
    Gets the ignore file that should be used for a collect directory or file.
    An ignore file set on the item itself wins; when the item has none, the
    one from the collect section of the configuration is used.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Ignore file to use.
    """
    ignoreFile = config.collect.ignoreFile if item.ignoreFile is None else item.ignoreFile
    logger.debug("Ignore file is [%s]", ignoreFile)
    return ignoreFile
416
417
418
419
420
421
def _getLinkDepth(item):
    """
    Gets the link depth that should be used for a collect directory.
    The value set on the directory is used when present; otherwise the link
    depth defaults to 0 (zero).
    @param item: C{CollectDir} object
    @return: Link depth to use.
    """
    linkDepth = 0 if item.linkDepth is None else item.linkDepth
    logger.debug("Link depth is [%d]", linkDepth)
    return linkDepth
435
436
437
438
439
440
def _getDereference(item):
    """
    Gets the dereference flag that should be used for a collect directory.
    The value set on the directory is used when present; otherwise the flag
    defaults to False.
    @param item: C{CollectDir} object
    @return: Dereference flag to use.
    """
    dereference = False if item.dereference is None else item.dereference
    logger.debug("Dereference flag is [%s]", dereference)
    return dereference
454
455
456
457
458
459
473
474
475
476
477
478
def _getDigestPath(config, absolutePath):
    """
    Gets the digest path associated with a collect directory or file.
    The digest lives in the configured working directory, and is named after
    the normalized form of the absolute path plus the digest extension.
    @param config: Config object.
    @param absolutePath: Absolute path to generate digest for
    @return: Absolute path to the digest associated with the collect directory or file.
    """
    digestName = "%s.%s" % (buildNormalizedPath(absolutePath), DIGEST_EXTENSION)
    digestPath = os.path.join(config.options.workingDir, digestName)
    logger.debug("Digest path is [%s]", digestPath)
    return digestPath
491
492
493
494
495
496
498 """
499 Gets the tarfile path (including correct extension) associated with a collect directory.
500 @param config: Config object.
501 @param absolutePath: Absolute path to generate tarfile for
502 @param archiveMode: Archive mode to use for this tarfile.
503 @return: Absolute path to the tarfile associated with the collect directory.
504 """
505 if archiveMode == 'tar':
506 extension = "tar"
507 elif archiveMode == 'targz':
508 extension = "tar.gz"
509 elif archiveMode == 'tarbz2':
510 extension = "tar.bz2"
511 normalized = buildNormalizedPath(absolutePath)
512 filename = "%s.%s" % (normalized, extension)
513 tarfilePath = os.path.join(config.collect.targetDir, filename)
514 logger.debug("Tarfile path is [%s]", tarfilePath)
515 return tarfilePath
516
517
518
519
520
521
556