Package pygccxml :: Package parser :: Module directory_cache

Source Code for Module pygccxml.parser.directory_cache

  1  # Copyright 2004-2008 Roman Yakovenko. 
  2  # Distributed under the Boost Software License, Version 1.0. (See 
  3  # accompanying file LICENSE_1_0.txt or copy at 
  4  # http://www.boost.org/LICENSE_1_0.txt) 
  5  # 
  6  # The initial version of the directory_cache_t class was written 
  7  # by Matthias Baas (baas@ira.uka.de). 
  8   
  9  """Directory cache implementation. 
 10   
 11  This module contains the implementation of a cache that uses individual 
 12  files stored in a dedicated cache directory to store the cached contents. 
 13  The cache class is L{directory_cache_t} which can be passed to the C{cache} 
 14  argument of the L{parse()} function. 
 15  """ 
 16   
 17  import os, os.path, gzip, md5 
 18  import cPickle 
 19  import declarations_cache 
 20   
class index_entry_t:
    """Record kept in the directory cache index for one cached header.

    Every *.cache file written by directory_cache_t is described by exactly
    one index_entry_t. The stored signatures are what the cache compares
    against to decide whether the *.cache file may still be used.

    This is a helper class of directory_cache_t.
    """

    def __init__( self, filesigs, configsig ):
        """Constructor.

        @param filesigs: List of (fileid, sig) tuples, one per dependent file.
        @type filesigs: list of tuple
        @param configsig: Signature of the configuration object.
        """
        self.configsig = configsig
        self.filesigs = filesigs

    def __getstate__(self):
        """Return the pickled state as the tuple (filesigs, configsig)."""
        state = (self.filesigs, self.configsig)
        return state

    def __setstate__(self, state):
        """Restore the attributes from a (filesigs, configsig) tuple."""
        filesigs, configsig = state
        self.filesigs = filesigs
        self.configsig = configsig
45 46
class directory_cache_t ( declarations_cache.cache_base_t ):
    """Cache class that stores its data as multiple files inside a directory.

    The cache stores one index file called "index.dat" which is always
    read by the cache when the cache object is created. Each header file
    will have its corresponding *.cache file that stores the declarations
    found in the header file. The index file is used to determine whether
    a *.cache file is still valid or not (by checking if one of the dependent
    files (i.e. the header file itself and all included files) have been
    modified since the last run).
    """

    def __init__( self, dir="cache", compression=False, md5_sigs=True ):
        """Constructor.

        @param dir: Cache directory (it is created if it does not exist).
        @type dir: str
        @param compression: If True the cache files are compressed using gzip.
        @type compression: bool
        @param md5_sigs: Determines whether file modifications are detected
                         by computing a md5 digest (True) or by checking the
                         modification date (False).
        @type md5_sigs: bool
        @raise ValueError: If dir refers to an existing regular file.
        """
        declarations_cache.cache_base_t.__init__(self)

        # Cache directory
        self.__dir = os.path.abspath(dir)

        # Flag that determines whether the cache files will be compressed
        self.__compression = compression

        # Flag that determines whether the signature is a md5 digest or
        # the modification time
        # (this flag is passed to the filename_repository_t class)
        self.__md5_sigs = md5_sigs

        # Filename repository
        self.__filename_rep = filename_repository_t(self.__md5_sigs)

        # Index dictionary (Key is the value returned by _create_cache_key()
        # (which is based on the header file name) and value is an
        # index_entry_t object)
        self.__index = {}

        # Flag that indicates whether the index was modified
        self.__modified_flag = False

        # Check if dir refers to an existing file...
        if os.path.isfile(self.__dir):
            raise ValueError("Cannot use %s as cache directory. There is already a file with that name."%self.__dir)

        # Load the cache or create the cache directory...
        if os.path.isdir(self.__dir):
            self._load()
        else:
            # Create the cache directory...
            os.mkdir(self.__dir)

    def flush(self):
        """Save the index table to disk."""
        self._save()

    def update(self, source_file, configuration, declarations, included_files):
        """Replace a cache entry by a new value.

        @param source_file: Header file name.
        @type source_file: str
        @param configuration: Configuration object.
        @type configuration: L{config_t}
        @param declarations: Declarations contained in the header file.
        @type declarations: picklable object
        @param included_files: Dependent files
        @type included_files: list of str
        """
        # Normalize all paths...
        source_file = os.path.normpath(source_file)
        included_files = [os.path.normpath(p) for p in included_files]

        # Create the list of dependent files. This is the included_files list
        # + the source file. Duplicate names are removed (first occurrence
        # wins, so the resulting order is deterministic).
        dependent_files = []
        seen = {}
        for name in [source_file]+included_files:
            if name not in seen:
                seen[name] = 1
                dependent_files.append(name)

        key = self._create_cache_key(source_file)
        # Remove an existing entry (if there is one)
        # After calling this method, it is guaranteed that __index[key]
        # does not exist anymore.
        self._remove_entry(source_file, key)

        # Write the declarations into the cache file first. If writing
        # fails, no index entry has been registered yet, so the index never
        # refers to a missing or truncated cache file.
        cachefilename = self._create_cache_filename(source_file)
        self._write_file(cachefilename, declarations)

        # Create a new entry...

        # Create the sigs of all dependent files...
        filesigs = []
        for filename in dependent_files:
            id_,sig = self.__filename_rep.acquire_filename(filename)
            filesigs.append((id_,sig))

        configsig = self._create_config_signature(configuration)
        entry = index_entry_t(filesigs, configsig)
        self.__index[key] = entry
        self.__modified_flag = True

    def cached_value(self, source_file, configuration):
        """Return the cached declarations or None.

        @param source_file: Header file name
        @type source_file: str
        @param configuration: Configuration object
        @type configuration: L{config_t}
        @return: Cached declarations or None
        """
        # update() stores entries under the normalized path, so the lookup
        # has to normalize as well. Otherwise a path such as "./a/b.h"
        # would never produce a cache hit.
        source_file = os.path.normpath(source_file)

        # Check if the cache contains an entry for source_file
        key = self._create_cache_key(source_file)
        entry = self.__index.get(key)
        if entry is None:
            return None

        # Check if the entry is still valid. It is not valid if:
        # - the source_file has been updated
        # - the configuration object has changed (i.e. the header is parsed
        #   by gccxml with different settings which may influence the
        #   declarations)
        # - the included files have been updated
        #   (this list is part of the cache entry as it cannot be known
        #   by the caller when cached_value() is called. It was instead
        #   passed to update())

        # Check if the config is different...
        configsig = self._create_config_signature(configuration)
        if configsig!=entry.configsig:
            return None

        # Check if any of the dependent files has been modified...
        for id_, sig in entry.filesigs:
            if self.__filename_rep.is_file_modified(id_, sig):
                return None

        # Load and return the cached declarations
        cachefilename = self._create_cache_filename(source_file)
        return self._read_file(cachefilename)

    def _load(self):
        """Load the cache.

        Loads the file index.dat which contains the index table and
        the file name repository. If index.dat does not exist, an empty
        index and a fresh file name repository are used.

        This method is called by the constructor.
        """
        indexfilename = os.path.join(self.__dir, "index.dat")
        if os.path.exists(indexfilename):
            data = self._read_file(indexfilename)
            self.__index = data[0]
            self.__filename_rep = data[1]
            # The signature mode stored in the cache takes precedence over
            # the one requested by the caller, because the stored
            # signatures were computed with it.
            if self.__filename_rep._md5_sigs!=self.__md5_sigs:
                print("CACHE: Warning: md5_sigs stored in the cache is set to %s."%self.__filename_rep._md5_sigs)
                print(" Please remove the cache to change this setting.")
                self.__md5_sigs = self.__filename_rep._md5_sigs
        else:
            self.__index = {}
            self.__filename_rep = filename_repository_t(self.__md5_sigs)

        self.__modified_flag = False

    def _save(self):
        """Save the cache index if it was modified.

        Saves the index table and the file name repository in the file
        index.dat.
        """
        if self.__modified_flag:
            self.__filename_rep.update_id_counter()
            indexfilename = os.path.join(self.__dir, "index.dat")
            self._write_file(indexfilename, (self.__index,self.__filename_rep))
            self.__modified_flag = False

    def _read_file(self, filename):
        """Read a Python object from a cache file.

        Reads a pickled object from disk and returns it. The file is read
        through gzip if compression is enabled for this cache.

        @param filename: Name of the file that should be read.
        @type filename: str
        @returns: Unpickled file contents
        """
        if self.__compression:
            f = gzip.GzipFile(filename, "rb")
        else:
            f = open(filename, "rb")
        try:
            # SECURITY NOTE: unpickling executes code embedded in the file.
            # The cache directory must only contain files written by this
            # class; never point the cache at an untrusted directory.
            return cPickle.load(f)
        finally:
            # Close the handle even if unpickling fails.
            f.close()

    def _write_file(self, filename, data):
        """Write a data item into a file.

        The data object is written to a file using the pickle mechanism.
        The file is written through gzip if compression is enabled.

        @param filename: Output file name
        @type filename: str
        @param data: A Python object that will be pickled
        @type data: picklable object
        """
        if self.__compression:
            f = gzip.GzipFile(filename, "wb")
        else:
            f = open(filename, "wb")
        try:
            cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL)
        finally:
            # Close the handle even if pickling fails.
            f.close()

    def _remove_entry(self, source_file, key):
        """Remove an entry from the cache.

        source_file is the name of the header and key is its corresponding
        cache key (obtained by a call to L{_create_cache_key()}).
        The entry is removed from the index table, any referenced file
        name is released and the cache file is deleted.

        If key references a non-existing entry, the method returns
        immediately.

        @param source_file: Header file name
        @type source_file: str
        @param key: Key value for the specified header file
        @type key: hashable object
        """
        entry = self.__index.get(key)
        if entry is None:
            return

        # Release the referenced files...
        for id_, sig in entry.filesigs:
            self.__filename_rep.release_filename(id_)

        # Remove the cache entry...
        del self.__index[key]
        self.__modified_flag = True

        # Delete the corresponding cache file. A failure here is only
        # reported; the index entry is already gone.
        cachefilename = self._create_cache_filename(source_file)
        try:
            os.remove(cachefilename)
        except OSError as e:
            print("Could not remove cache file (%s)"%e)

    def _create_cache_key(self, source_file):
        """Return the cache key for a header file.

        The key is the base name of the file followed by the hash value
        of its directory part.

        @param source_file: Header file name
        @type source_file: str
        @returns: Key for the given header file
        @rtype: str
        """
        # NOTE(review): hash() of a string is not guaranteed to be identical
        # across interpreter builds or with hash randomization enabled;
        # confirm that keys stay stable between runs on the target platform.
        path, name = os.path.split(source_file)
        return name+str(hash(path))

    def _create_cache_filename(self, source_file):
        """Return the cache file name for a header file.

        @param source_file: Header file name
        @type source_file: str
        @returns: Cache file name (*.cache)
        @rtype: str
        """
        res = self._create_cache_key(source_file)+".cache"
        return os.path.join(self.__dir, res)

    def _create_config_signature(self, config):
        """Return the signature for a config object.

        The signature is computed as md5 digest of the contents of
        working_directory, include_paths, define_symbols,
        undefine_symbols and cflags.

        @param config: Configuration object
        @type config: L{config_t}
        @returns: Signature
        @rtype: str
        """
        # hashlib replaces the deprecated md5 module and produces the same
        # digest. It is imported locally so that this class does not depend
        # on the module level import list.
        import hashlib
        m = hashlib.md5()
        m.update(config.working_directory)
        for p in config.include_paths:
            m.update(p)
        for p in config.define_symbols:
            m.update(p)
        for p in config.undefine_symbols:
            m.update(p)
        for p in config.cflags:
            m.update(p)
        return m.digest()
350 351 352 353
class filename_entry_t:
    """Record type used internally by filename_repository_t.

    It is nothing more than a container holding a file name together with
    its reference count (plus a signature that is cached at run time only).
    """

    def __init__( self, filename ):
        """Constructor.

        A new entry starts with a reference count of 0.
        """
        # Name of the file and the number of index entries referring to it
        self.filename = filename
        self.refcount = 0

        # Run-time cache of the file signature. While sig_valid is False
        # the signature has not been computed yet; once it is True the
        # value in signature may be reused.
        # These two attributes must not be pickled!
        self.signature = None
        self.sig_valid = False

    def __getstate__(self):
        """Return the pickled state: only (filename, refcount)."""
        state = (self.filename, self.refcount)
        return state

    def __setstate__(self, state):
        """Restore filename and refcount and reset the signature cache."""
        filename, refcount = state
        self.filename = filename
        self.refcount = refcount
        self.signature = None
        self.sig_valid = False

    def inc_ref_count(self):
        """Increase the reference count by 1."""
        self.refcount = self.refcount + 1

    def dec_ref_count(self):
        """Decrease the reference count by 1 and return the new count."""
        remaining = self.refcount - 1
        self.refcount = remaining
        return remaining
396 397
class filename_repository_t:
    """File name repository.

    This class stores file names and can check whether a file has been
    modified or not since a previous call.
    A file name is stored by calling acquire_filename() which returns
    an ID and a signature of the file. The signature can later be used
    to check if the file was modified by calling is_file_modified().
    If the file name is no longer required release_filename() should be
    called so that the entry can be removed from the repository.
    """

    def __init__( self, md5_sigs ):
        """Constructor.

        @param md5_sigs: If True, file signatures are md5 digests of the
                         file contents, otherwise modification times.
        @type md5_sigs: bool
        """
        # Flag that determines whether the signature is a md5 digest or
        # the modification time
        self._md5_sigs = md5_sigs

        # ID lookup table (key: filename / value: id_)
        self.__id_lut = {}

        # Entry dictionary (key: id_ / value: filename_entry_t)
        # This dictionary contains the actual data.
        # It must always hold that each entry in __entries has a corresponding
        # entry in __id_lut (i.e. the keys in __id_lut must be the names
        # stored in __entries)
        self.__entries = {}

        # A counter for new ids
        self.__next_id = 1

    def acquire_filename(self, name):
        """Acquire a file name and return its id and its signature.

        A new entry is created if name is not known yet. In both cases the
        reference count of the entry is increased by one.

        @param name: File name
        @type name: str
        @returns: Tuple (id_, signature); signature is None if it could
                  not be determined.
        """
        id_ = self.__id_lut.get(name)
        # Is this a new entry?
        if id_ is None:
            # then create one...
            id_ = self.__next_id
            self.__next_id += 1
            self.__id_lut[name] = id_
            entry = filename_entry_t(name)
            self.__entries[id_] = entry
        else:
            # otherwise obtain the entry...
            entry = self.__entries[id_]

        entry.inc_ref_count()
        return id_, self._get_signature(entry)

    def release_filename(self, id_):
        """Release a file name.

        The entry is removed from the repository once its reference count
        drops to 0.

        @param id_: Id previously returned by acquire_filename()
        @raise ValueError: If id_ is not known to the repository.
        """
        entry = self.__entries.get(id_)
        if entry is None:
            raise ValueError("Invalid filename id (%d)"%id_)

        # Decrease reference count and check if the entry has to be removed...
        if entry.dec_ref_count()==0:
            del self.__entries[id_]
            del self.__id_lut[entry.filename]

    def is_file_modified(self, id_, signature):
        """Check if the file referred to by id_ has been modified.

        The current signature of the file is computed at most once per
        entry object and then reused for subsequent calls.

        @param id_: Id previously returned by acquire_filename()
        @param signature: Signature to compare the current one against
        @returns: True if the current signature differs from signature
        @raise ValueError: If id_ is not known to the repository.
        """
        entry = self.__entries.get(id_)
        if entry is None:
            raise ValueError("Invalid filename id_ (%d)"%id_)

        # Is the signature already known?
        if entry.sig_valid:
            # use the cached signature
            filesig = entry.signature
        else:
            # compute the signature and store it
            filesig = self._get_signature(entry)
            entry.signature = filesig
            entry.sig_valid = True

        return filesig!=signature

    def update_id_counter(self):
        """Update the id_ counter so that it doesn't grow forever.

        The next id becomes one more than the largest id in use, or 1 if
        the repository is empty.
        """
        if not self.__entries:
            self.__next_id = 1
        else:
            self.__next_id = max(self.__entries)+1

    def _get_signature(self, entry):
        """Return the signature of the file stored in entry.

        @param entry: Object providing the file name as C{entry.filename}
        @returns: md5 digest of the file contents or the modification time
                  (depending on the md5_sigs flag); None if the file does
                  not exist or cannot be accessed.
        """
        if self._md5_sigs:
            # hashlib replaces the deprecated md5 module (same digest).
            import hashlib
            # return md5 digest of the file content...
            if not os.path.exists(entry.filename):
                return None
            try:
                # Binary mode: the digest must cover the exact bytes on
                # disk, independent of platform newline translation.
                f = open(entry.filename, "rb")
            except IOError as e:
                print("Cannot determine md5 digest: %s"%e)
                return None
            try:
                data = f.read()
            finally:
                # Do not leak the handle if reading fails.
                f.close()
            return hashlib.md5(data).digest()
        else:
            # return file modification date...
            try:
                return os.path.getmtime(entry.filename)
            except OSError:
                return None

    def _dump(self):
        """Dump contents for debugging/testing.
        """
        print(70*"-")
        print("ID lookup table:")
        for name, id_ in self.__id_lut.items():
            print(" %s -> %d"%(name, id_))

        print(70*"-")
        print("%-4s %-60s %s"%("ID", "Filename", "Refcount"))
        print(70*"-")
        for id_, entry in self.__entries.items():
            print("%04d %-60s %d"%(id_, entry.filename, entry.refcount))
528