Coverage for ion/core/object/repository : 80.98%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
|
#!/usr/bin/env python
@file ion/core/object/repository.py @brief Repository for managing data structures @author David Stuebe @author Matt Rodriguez
TODO Refactor Merge to use a proxy repository for the readonly objects - they must live in a separate workspace.
"""
""" An exception class for errors in the object management repository """
""" A dictionary class to contain the objects owned by a repository. All repository objects are accessible by other repositories via the workbench which maintains a cache of all the local objects. Clean up is the responsibility of each repository. """
# add everything currently in self to the cache!
# You get it - you own it! # If it does not raise a KeyError - add it else: raise KeyError('Key not found in index hash!')
""" D.copy() -> a shallow copy of D """ raise NotImplementedError('IndexHash does not support copy')
""" dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v. v defaults to None. """ raise NotImplementedError('IndexHash does not support fromkeys')
""" Get Item from the Index Hash"""
# You get it - you own it!
else:
""" Check to see if the Key exists """ else:
""" D.update(E, **F) -> None. Update D from E and F: for k in E: D[k] = E[k] (if E has keys else: for (k, v) in E: D[k] = v) then: for k in F: D[k] = F[k] """
# For now - don't bother parsing args just recount
""" Base class for the repository and merge container """
""" A dictionary containing objects which are not yet indexed, linked by a counter refrence in the current workspace """
""" Pointer to the current root object in the workspace """
""" All content elements are stored here - from incoming messages and new commits - everything goes here. Now it can be decoded for a checkout or sent in a message. """
""" Required for get_linked_object """
""" Need for access to sending messages! """
""" The upstream source of this repository. """
The list of currently excluded object types """
def root_object(self):
""" Since str is used in debugging it should never return an exception - no matter the state of the repository object """ output = '============== %s ==============\n' % self.__class__.__name__
output += 'Number of current workspace objects: %d \n' % len(self._workspace) output += 'Number of current index hash objects: %d \n' % len(self.index_hash) output += 'Excluded types:\n'
try: for type in self.excluded_types: output += str(type) except TypeError, te: output += te
output += '============== Root Object ==============\n' output += str(self._workspace_root) + '\n' output += '============ End Resource ============\n' return output
""" Clear the repository in preparation for python garbage collection """ # Do some clean up! for item in self._workspace.itervalues(): item.Invalidate()
self._workspace.clear() self.index_hash.clear() self._workspace_root = None
self._process = None self.upstream = None
""" Factory method for making wrapped GPB objects from the repository """
raise RepositoryError('Illegal argument type in get_linked_object.')
# Make sure the parent is set... #self.set_linked_object(link, obj)
# make sure the parent is set...
# @TODO - is this safe? Can the k/v be GC'd in the weakref dict in between has_key and get?
else:
link.type.version == element.type.version: #@TODO Consider removing this somewhat costly check... raise RepositoryError('The link type does not match the element type found!')
# For objects loaded from the hash the parent child relationship must be set #self.set_linked_object(link, obj)
# if the link is a commit but the linked object is not then it is a root object # The default for a root object should be ReadOnly = False
else: # When getting an object from it's parent, use the parents readonly setting
""" Load the child objects into the work space recursively """
# Get the link to the root object
# Catch only KeyErrors!
# Load the object structure
def _checkout_remote_commit(self, commit, excluded_types): """ Use the message client from the process to create a message Can not use the datastore client because of import problems... """
raise RepositoryError('Can not make a non-local checkout without a process!')
except ReceivedApplicationError, rae: log.error(rae) raise RepositoryError('Received application error during remote checkout operation!', 404) except ReceivedContainerError, rce: log.error(rce) raise RepositoryError('Received container error during remote checkout operation!', 404)
#print element
""" @brief Checkout_commit will checkout the content of a commit. It will attempt to get the content from the local repository. Failing that it will attempt to get it from a remote process. @param commit is the commit object to checkout @param excluded_types is a list of type objects to exclude while checking out the structure @retval the result maybe deferred as a result of the non local checkout. yielding will return the root_object. """
# Set the excluded types used in checking out this repository
raise RepositoryError('Invalid argument excluded_types in checkout_commit: must be a list of object types')
# This is a deferred!
# check that the calculated value in element.sha1 matches the stored value #@TODO Consider removing this somewhat costly check - it is already done when an object is read from a message raise RepositoryError('The sha1 key does not match the value. The data is corrupted! \n' +\ 'Element key %s, Calculated key %s' % (object_utils.sha1_to_hex(element.key), object_utils.sha1_to_hex(element.sha1)))
# Do not automatically load it into a particular space...
# If it is a leaf element set the bytes for the object, do not load it # If it is not a leaf element load it and find its child links
else:
# Make a note in the element of the child links as well!
#self.status is a property determined by the workspace root object status
""" A counter object used by this class to identify content objects untill they are indexed """
""" The current branch object of the mutable head Branch names are generallly nonsense (uuid or some such) """
""" Nick names for the branches of this repository - these are purely local! """
""" When merging a repository state there are multiple Merge Repository objects which hold the state of the object which is being merged. Access objects using: repo.merge[ind].<field in the root object> """
""" A place to stash the work space under a saved name. """
raise RepositoryError('Invalid argument type to set the persistent property of a repository') """ Set the persistence of this repository. Any repository which is declared persistent will not be GC'd until the persistent setting is changed to false """
raise RepositoryError('Invalid argument type to set the cached property of a repository') """ Set the cached property of this repository. Any repository which is declared cached will not be until the cache memory size of the workbench has been exceeded. """
""" This context object is used to determine when a repository should be moved from level 1 persistent caching to level 2 LRU caching in the workbench. """
### Structures for managing associations to a repository:
# New method to setup the mutable head of the repository
else:
A specially wrapped Mutable GPBObject which tracks branches and commits It is not 'stored' in the index - it lives in the repository """
""" Since str is used in debugging it should never return an exception - no matter the state of the repository object """
else:
output += 'Repository is currently merging %d state(s)! \n' % len(self.merge)
else:
except TypeError, te: output += te
for root in self.merge:
output += '============== Merge Root Object ==============\n' output += str(root) + '\n'
return False
#print self._dotgit #print other._dotgit
def Repository(self): """ Convience method to which is available for any object client or the repository of the object itself """
def repository_key(self):
raise RepositoryError('Invalid argument type to set the persistent property of a repository')
raise RepositoryError('Invalid argument type to set the cached property of a repository')
raise RepositoryError('Can not set the root object of the repository to a value which is not an instance of Wrapper')
""" Treat the index hash of serialized content as the relevant size of the repository for caching """
pass
""" Clear the repository in preparation for python garbage collection """
# Do some clean up! #print 'ITEM',item #print 'ITEM',item
for mr in self.merge: mr.clear()
""" Fill in a IDREF Object using the current state of the repository """ # Don't worry about type checking here.... will cause an attribute error if incorrect
except IndexError, ie: log.error(ie) raise RepositoryError('Can not create repository reference: no commits on the current branch!')
def branches(self): """ Convience method to access the branches from the mutable head (dotgit object) """
def commit_head(self): """ Convenience method to access the current commit """
raise RepositoryError('No current branch in the repository. Must checkout first.')
elif len(self._current_branch.commitrefs) == 0: return None else: raise RepositoryError('Branch should merge on read. Invalid state with more than one commit at the head of a branch!')
""" Convenience method to get a list of the current head commits """
# Do not error on multiple commit refs!
log.warn('This repository is currently a detached head. The current commit is not at the head of a branch.')
raise RepositoryError('No current branch in the repository. Must checkout first.')
""" @brief Create a new branch from the current commit and switch the workspace to the new branch. """ ## Need to check and then clear the workspace??? #if not self.status == self.UPTODATE: # raise Exception, 'Can not create new branch while the workspace is dirty'
# Unless this is an uninitialized repository it is an error to create # a new branch from one which has no commits yet...
# Generate a short random string for the branch name
raise RepositoryError('That branch nickname is already in use.')
# Get the linked commit
raise RepositoryError('Branch should merge on read. Invalid state!')
# Set the new branch to point at the commit
# Making a new branch re-attaches to a head! self._workspace_root.SetStructureReadWrite() self._detached_head = False
else: # This is a new repository with no commits yet! pass
else: log.info(str(self)) raise KeyError('Branch Key not found in repository %s: Could not delete branch name "%s"' % (self.repository_key, name))
# Clean up the branch nickname if any... del self.branchnicknames[k]
else:
""" Check out a particular branch Specify a branch, a branch and commit_id or a date Branch can be either a local nick name or a global branch key """
raise RepositoryError('Invalid excluded_types argument passed to checkout')
older_than = float(older_than)
raise RepositoryError('Can not checkout while the workspace is dirty') #What to do for uninitialized?
#Declare that it is a detached head!
raise RepositoryError('Checkout called with both commit_id and older_than!')
raise RepositoryError('Checkout must specify a branchname!')
raise RepositoryError('This branch is empty - there is nothing to checkout!')
# Set the current branch now!
# IF you are checking out a specific commit ID it is always a detached head!
# Use this set to make sure we only examine each commit once!
# Empty the crefs set to exit the while loop! # Save the CRef!
# For each child reference... # If we have not already looked at this one...
else: else: raise RepositoryError('End of Ancestors: No matching reference \ found in commit history on branch name %s, \ commit_id: %s' % (branchname, commit_id))
# IF you are checking out a specific commit date it is always a detached head! detached = True
# Need to make sure we get the closest commit to the older_than date! younger_than = -9999.99
# Use this set to make sure we only examine each commit once! touched_refs = set()
crefs = branch.commitrefs[:]
while len(crefs) >0:
new_set = set()
for ref in crefs:
if ref.date <= older_than & ref.date > younger_than: cref = ref younger_than = ref.date
# Only keep looking at parent references if they are to young elif ref.date > older_than: # For each child reference... for pref in ref.parentrefs: # If we have not already looked at this one... if not pref in touched_refs: new_set.add(pref) touched_refs.add(pref)
crefs = new_set
else: if not cref: raise RepositoryError('End of Ancestors: No matching commit \ found in commit history on branch name %s, \ older_than: %s' % (branch_name, older_than))
# Just checking out the current head - need to make sure it has not diverged! else:
raise RepositoryError('This branch has not commits - cant check it out!') else:
else:
# Do some clean up!
# Automatically fetch the object from the hashed dictionary
else:
# Deal with the newest ref separately
# make a new commit ref
# Add the CRef to the hashed elements
# set the cref to be readonly
# Add the cref to the active commit objects - for convenience
# update the hashed elements
# Now look for the common ancestor
else:
except IndexError, ex: log.exception('No common ancestor found in Repository!\n%s' % str(self)) raise RepositoryError('No common ancestor found for commit ref.')
if self.status != self.MODIFIED: # What about not initialized return
if len(self._current_branch.commitrefs)==0: raise RepositoryError('This current branch is empty - there is nothing to reset too!')
cref = self._current_branch.commitrefs[0]
# Do some clean up! self.purge_workspace()
# Automatically fetch the object from the hashed dictionary or fetch if needed! rootobj = cref.objectroot self._workspace_root = rootobj
self.load_links(rootobj, self.excluded_types)
return rootobj
#@TODO consider changing this to a warning rather than an exception
# Do some clean up!
""" Commit the current workspace structure """
# If the repo is in a valid state - make the commit even if it is up to date
# Reset the commit counter - used for debugging only
# Add the CRef to the hashed elements
# set the cref to be readonly
# Add the cref to the active commit objects - for convenience
# update the hashed elements
else: raise RepositoryError('Repository in invalid state to commit')
# Like git, return the commit id
""" @brief internal method to create commit references @param comment a string that describes this commit @param date the date to associate with this commit. If not given then the current time is used. @retval a string which is the commit reference """ # Now add a Commit Ref # make a new commit ref
# If this is the first commit to a new repository the current branch is a dummy # If it is initialized it is real and we need to link to it!
# This branch is real - add it to our ancestors raise RepositoryError('The Branch is in an invalid state and should have been merged on read!') else: # This is a new repository and we must add a place for the commit ref! # Since the commit has no parents, set the root_seed
# For each branch that we merged from - add a reference
# Clear the merge root and merged from # Update the cref in the branch
""" merge the named branch in to the current branch
This method does not 'do' the merger of state. It simply adds the parent ref to the repositories merged from list!
"""
raise RepositoryError('Merging while the workspace is dirty better to make a new commit first!') #What to do for uninitialized?
raise RepositoryError('Can not merge in a repository which is not initialized (Checkout something first!)')
if commit_id == self._current_branch.commitrefs[0].MyId: raise RepositoryError('Can not merge into self!') try: crefs.append(self._commit_index[commit_id]) except KeyError, ex: raise RepositoryError('Can not merge from unknown commit_id %s' % commit_id)
raise RepositoryError('Branch Key: "%s" does not exist!' % branchname)
raise RepositoryError('Can not merge with current branch head (self into self)')
# Merge the divergent states of this branch! crefs.remove(self._current_branch.commitrefs[0])
else: # Assume we merge any and all states of this branch?
else: log.debug('''Arguments to Repository.merge - branchname: %s; commit_id: %s''' \ % (branchname, commit_id)) raise RepositoryError('merge takes either a branchname argument or a commit_id argument!')
# Create a merge container to hold the merge object state for access
def status(self): """ Check the status of the current workspace - return a status
# Be very careful with this method - it must not raise exceptions! """
except AttributeError, ae: log.error(ae) return self.INVALID
else: else:
if branchname is None: branchname = self._current_branch.branchkey
branch = self.get_branch(branchname)
if len(branch.commitrefs) is not 1: return None
cref = branch.commitrefs[0] keys=[]
while cref:
keys.append(sha1_to_hex(cref.MyId))
if cref.parentrefs: cref = cref.parentrefs[0].commitref else: cref = False return keys
""" Stash the current workspace for later reference """ raise Exception('Not implemented yet')
""" @brief CreateObject is used to make new locally create objects which can be added to the resource's data structure. @param type_id is the type_id of the object to be created @retval the new object which can now be attached to the resource """
""" This id is a purely local concern - not used outside the local scope. """
def fetch_links(self, links):
# Get the method from the process if it overrides workbench fetch_links = self._process.fetch_links
else: raise RepositoryError('The repository object has no process to send a message with. It can not get the linked objects!')
#@TODO provide catch mechanism to use the service name instead of the process name if the process does not respond...
# Load the content by the link!
''' @defer.inlineCallbacks def get_remote_linked_object(self, link):
try: obj = self.get_linked_object(link) except KeyError, ex: log.info('"get_remote_linked_object": Caught object not found:'+str(ex)) res = yield self._fetch_remote_objects([link,]) # Object is now in the hashed objects dictionary obj = self.get_linked_object(link)
defer.returnValue(obj) return
@defer.inlineCallbacks def _fetch_remote_objects(self, links):
if not self._process: raise RepositoryError('Linked Object not found and repository has no process to get it with!')
if hasattr(self._process, 'fetch_links'): # Get the method from the process if it overrides workbench fetch_links = self._process.fetch_links else: fetch_links = self._process.workbench.fetch_links
#@TODO provide catch mechanism to use the service name instead of the process name if the process does not respond... elements = yield fetch_links(self.upstream, links)
self.index_hash.update(elements)
@defer.inlineCallbacks def load_remote_links(self, items): """ Load links which may require remote (deferred) access """ if not hasattr(items, '__iter__'): items = [items,]
remote_objects = [] local_objects = [] for item in items: for link in item.ChildLinks: try: child = self.get_linked_object(link) local_objects.append(child) except KeyError, ex: log.info('"load_remote_links": Caught object not found:'+str(ex)) remote_objects.append(link)
if remote_objects: res = yield self._fetch_remote_objects(remote_objects) local_objects.append(link.Root) # Rerun Load_remote_links after getting the child objects
if len(local_objects) >0: res = yield self.load_remote_links(local_objects) else: res = True
defer.returnValue(res) '''
""" Copy an object. This method will serialize the current state of the value. Then read it back in as new objects in the repository. The copies will all be created in a modified state. Copy can move from one repository to another. The deep_copy parameter determines whether all child objects are also copied. """
raise RepositoryError('Can not copy an object which is not an instance of Wrapper')
# @TODO provide for transfer by serialization and re instantiation raise RepositoryError('You can not copy only part of a gpb composite, only the root!')
# Deal with the case where this serialization causes a hash conflict...
raise RepositoryError('Could not get element from the index hash during copy.')
# Deep copy from the original! # Use the copies link to get the child - possibly from a different repo!
except KeyError, ke: if ignore_copy_errors: log.debug("Copy Object: ignored unfound child link %s" % link) else: # reraise raise ke
# If it is a link - set a link to the value in the wrapper # Should never happen - checked in the caller... raise RepositoryError('Can not set a composite field unless it is of type Link')
# @TODO provide for transfer by serialization and re instantiation raise RepositoryError('You can not set a link equal to part of a gpb composite, only the root!')
# if this value is from another repository... you need to load it from the hashed objects into this repository
# Add the new link to the list of parents for the object value.AddParentLink(link) # Setting it again is a pass... return
# Add the new link to the list of parents for the object
# If the link is currently set
# Don't do this - read below! # If there are no parents left for the object delete it #if len(plinks)==0:
# This could lead to an invalid state #del self._workspace[link.key]
# to do this correctly make a weak reference # Remove it from the work space # del the old_obj # if it is still there in the weak ref put it back # because something is still referencing it
# But really who cares - just leaving it hanging in the # workbench, it will be garbage collected later.
else:
#Make sure the link is in the objects set of child links
# Set the id of the linked wrapper
# Set the type #link.type = object_utils.get_type_from_obj(value)
""" Resolves a git-style treeish from the specified branch.
Treeishes have the ability to go back in commit history either linearly (parent) or along merges (2nd parent). You can use a treeish to see the state of an object previously. For example:
^^^^^ // 5 parents above the branch head ~3^~2 // 3 parents above, then one parent above, then another 2 parents above ^2~1 // 2nd parent of the branch head (of a merge commit), then 1 parent above that
Current support is for caret-parent and tilde spec. See this document for more details: http://book.git-scm.com/4_git_treeishes.html
@param treeish A string of the treeish characters to follow. Order is important. Point of reference is the tip of the branch's head. @param branch A branch name to use as a reference point. If not specified, master is used. @returns A commit that the treeish resolves to. If not found, a RepositoryError is raised. It is your responsibility to do something with the commit returned - typical procedure would be to call checkout and specify the commit_id parameter (as the returned object's .MyId attribute). """
# figure out additional numbers
# Tilde Spec: # The tilde spec will give you the Nth grandparent of a commit object.
curcommit = curcommit.parentrefs[0].commitref elif op == "^": # Carrot Parent: # This will give you the Nth parent of a particular commit. This format is only useful on merge # commits - commit objects that have more than one direct parent. curparents = curcommit.parentrefs if num > len(curparents): raise RepositoryError("Could not resolve treeish (%s): parent %d of commit (%s) requested, only %d present" % (origtreeish, num, sha1_to_hex(curcommit.MyId), len(curparents)))
curcommit = curparents[num-1].commitref else: raise RepositoryError("Unknown treeish char: %s (treeish: %s)" % (op, origtreeish))
log.debug("Treeish (%s) resolved to commit %s" % (origtreeish, sha1_to_hex(curcommit.MyId))) return curcommit
# The commit does not belong to the Merge container - it is an object from the repository
def load_root(self, excluded_types):
# may take a non-deferred path here
raise RepositoryError('Can not add item to MergeContainer - it is not a MergeRepository')
raise RepositoryError('Can not add item to MergeContainer - this commit is already merged')
# Add it to the list - for primary access, also add it to the dict!
root_list=[] for repo in self.merge_repos:
root_list.append(repo.root_object)
return root_list
return self._root_objects().__iter__()
return len(self.merge_repos)
|