Coverage for ion/services/coi/datastore_bootstrap/dataset_bootstrap : 59.93%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
|
#!/usr/bin/env python
@file ion/services/coi/datastore_bootstrap/dataset_bootstrap.py @author David Stuebe @author Tim LaRocque
Sample datasets are configured and loaded like so: 'ion.services.coi.datastore_bootstrap.ion_preload_config':{ # Path to files relative to ioncore-python directory! # Get files from: http://ooici.net/ion_data/ 'sample_traj_dataset' : '../../ion_data/SOS_Test.tar.gz', 'sample_station_dataset' : '../../ion_data/USGS_Test.tar.gz' },
"""
# Create CDM Type Objects
""" Example file: ion/services/coi/SOS_Test.arr This method loads data from byte array files on disk - structure container GPB's or tgz of the same... """
if filename.endswith('.tar.gz') or filename.endswith('.tgz'):
result = read_ooicdm_tar_file(instance, filename)
else: result = read_ooicdm_file(instance, filename)
log.debug('Bootstraping dataset from local byte array complete: "%s"' % filename)
return result
f = None try:
# Get an absolute path to the file filename = pu.get_ion_path(filename)
f = open(filename, 'r') result = True
except IOError, e: log.error('dataset_bootstrap.bootstrap_byte_array_dataset(): Could not open the given filepath "%s" for read access: %s' % (filename, str(e)))
if f is not None: head_elm, obj_dict = codec._unpack_container(f.read()) instance.Repository.index_hash.update(obj_dict)
root_obj = instance.Repository._load_element(head_elm)
instance.Repository.load_links(root_obj)
if root_obj.ObjectType == ION_MSG_TYPE: dataset = root_obj.message_object else: dataset = root_obj
instance.root_group = dataset.root_group
f.close()
return result
f = None tar = None result = False try:
# Get an absolute path to the file filename = pu.get_ion_path(filename)
log.debug('Untaring file...') tar = tarfile.open(filename, 'r')
#f = tar.extractfile(tar.next())
except IOError, e: log.error('dataset_bootstrap.bootstrap_byte_array_dataset(): Could not open the given filepath "%s" for read access: %s' % (filename, str(e)))
except ExtractError, e: log.error('dataset_bootstrap.bootstrap_byte_array_dataset(): Could not read from zipped tar filepath "%s", Extract error: %s' % (filename, str(e)))
if tar is None: return False
vars=[] root_obj = None for name in tar.getnames():
try: f = tar.extractfile(tar.getmember(name)) except ExtractError, e: log.error('dataset_bootstrap.bootstrap_byte_array_dataset(): Could not extract from zipped tar filepath "%s", Extract error: %s' % (filename, str(e))) return False
head_elm, obj_dict = codec._unpack_container(f.read()) instance.Repository.index_hash.update(obj_dict)
f.close()
head_obj = instance.Repository._load_element(head_elm)
# Get rid of the ION Message object if present... if head_obj.ObjectType == ION_MSG_TYPE: head_obj = head_obj.message_object
if head_obj.ObjectType == DATASET_TYPE: root_obj = head_obj else: vars.append(head_obj)
group = root_obj.root_group
instance.root_group = group
instance.Repository.load_links(group)
# Clear any bounded arrays which are empty. Create content field if it is not present for var in group.variables:
if var.IsFieldSet('content'):
content = var.content
if len(content.bounded_arrays) > 0:
i =0 while i < len(content.bounded_arrays):
ba = content.bounded_arrays[i]
if not ba.IsFieldSet('ndarray'): del content.bounded_arrays[i]
continue else: i += 1
else: var.content = instance.CreateObject(ARRAY_STRUCTURE_TYPE)
# Now add any bounded arrays that we need.... for var_container in vars:
if var_container.ObjectType != SUPPLEMENT_MSG_TYPE: raise IOError('Invalid variable supplement component found in the tar file dataset - "%s"' % filename)
#print 'Tar Content: \n',var_container.PPrint()
ba = var_container.bounded_array
log.debug('Adding content to variable name: %s' % var_container.variable_name) try: var = group.FindVariableByName(var_container.variable_name) except gpb_wrapper.OOIObjectError, oe: log.error(str(oe)) raise IOError('Expected variable name %s not found in tar file dataset - "%s"' % (var_container.variable_name, filename))
ba_link = var.content.bounded_arrays.add() ba_link.SetLink(ba)
result = True
tar.close()
#print 'Complete Group:',group.PPrint()
return result
""" Pass in a link from the resource object which is created in the initialization of the datastore This method constructs a dataset manually! """ # Attach the root group
raise ValueError('Cannot use argument "supplement_overlap_count" without specifying "supplement_number')
raise ValueError('Argument supplement_overlap_count (%i) is too large for the given supplement_number (%i)' % (supplement_overlap_count, supplement_number))
# Create all dimension and variable objects # Note: CDM variables such as scalars, coordinate variables and data are all represented by # the variable object type. Signifying the difference between these types is done # simply by the conventions used in implementing variable objects. Some notable # fields of the variable object are the 'shape' field and the 'content' field. The # 'shape' field is used for defining the dimensionality of the variable and is defined # as a repeated field so that it can support multi-dimensional variables. The 'content' # field can be filled with a Bounded Array, a Structure or a Sequence with the same rank # and length as the dimension objects stored in the variable's shape field. # # See: http://oceanobservatories.org/spaces/display/CIDev/DM+CDM # # Scalars: # Scalar variables such as 'station ID' in the example below, are not associated with a # dimension and therefore do NOT contain an entry for their shape field. Also, the # BoundedArray which contains the station ID's content contains only a single value. # # Coordinate Variables: # Coordinate variables are those which contain an array of values upon which other variables # are dependent. An example of this is the 'time' variable. Data variables such as # salinity are dependent on the dimension of time. Coordinate variables are represented # by constructing a dimension object for that coordinate and also creating a variable object # to store the values of that dimension. Once this is done, dependent data variables can # define their shape with the aforementioned dimension object as well. # # Data Variables: # Data variables are the most straight-forward types to implement. The following example # should explain all that is needed to use these types.
# Assign required field values (name, length, datatype, etc) #-----------------------------------------------------------
# Construct the Coordinate Variables: time and depth #------------------------------------------------------ # Add dimensionality (shape) # Add attributes (CDM conventions require certain attributes!) # Add data values
start_time = 1280102000 + int(round(random.random()* 360000))
else: # 2010-07-26T00:02:00Z
# Construct the Scalar Variables: lat, lon and station id #------------------------------------------------------------ # Add dimensionality (shape) # !! scalars DO NOT specify dimensions !! # Add attributes (CDM conventions require certain attributes!) # Add data values
else:
# Construct the Data Variable: salinity #----------------------------------------------------------- # Add dimensionality (shape) # Add attributes (CDM conventions require certain attributes!) # Add data values
else: # Produce values in the sequence: [29.00, 29.03, 28.97, 29.06, 28.94, ...] (calculated by 'base' +0.03, -0.06, +0.09, -0.12...) # TODO: Use this calculation to determine the min number for supplement_overlap_count at the begining of this method
# values used to be: [29.82, 29.74, 29.85, 30.14, 30.53, 30.85]
# Attach variable and dimension objects to the root group #--------------------------------------------------------
# Create and Attach global attributes to the root group #--------------------------------------------------------
# Don't use internal time tool - we don't want fractional seconds or 'T' #stime = IonTime(start_time * 1000) #etime = IonTime(end_time * 1000)
''' Helper method to create string attributes for variables and dataset groups '''
''' Helper method to add string attributes to variable instances '''
#---------------------------------------------# # Create the corresponding datasource objects # #---------------------------------------------#
#--------------------------------------------# # Create the corresponding datasource object # #--------------------------------------------#
# Just create it - the workbench/datastore will take care of the rest!
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: NDBC SOS Glider data
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.SOS datasource.property.append('salinity') datasource.station_id.append('48900') datasource.request_type = datasource.RequestType.NONE
datasource.base_url = "http://sdf.ndbc.noaa.gov/sos/server.php?"
datasource.max_ingest_millis = 10000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "Traj Data Source" datasource.ion_description = "Data Traj"
datasource.is_public = True
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: USGS waterservices
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.USGS datasource.property.append('00010') datasource.property.append('00060') datasource.station_id.append('01463500') datasource.request_type = datasource.RequestType.NONE # *not used*
datasource.base_url = "http://waterservices.usgs.gov/nwis/iv?"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "Station Data Source" datasource.ion_description = "Data Station"
datasource.is_public = True
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: HyCom (SourceType.NETCDF_S, RequestType.FTP)
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.NETCDF_S datasource.request_type = datasource.RequestType.FTP datasource.base_url = "ftp://ftp7300.nrlssc.navy.mil/pub/smedstad/ROMS/" datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "HyCom Data Source" datasource.ion_description = "Data HyCom"
# Add Search Pattern parameters search_pattern = datasource.Repository.create_object(SEARCH_PATTERN_TYPE)
search_pattern.dir_pattern = "%yyyy%/%DDD%/" search_pattern.file_pattern = "%yyyy%%MM%%dd%-MODIS_A-JPL-L2P-A%yyyy%%DDD%%HH%%mm%%ss%\\.L2_LAC_GHRSST_[a-zA-Z]-v01\\.nc\\.bz2" search_pattern.join_name = "time"
datasource.search_pattern = search_pattern
datasource.is_public = True
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: NTAS1 (SourceType.NETCDF_S, RequestType.DAP)
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.NETCDF_S datasource.request_type = datasource.RequestType.DAP
datasource.base_url = "http://geoport.whoi.edu/thredds/dodsC/usgs/data0/rsignell/data/oceansites/OS_NTAS_2010_R_M-1.nc"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "NTAS1 Data Source" datasource.ion_description = "Data NTAS1"
datasource.is_public = False
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: NTAS2 (SourceType.NETCDF_S, RequestType.DAP)
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.NETCDF_S datasource.request_type = datasource.RequestType.DAP
datasource.base_url = "http://geoport.whoi.edu/thredds/dodsC/usgs/data0/rsignell/data/oceansites/OS_NTAS_2010_R_M-2.nc"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "NTAS2 Data Source" datasource.ion_description = "Data NTAS2"
datasource.is_public = True
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: WHOTS1 (SourceType.NETCDF_S, RequestType.DAP)
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.NETCDF_S datasource.request_type = datasource.RequestType.DAP
datasource.base_url = "http://geoport.whoi.edu/thredds/dodsC/usgs/data0/rsignell/data/oceansites/OS_WHOTS_2010_R_M-1.nc"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "WHOTS1 Data Source" datasource.ion_description = "Data WHOTS1"
datasource.is_public = False
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: WHOTS2 (SourceType.NETCDF_S, RequestType.DAP)
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.NETCDF_S datasource.request_type = datasource.RequestType.DAP
datasource.base_url = "http://geoport.whoi.edu/thredds/dodsC/usgs/data0/rsignell/data/oceansites/OS_WHOTS_2010_R_M-1.nc"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "WHOTS2 Data Source" datasource.ion_description = "Data WHOTS2"
datasource.is_public = True
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: USGS waterservices
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.USGS datasource.property.extend(['00010', '00060', '00065', '00045', '00095']) datasource.station_id.append('212359157502601') datasource.request_type = datasource.RequestType.NONE # *not used*
datasource.base_url = "http://waterservices.usgs.gov/nwis/iv?"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "Moana Loa Data Source" datasource.ion_description = "Data from Moana Loa"
datasource.is_public = True
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: USGS waterservices
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.USGS datasource.property.extend(['00010', '00060', '00065', '00045', '00095']) datasource.station_id.append('01491000') datasource.request_type = datasource.RequestType.NONE # *not used*
datasource.base_url = "http://waterservices.usgs.gov/nwis/iv?"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "Choptank River Data Source" datasource.ion_description = "Data from Choptank River"
datasource.is_public = False
return True
#-------------------------------------------# # Create the corresponding datasource object # #-------------------------------------------# # Datasource: USGS waterservices
# Abort if the dataset does not exist
has_a_id = kwargs.get('has_a_id') has_a = ds_svc.workbench.get_repository(has_a_id)
datasource.Repository.commit('Commit source before creating association')
# Just create it - the workbench/datastore will take care of the rest! asssociation = ds_svc.workbench.create_association(datasource, has_a, dataset)
datasource.source_type = datasource.SourceType.USGS datasource.property.extend(['00010', '00060', '00065', '00045', '00095']) datasource.station_id.append('01184000') datasource.request_type = datasource.RequestType.NONE # *not used*
datasource.base_url = "http://waterservices.usgs.gov/nwis/iv?"
datasource.max_ingest_millis = 6000
datasource.registration_datetime_millis = IonTime().time_ms
datasource.ion_title = "Connecticut River Data Source" datasource.ion_description = "Data from Connecticut River"
datasource.is_public = False
return True
|