import os
import time
import traceback
import json
from pywps import Process, get_format, configuration
from pywps import LiteralInput, ComplexOutput
from pavics import catalog

# Example usage:
# localhost/pywps?service=WPS&request=execute&version=1.0.0&\
# identifier=pavicsupdate&DataInputs=id=id_string;type=File;\
# updates=subject:new_subject,units:m
# The inputs may still need validation, and updates involving lists of
# entries have not been tested yet.
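# A minimal sketch of the same request issued from Python (assumptions: the
# `requests` library is available and the service is reachable at
# http://localhost/pywps; adjust the URL and input values to your
# deployment):
#
#     import requests
#
#     params = {'service': 'WPS',
#               'request': 'execute',
#               'version': '1.0.0',
#               'identifier': 'pavicsupdate',
#               'DataInputs': ('id=id_string;type=File;'
#                              'updates=subject:new_subject,units:m')}
#     response = requests.get('http://localhost/pywps', params=params)
#     print(response.text)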
# The user under which apache is running must be able to write to the
# output path directory configured below.
json_output_path = configuration.get_config_value('server', 'outputpath')
json_format = get_format('JSON')
gmlxml_format = get_format('GML')


class PavicsUpdate(Process):
def __init__(self):
self.solr_server = os.environ.get('SOLR_HOST', None)
        # The 'id' input identifies the entry (or entries) to update in the
        # Solr database: a single document id for type 'File', or a
        # dataset_id shared by multiple documents for type 'Dataset'.
inputs = [LiteralInput('id',
'id field of the dataset or file',
abstract='id field of the dataset or file.',
data_type='string'),
LiteralInput('type',
'Dataset or File',
abstract=('The File type will update a single '
'file, the Dataset type will update '
'all documents sharing its '
'dataset_id'),
data_type='string',
default='File',
min_occurs=0,
mode=None),
LiteralInput('updates',
'Fields to update with their new values',
abstract=('Format is '
'key1:value1,key2:value2,...'),
data_type='string')]
outputs = [ComplexOutput('update_result',
'PAVICS Catalogue Update Result',
abstract='Update result as a json.',
supported_formats=[json_format])]
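        # Return the output as a reference (a URL to the stored file)
        # rather than embedding it inline in the WPS response.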
outputs[0].as_reference = True
super(PavicsUpdate, self).__init__(
self._handler,
identifier='pavicsupdate',
title='PAVICS Catalogue Update',
abstract=('Update database entries using key:value pairs and '
'identified by their ids.'),
version='0.1',
inputs=inputs,
outputs=outputs,
store_supported=True,
status_supported=True)

    def _handler(self, request, response):
        # Get the id and type to set up the update dictionary.
update_id = request.inputs['id'][0].data
if 'type' in request.inputs:
update_type = request.inputs['type'][0].data
else:
update_type = 'File'
if update_type == 'File':
update_dict = {'id': update_id}
elif update_type == 'Dataset':
update_dict = {'dataset_id': update_id}
        else:
            raise NotImplementedError(
                'Unknown update type: {0}'.format(update_type))
# Get updates, which are the facets to add/modify.
data_inputs = request.inputs['updates'][0].data
        # Pairs are comma-separated; split each pair on the first colon
        # only, so values may themselves contain colons.
        key_value_pairs = data_inputs.split(',')
        for key_value_pair in key_value_pairs:
            key, value = key_value_pair.split(':', 1)
            # Here, we do not use the {'set': value} atomic update syntax;
            # it does not work in birdhouse-solr. Instead, the field is
            # written as if adding a new entry; since the id already exists
            # in Solr, this updates the other fields of that document.
            update_dict[key] = value
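        # For example, with update_type == 'File', update_id == 'id_string'
        # and updates == 'subject:new_subject,units:m', the dictionary sent
        # to Solr is:
        #     {'id': 'id_string', 'subject': 'new_subject', 'units': 'm'}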
try:
update_result = catalog.pavicsupdate(self.solr_server, update_dict)
        except Exception:
            raise Exception(traceback.format_exc())
        # Construct an output file name from the current UTC time
        # (unique only to the second).
        time_str = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        output_file_name = "solr_result_{0}_.json".format(time_str)
        output_file = os.path.join(json_output_path, output_file_name)
        with open(output_file, 'w') as f:
            f.write(json.dumps(update_result))
response.outputs['update_result'].file = output_file
response.outputs['update_result'].output_format = json_format
return response
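

if __name__ == '__main__':
    # A minimal sketch of calling this process with OWSLib (assumptions:
    # OWSLib is installed and a PyWPS instance serving this process is
    # reachable at the URL below; adjust both to your deployment).
    from owslib.wps import WebProcessingService, monitorExecution

    wps = WebProcessingService('http://localhost/pywps')
    execution = wps.execute(
        'pavicsupdate',
        inputs=[('id', 'id_string'),
                ('type', 'File'),
                ('updates', 'subject:new_subject,units:m')])
    # Poll the (possibly asynchronous) execution until it completes, then
    # report its final status and the reference to the json result.
    monitorExecution(execution)
    print(execution.status)
    for output in execution.processOutputs:
        print(output.reference)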