Source code for biopax2cadbiom.sparql_wrapper

# -*- coding: utf-8 -*-
# MIT License
#
# Copyright (c) 2017 IRISA, Jean Coquet, Pierre Vignet, Mateo Boudet
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Contributor(s): Jean Coquet, Pierre Vignet, Mateo Boudet

"""Module used to query SPARQL endpoint.

"""
from __future__ import print_function

# Standard imports
import itertools as it
try:
    from SPARQLWrapper import SPARQLWrapper, JSON
except ImportError:
    raise ImportError("SPARQLWrapper seems not to be installed. \
          Please install the module with the following command:\n \
          sudo pip install SPARQLWrapper \n \
          or \
          pip install --user SPARQLWrapper")

# Custom imports
from biopax2cadbiom import namespaces as nm
import biopax2cadbiom.commons as cm

LOGGER = cm.logger()


[docs]def auto_add_prefixes(func): """Decorator: Add all prefixes to the SPARQL query at first argument of sparql_query() """ def modified_func(*args, **kwargs): """Return modified function with prefix added on the first argument""" return func(nm.get_RDF_prefixes() + args[0], **kwargs) return modified_func
[docs]def order_results(query, orderby='?uri', limit=cm.SPARQL_LIMIT): """Build nested query for access points with restrictions. Build the nested query by encapsulating the original between the same SELECT command (minus useless DISTINCT clause), and the OFFSET & LIMIT clauses at the end. PS: don't forget to add the ORDER BY at the end of the original query. http://vos.openlinksw.com/owiki/wiki/VOS/VirtTipsAndTricksHowToHandleBandwidthLimitExceed https://etl.linkedpipes.com/components/e-sparqlendpointselectscrollablecursor .. warning:: WE ASSUME THAT THE SECOND LINE OF THE QUERY CONTAINS THE FULL SELECT COMMAND !!! :param arg1: Original normal SPARQL query. :param arg2: Order queries by this variable. :param arg3: Max items queried for 1 block. :type arg1: <str> :type arg2: <str> :type arg3: <int> :return: A generator of lines of results. :rtype: <dict> """ # Assume that the second line contains the SELECT command # (cf queries in sparql_biopaxQueries.py) second_query_line = query.split('\n')[1] assert 'SELECT' in second_query_line # Build the nested query by encapsulating the original between # a generic SELECT command, and the OFFSET & LIMIT clauses at the end. # PS: don't forget to add the ORDER BY at the end of the original query. query_prefix = 'SELECT *\nWHERE {' for offset in it.count(): query_suffix = """ ORDER BY """ + orderby + """ } OFFSET """ + str(limit * offset) + """ LIMIT """ + str(limit) # Begin from 1 (avoid to break at limit-1 later) count = 1 # No result in the query => count not initialized for count, result in enumerate( sparql_query(query_prefix + query + query_suffix), 1): # print(result, offset, count) yield result # The last block size is less than limit => we stop iteration if count < limit: break
[docs]def load_sparql_endpoint(): """Make a connection to SPARQL endpoint & retrieve a cursor. :return: sparql cursor in version 1! => we don't use SPARQLWrapper2 cursor that provides SPARQLWrapper.SmartWrapper.Bindings-class to convert JSON from server. :rtype: <SPARQLWrapper> """ return SPARQLWrapper(cm.SPARQL_PATH, 'POST') # CHECK THIS
[docs]@auto_add_prefixes def sparql_query(query): """Wait for a valid database URI, and a SPARQL query. Yields all triplets returned by the query. The query need to yield three values, named object, relation and subject. :param: SPARQL query :type: <str> :return: Generator of results. :rtype: <generator <tuple>> """ LOGGER.debug(query) sparql = load_sparql_endpoint() # data in JSON format => proper python dict() sparql.setQuery(query) sparql.setReturnFormat(JSON) try: # PS: if XML stream is not used: don't use sparql.query(), #but sparql.queryAndConvert() instead. results = sparql.queryAndConvert() # Dictionary of dictionnaries in result # ex: # { # "head": { # "vars": [ "METACYC" , "name" ] # } , # "results": { # "bindings": [ # { # "METACYC": { "type": "literal" , "value": "PROPANOL" } # } # ] # } # } # print(results) # print("results: ", len(results['results']['bindings'])) except Exception as e: print("SPARQL query error" + str(e)) raise for binding in results['results']['bindings']: yield tuple(binding.get(var, dict()).get('value', None) for var in results['head']['vars'])