# -*- coding: utf-8 -*-
# MIT License
#
# Copyright (c) 2017 IRISA, Jean Coquet, Pierre Vignet, Mateo Boudet
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Contributor(s): Jean Coquet, Pierre Vignet, Mateo Boudet
"""Module used to query SPARQL endpoint.
"""
from __future__ import print_function
# Standard imports
import itertools as it
try:
from SPARQLWrapper import SPARQLWrapper, JSON
except ImportError:
raise ImportError("SPARQLWrapper seems not to be installed. \
Please install the module with the following command:\n \
sudo pip install SPARQLWrapper \n \
or \
pip install --user SPARQLWrapper")
# Custom imports
from biopax2cadbiom import namespaces as nm
import biopax2cadbiom.commons as cm
LOGGER = cm.logger()
def auto_add_prefixes(func):
    """Decorator: prepend all RDF prefixes to the SPARQL query.

    The query is expected as the first positional argument of the decorated
    function (see sparql_query()). The prefixes are the value returned by
    ``nm.get_RDF_prefixes()``; they are concatenated in front of the query.

    :param func: Function taking a SPARQL query as first positional argument.
    :type func: <function>
    :return: Wrapper calling ``func`` with the prefixed query.
    :rtype: <function>
    """
    # Function-scope import: keeps the decorator self-contained
    import functools

    # wraps() preserves the name and the docstring of the decorated function
    # (otherwise the documentation of sparql_query() would be hidden behind
    # the wrapper).
    @functools.wraps(func)
    def modified_func(*args, **kwargs):
        prefixed_query = nm.get_RDF_prefixes() + args[0]
        # Forward the other positional arguments instead of dropping them
        return func(prefixed_query, *args[1:], **kwargs)

    return modified_func
def order_results(query, orderby='?uri', limit=cm.SPARQL_LIMIT):
    """Build nested queries for access points with restrictions.

    The original query is encapsulated between a generic ``SELECT * WHERE {``
    command and the ``ORDER BY``, ``OFFSET`` & ``LIMIT`` clauses; the nested
    query is then sent block after block (``limit`` results per block) with
    an increasing offset, until a block smaller than ``limit`` is received.

    http://vos.openlinksw.com/owiki/wiki/VOS/VirtTipsAndTricksHowToHandleBandwidthLimitExceed
    https://etl.linkedpipes.com/components/e-sparqlendpointselectscrollablecursor

    .. warning:: WE ASSUME THAT THE SECOND LINE OF THE QUERY CONTAINS THE FULL
        SELECT COMMAND !!!

    :param query: Original normal SPARQL query.
    :param orderby: Order queries by this variable.
    :param limit: Max items queried for 1 block.
    :type query: <str>
    :type orderby: <str>
    :type limit: <int>
    :return: A generator of results, as yielded by sparql_query().
    :rtype: <generator>
    :raises ValueError: If the second line of the query does not contain
        the SELECT command, or if ``limit`` is lower than 1.
    """
    # Assume that the second line contains the SELECT command
    # (cf queries in sparql_biopaxQueries.py).
    # Explicit exception instead of assert: asserts are stripped with -O.
    query_lines = query.split('\n')
    if len(query_lines) < 2 or 'SELECT' not in query_lines[1]:
        raise ValueError(
            "The second line of the query must contain the SELECT command"
        )
    # A block size lower than 1 can never satisfy the stop condition below
    if limit < 1:
        raise ValueError("limit must be greater than or equal to 1")

    # Build the nested query by encapsulating the original between
    # a generic SELECT command, and the OFFSET & LIMIT clauses at the end.
    query_prefix = 'SELECT *\nWHERE {'
    suffix_template = \
        '\nORDER BY {orderby}\n}}\nOFFSET {offset}\nLIMIT {limit}'

    for block_number in it.count():
        query_suffix = suffix_template.format(
            orderby=orderby,
            offset=limit * block_number,
            limit=limit,
        )

        # Stays at 0 when the block is empty; otherwise enumerate() (started
        # at 1) leaves the number of results of the block in it.
        # PS: it must not be initialized to 1, otherwise an empty block is
        # never detected when limit == 1 (infinite loop).
        count = 0
        for count, result in enumerate(
                sparql_query(query_prefix + query + query_suffix), 1):
            yield result

        # The last block size is less than limit => we stop iteration
        if count < limit:
            break
def load_sparql_endpoint():
    """Make a connection to SPARQL endpoint & retrieve a cursor.

    The endpoint address is read from ``cm.SPARQL_PATH`` and the cursor is
    configured to send its requests with the POST HTTP method.

    :return: sparql cursor in version 1!
        => we don't use SPARQLWrapper2 cursor that provides
        SPARQLWrapper.SmartWrapper.Bindings-class to convert JSON from server.
    :rtype: <SPARQLWrapper>
    """
    sparql = SPARQLWrapper(cm.SPARQL_PATH)
    # The second positional argument of SPARQLWrapper() is `updateEndpoint`
    # (address used for update queries), not the HTTP method: the method has
    # to be selected with setMethod().
    sparql.setMethod('POST')
    return sparql
@auto_add_prefixes
def sparql_query(query):
    """Send a SPARQL query to the endpoint and yield its results.

    The endpoint is the one returned by load_sparql_endpoint(); results are
    requested in JSON format.
    One tuple is yielded per result row (binding); its values follow the
    order of the variables declared in the header of the response, and a
    variable without value in a row gives None.

    :param query: SPARQL query
    :type query: <str>
    :return: Generator of results.
    :rtype: <generator <tuple>>
    :raises Exception: Any error raised during the query or its conversion
        is logged and re-raised as is.
    """
    LOGGER.debug(query)
    sparql = load_sparql_endpoint()
    # data in JSON format => proper python dict()
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    try:
        # PS: if XML stream is not used: don't use sparql.query(),
        # but sparql.queryAndConvert() instead.
        results = sparql.queryAndConvert()
    except Exception as e:
        # Report the error with the module logger, then let the caller decide
        LOGGER.error("SPARQL query error: %s", e)
        raise

    # Dictionary of dictionaries in result
    # ex:
    # {
    #   "head": {
    #     "vars": [ "METACYC" , "name" ]
    #   } ,
    #   "results": {
    #     "bindings": [
    #       {
    #         "METACYC": { "type": "literal" , "value": "PROPANOL" }
    #       }
    #     ]
    #   }
    # }
    bindings = results['results']['bindings']
    # The list of variables is the same for all rows: read it only once
    variables = results['head']['vars']

    for binding in bindings:
        yield tuple(binding.get(var, {}).get('value') for var in variables)