diff --git a/README.md b/README.md index 8f94e4e06fe6f80621aceb2a9c52197c2021a99f..659880c5327815c2c7626060f1086af09d5a6e94 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ Requirements: - OrfeoToolBox python API installed ```bash -pip install pyotb +pip install pyotb --upgrade ``` -For Python>=3.6, latest version available is pyotb 1.3. For Python 3.5, latest version available is pyotb 1.2.2 +For Python>=3.6, latest version available is pyotb 1.3.1. For Python 3.5, latest version available is pyotb 1.2.2 ## Quickstart: running an OTB app as a oneliner pyotb has been written so that it is more convenient to run an application in Python. @@ -287,8 +287,35 @@ Advantages : - Can be integrated in OTB pipelines Limitations : -- It is not possible to use the tensorflow python API inside a script where OTBTF is used because of compilation issues between Tensorflow and OTBTF -- It is currently not possible to chain several `@pyotb.run_tf_function` functions +- It is not possible to use the tensorflow python API inside a script where OTBTF is used because of compilation issues +between Tensorflow and OTBTF, i.e. `import tensorflow` doesn't work in a script where pyotb has been imported + + +## Some examples +### Compute the mean of several rasters, taking into account NoData +Let's consider we have at disposal 73 NDVI rasters and their 73 binary cloud masks (where cloud is 1 and clear pixel is 0), corresponding to 73 dates of a year. + +Goal: compute the temporal mean (keeping the spatial dimension) of the NDVIs, excluding cloudy pixels. Piece of code to achieve that: + +```python +import pyotb + +masks = [pyotb.Input(path) for path in mask_paths] + +# For each pixel location, summing all valid NDVI values +summed = sum([pyotb.where(mask != 1, ndvi, 0) for mask, ndvi in zip(masks, ndvi_paths)]) + +# Printing the generated BandMath expression +print(summed.exp) # this returns a very long exp: (0 + ((im1b1 != 1) ? im2b1 : 0)) + ((im3b1 != 1) ? im4b1 : 0)) + ... + ((im145b1 != 1) ? im146b1 : 0))) + +# For each pixel location, getting the count of valid pixels +count = sum([pyotb.where(mask == 1, 0, 1) for mask in masks]) + +mean = summed / count # BandMath exp of this is very long: (0 + ((im1b1 != 1) ? im2b1 : 0)) + ... + ((im145b1 != 1) ? im146b1 : 0))) / (0 + ((im1b1 == 1) ? 0 : 1)) + ((im3b1 == 1) ? 0 : 1)) + ... + ((im145b1 == 1) ? 0 : 1))) +mean.write('ndvi_annual_mean.tif') +``` + +Note that no actual computation is executed before the last line where the result is written to disk. diff --git a/pyotb/apps.py b/pyotb/apps.py index c0b7019f00fce3984b6509ab569a50a6dc7ee41d..70b924782e422f29acecc9ea32b57bc6d2c5e613 100644 --- a/pyotb/apps.py +++ b/pyotb/apps.py @@ -83,6 +83,26 @@ class {name}(App): def __init__(self, *args, **kwargs): super().__init__('{name}', *args, **kwargs) """ -# Here we could customize the template and overwrite special methods depending on application for _app in AVAILABLE_APPLICATIONS: + # Default behavior for any OTB application exec(_code_template.format(name=_app)) + + # Customize the behavior for TensorflowModelServe application. The user doesn't need to set the env variable + # `OTB_TF_NSOURCES`, it is handled in pyotb + if _app == 'TensorflowModelServe': + class TensorflowModelServe(App): + def set_nb_sources(self, *args, n_sources=None): + """ + Set the number of sources of TensorflowModelServe. Can be either user-defined or deduced from the args + """ + if n_sources: + os.environ['OTB_TF_NSOURCES'] = str(int(n_sources)) + else: + # Retrieving the number of `source#.il` parameters + params_dic = {k: v for arg in args if isinstance(arg, dict) for k, v in arg.items()} + n_sources = len([k for k in params_dic if k.startswith('source') and k.endswith('.il')]) + os.environ['OTB_TF_NSOURCES'] = str(n_sources) + + def __init__(self, *args, n_sources=None, **kwargs): + self.set_nb_sources(*args, n_sources=n_sources) + super().__init__('TensorflowModelServe', *args, **kwargs) diff --git a/pyotb/core.py b/pyotb/core.py index cb74243294c7b88b01f66bd83791c22f7e3916a4..5110218a5811fced248398091483460744257623 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -915,10 +915,8 @@ def get_nbchannels(inp): try: info = App("ReadImageInfo", inp, otb_stdout=False) nb_channels = info.GetParameterInt("numberbands") - except (RuntimeError, TypeError) as e: - logger.error(f'Not a valid image : {inp}') - logger.error(f'{e}') - nb_channels = None + except Exception: # this happens when we pass a str that is not a filepath + raise TypeError(f'Could not get the number of channels of `{inp}`. Not a filepath or wrong filepath') return nb_channels diff --git a/pyotb/functions.py b/pyotb/functions.py index 6ad8bf224382d4c504bf372e80686f362435cad6..e3bf12ed432430627cf3d05fafd6f851671abd3a 100644 --- a/pyotb/functions.py +++ b/pyotb/functions.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- +import inspect import os +import sys +import textwrap import uuid import logging -import multiprocessing from collections import Counter from .core import (App, Input, Operation, logicalOperation, get_nbchannels) @@ -321,7 +323,7 @@ def define_processing_area(*args, window_rule='intersection', pixel_size_rule='m def run_tf_function(func): """ - This decorator enables using a function that calls some TF operations, with pyotb object as inputs. + This function enables using a function that calls some TF operations, with pyotb object as inputs. For example, you can write a function that uses TF operations like this : @run_tf_function @@ -335,68 +337,98 @@ def run_tf_function(func): :param func: function taking one or several inputs and returning *one* output :return wrapper: a function that returns a pyotb object """ + try: + from .apps import TensorflowModelServe + except ImportError: + raise Exception('Could not run Tensorflow function: failed to import TensorflowModelServe. Check that you ' + 'have OTBTF configured (https://github.com/remicres/otbtf#how-to-install)') - def create_and_save_tf_model(output_dir, *inputs): + def get_tf_pycmd(output_dir, channels, scalar_inputs): """ - Simply creates the TF model and save it to temporary location. - //!\\ Currently incompatible with OTBTF, to be run inside a multiprocessing.Process - //!\\ Does not work if OTBTF has been used previously in the script + Create a string containing all python instructions necessary to create and save the Keras model :param output_dir: directory under which to save the model - :param inputs: a list of pyotb objects or int/float + :param channels: list of raster channels (int). Contain `None` entries for non-raster inputs + :param scalar_inputs: list of scalars (int/float). Contain `None` entries for non-scalar inputs """ - import tensorflow as tf - # Change the raster inputs to TF inputs - model_inputs = [] # model inputs corresponding to rasters - tf_inputs = [] # inputs for the TF function corresponding to all inputs (rasters, int...) - for input in inputs: - if not isinstance(input, (int, float)): - nb_bands = input.shape[-1] - input = tf.keras.Input((None, None, nb_bands)) - model_inputs.append(input) - tf_inputs.append(input) - # call the TF operations on theses inputs - output = func(*tf_inputs) + # Getting the string definition of the tf function (e.g. "def multiply(x1, x2):...") + # TODO: maybe not entirely foolproof, maybe we should use dill instead? but it would add a dependency + func_def_str = inspect.getsource(func) + func_name = func.__name__ + + create_and_save_model_str = func_def_str + + # Adding the instructions to create the model and save it to output dir + create_and_save_model_str += textwrap.dedent(f""" + import tensorflow as tf - # Create and save the .pb model - model = tf.keras.Model(inputs=model_inputs, outputs=output) - model.save(output_dir) + model_inputs = [] + tf_inputs = [] + for channel, scalar_input in zip({channels}, {scalar_inputs}): + if channel: + input = tf.keras.Input((None, None, channel)) + tf_inputs.append(input) + model_inputs.append(input) + else: + if isinstance(scalar_input, int): # TF doesn't like mixing float and int + scalar_input = float(scalar_input) + tf_inputs.append(scalar_input) + + output = {func_name}(*tf_inputs) + + # Create and save the .pb model + model = tf.keras.Model(inputs=model_inputs, outputs=output) + model.save("{output_dir}") + """) + + return create_and_save_model_str def wrapper(*inputs, tmp_dir='/tmp'): """ - For the user point of view, this function simply applies some TF operations to some rasters. - Underlyingly, it saveq a .pb model that describe the TF operations, then creates an OTB ModelServe application - that applies this .pb model to the actual inputs. + For the user point of view, this function simply applies some TensorFlow operations to some rasters. + Underlyingly, it saves a .pb model that describe the TF operations, then creates an OTB ModelServe application + that applies this .pb model to the inputs. :param inputs: a list of pyotb objects, filepaths or int/float numbers :param tmp_dir: directory where temporary models can be written :return: a pyotb object, output of TensorFlowModelServe """ - # Change potential string filepaths to pyotb objects - inputs = [Input(input) if isinstance(input, str) and not isinstance(input, (int, float)) else input for input in - inputs] - - # Create and save the model. This is executed **inside an independent process** because (as of 2021-11), + # Get infos about the inputs + channels = [] + scalar_inputs = [] + raster_inputs = [] + for input in inputs: + try: + # this is for raster input + channel = get_nbchannels(input) + channels.append(channel) + scalar_inputs.append(None) + raster_inputs.append(input) + except Exception: + # this is for other inputs (float, int) + channels.append(None) + scalar_inputs.append(input) + + # Create and save the model. This is executed **inside an independent process** because (as of 2022-03), # tensorflow python library and OTBTF are incompatible out_savedmodel = os.path.join(tmp_dir, f'tmp_otbtf_model_{uuid.uuid4()}') - p = multiprocessing.Process(target=create_and_save_tf_model, args=(out_savedmodel, *inputs,)) - p.start() - p.join() - - # Getting the nb of inputs and setting it for OTBTF - raster_inputs = [input for input in inputs if not isinstance(input, (int, float))] - nb_model_inputs = len(raster_inputs) - os.environ['OTB_TF_NSOURCES'] = str(nb_model_inputs) - - # Run the OTBTF model serving application - model_serve = App('TensorflowModelServe', - {'model.dir': out_savedmodel, - 'optim.disabletiling': 'on', 'model.fullyconv': 'on'}, execute=False) - + pycmd = get_tf_pycmd(out_savedmodel, channels, scalar_inputs) + cmd_args = [sys.executable, "-c", pycmd] + try: + import subprocess + subprocess.run(cmd_args, env=os.environ, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except subprocess.SubprocessError: + logger.debug("Failed to call subprocess") + if not os.path.isdir(out_savedmodel): + logger.info("Failed to save the model") + + # Initialize the OTBTF model serving application + model_serve = TensorflowModelServe({'model.dir': out_savedmodel, 'optim.disabletiling': 'on', + 'model.fullyconv': 'on'}, n_sources=len(raster_inputs), execute=False) + # Set parameters and execute for i, input in enumerate(raster_inputs): model_serve.set_parameters({f'source{i + 1}.il': [input]}) - model_serve.Execute() # TODO: handle the deletion of the temporary model ? diff --git a/setup.py b/setup.py index 5c78ce9a3336dfb238dc817badb9d35db127cc92..81f33668809440f69c9bba89ce3336f2f924a737 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh: setuptools.setup( name="pyotb", - version="1.3", + version="1.3.1", author="Nicolas Narçon", author_email="nicolas.narcon@gmail.com", description="Library to enable easy use of the Orfeo Tool Box (OTB) in Python",