From 564d1631830ac1a174cd3bc6a5def09fab65164a Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 4 Sep 2023 10:57:15 +0200
Subject: [PATCH 01/13] Replace hyphen by NA in demuxStat

	Ref: #73
---
 bin/demuxStatsFromXML.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R
index 5ad8819..3c5c1ff 100755
--- a/bin/demuxStatsFromXML.R
+++ b/bin/demuxStatsFromXML.R
@@ -84,7 +84,7 @@ for (pr in 1:length(projects)){
 				if (length(PerfectBarcodeCount) == 0) { PerfectBarcodeCount<-0 }
 				OneMismatchBarcodeCount<-xml_text(xml_find_all(xml, paste0(lane_path,"/OneMismatchBarcodeCount")))
 				
-				if (length(OneMismatchBarcodeCount) == 0) { OneMismatchBarcodeCount<- "-"}
+				if (length(OneMismatchBarcodeCount) == 0) { OneMismatchBarcodeCount<- NA}
 			
 				df_to_add<-data.frame(project, sample_name, barcode_names[bc], BarcodeCount, PerfectBarcodeCount, OneMismatchBarcodeCount)
 				df<-concat_df(df, df_to_add, vec.names)
@@ -191,7 +191,7 @@ if(nrow(tabUndetermined) > 0) { head(tabUndetermined) }
 if (dim(tabUndetermined)[1] != 0) {
 	df.tabUndetermined<-data.frame()
 	for (i in 1:dim(tabUndetermined)[1]) {
-		df.tabUndetermined.tmp<-data.frame("default", "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-")
+		df.tabUndetermined.tmp<-data.frame("default", "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], NA, NA)
 		df.tabUndetermined<-concat_df(df.tabUndetermined, df.tabUndetermined.tmp, vec.names)
 	}
 	
-- 
GitLab


From 2a0b3785edd8288396751c9f34b4e1c9cc159c2b Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Mon, 4 Sep 2023 16:54:48 +0200
Subject: [PATCH 02/13] Add GC bias analysis to DTM mode

	Ref: #76
---
 bin/parse_reports.sh                 | 11 +++++++++--
 conf/base.config                     | 14 ++++++++++++++
 conf/dependencies_genobioinfo.config |  1 +
 conf/dependencies_genologin.config   |  1 +
 modules/local/module_DTM.nf          |  3 ++-
 sub-workflows/local/dna_qc.nf        |  3 +++
 workflow/illumina_qc.nf              |  4 +++-
 7 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/bin/parse_reports.sh b/bin/parse_reports.sh
index a7d46ac..03d309d 100755
--- a/bin/parse_reports.sh
+++ b/bin/parse_reports.sh
@@ -1,12 +1,14 @@
 TAG=$1
 FASTP_REPORT=$2
 QUALIMAP_REPORT=$3/genome_results.txt
+GCBIAS_REPORT=$4
 
 O_STAT="./${TAG}.stat"
 O_CSV="./${TAG}.csv"
 
 ## Get values
 DUPLI=$(jq '.duplication.rate' $FASTP_REPORT)
+
 TOT_SEQ=$(( $(sed -n 's/number of reads = \(.*\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g' | sed 's/,//g') / 2 ))
 INSERT=$(sed -n 's/median insert size = \(.*\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g')
 GC_PERCENT=$(sed -n 's/GC percentage = \(.*%\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g')
@@ -14,6 +16,9 @@ GEN_COV=$(grep ">= 1X" $QUALIMAP_REPORT | sed -n 's/There is a \(.*%\) of.*/\1/p
 MEAN_COV=$(sed -n 's/mean coverageData.*= \(.*X\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g')
 ALIGN=$(sed -n 's/number of mapped reads =.*(\(.*%\))/\1/p' $QUALIMAP_REPORT | sed 's/ //g')
 
+AT_DROPOUT=$(grep '^ACCUMULATION_LEVEL' -A 1 $GCBIAS_REPORT | cut -d$'\t' -f6 | tail -1)	# 6th tab-separated field of the row following the ACCUMULATION_LEVEL header
+GC_DROPOUT=$(grep '^ACCUMULATION_LEVEL' -A 1 $GCBIAS_REPORT | cut -d$'\t' -f7 | tail -1)	# 7th field (GC_DROPOUT); was -f6, which duplicated AT_DROPOUT
+
 ## Write stat file
 echo "duplication_rate: $DUPLI" >> $O_STAT
 echo "total_sequences: $TOT_SEQ" >> $O_STAT
@@ -22,7 +27,9 @@ echo "GC_percent: $GC_PERCENT" >> $O_STAT
 echo "genome_cov_percent: $GEN_COVcat " >> $O_STAT
 echo "mean_cov: $MEAN_COV" >> $O_STAT
 echo "align_percent: $ALIGN" >> $O_STAT
+echo "GC_bias_low: ${AT_DROPOUT}%" >> $O_STAT
+echo "GC_bias_high: ${GC_DROPOUT}%" >> $O_STAT
 
 ## Write export file
-echo "Sample;Tot_seq;Duplication_rate;Mean_insert_size;%GC;%Genome_cov;Mean_cov;%Align" > $O_CSV
-echo "$TAG;$TOT_SEQ;$DUPLI;$INSERT;$GC_PERCENT;$GEN_COV;$MEAN_COV;$ALIGN" >> $O_CSV
\ No newline at end of file
+echo "Sample;Tot_seq;Duplication_rate;Mean_insert_size;%GC;%Genome_cov;Mean_cov;%Align;GC_bias_low;GC_bias_high" > $O_CSV
+echo "$TAG;$TOT_SEQ;$DUPLI;$INSERT;$GC_PERCENT;$GEN_COV;$MEAN_COV;$ALIGN;${AT_DROPOUT}%;${GC_DROPOUT}%" >> $O_CSV
\ No newline at end of file
diff --git a/conf/base.config b/conf/base.config
index c5d1005..5c3be4b 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -359,4 +359,18 @@ process {
 			pattern: "*/*.txt"
 		]
 	}
+
+	withName: GC_BIAS {
+		time = { 2.h * task.attempt }
+		memory = { 4.GB * task.attempt }
+		cpus = 1 
+		module = toolsModuleHash['PICARD']
+
+		ext.args = ""
+
+		publishDir = [
+			path: "${params.outdir}/GC_Biais",
+			mode: 'copy'
+		]
+	}
 }
\ No newline at end of file
diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config
index e32e586..de6c332 100644
--- a/conf/dependencies_genobioinfo.config
+++ b/conf/dependencies_genobioinfo.config
@@ -7,6 +7,7 @@ toolsModuleHash['FASTP'] = ['bioinfo/fastp/0.23.2']
 toolsModuleHash['FASTQC'] = ['bioinfo/FastQC/0.12.1']  // version upgraded face to genologin
 toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQScreen/0.15.3']
 toolsModuleHash['R'] = ['statistics/R/4.3.0']
+toolsModuleHash['PICARD'] = ['devel/java/17.0.6', 'bioinfo/picard-tools/3.0.0']      // a vérifier pour R miniconda!!
 
 // ----- RNA ----- //
 toolsModuleHash['SALMON'] = ['bioinfo/Salmon/1.10.0']  // version upgraded face to genologin
diff --git a/conf/dependencies_genologin.config b/conf/dependencies_genologin.config
index fd18ea5..9e495d5 100644
--- a/conf/dependencies_genologin.config
+++ b/conf/dependencies_genologin.config
@@ -7,6 +7,7 @@ toolsModuleHash['FASTP'] = ['bioinfo/fastp-0.23.2']
 toolsModuleHash['FASTQC'] = ['bioinfo/FastQC_v0.11.7']
 toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQ-Screen-0.15.2']
 toolsModuleHash['R'] = ['system/R-4.0.4_gcc-9.3.0']
+toolsModuleHash['PICARD'] = ['bioinfo/picard-2.27.4', 'system/R-4.2.3_Miniconda3']
 
 // ----- RNA ----- //
 toolsModuleHash['SALMON'] = ['bioinfo/salmon-1.9.0']
diff --git a/modules/local/module_DTM.nf b/modules/local/module_DTM.nf
index 451b3cd..9ef5c59 100644
--- a/modules/local/module_DTM.nf
+++ b/modules/local/module_DTM.nf
@@ -8,13 +8,14 @@ process PARSE_REPORTS {
 	input:
 		tuple val(sample), path(fastp_json_report)
 		tuple val(sample), path(qualimap_folder)
+		tuple val(sample), path(gc_bias_report)
 		
 	output:
 		tuple val(sample), path("*.csv"), emit: csv
 		
 	script:
 	"""
-		bash parse_reports.sh $sample $fastp_json_report $qualimap_folder
+		bash parse_reports.sh $sample $fastp_json_report $qualimap_folder $gc_bias_report
 	"""
 }
 
diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf
index f41af2e..7f39268 100644
--- a/sub-workflows/local/dna_qc.nf
+++ b/sub-workflows/local/dna_qc.nf
@@ -34,15 +34,18 @@ workflow DNA_QC {
 
 			qualimap_report_emitted =  QUALIMAP.out.report
 			flagstats_output_emitted = SAMTOOLS_FLAGSTATS.out.txt
+			bam_output_emitted = SAMTOOLS_SORT.out.bam
 
 		} else {
 			System.out.println "Pas de référence genomique ou transcriptomique renseignée, on ne peut pas faire d'alignement"
 			// If Qualimap and Samtools were not executed
 			qualimap_report_emitted =  Channel.empty()
 			flagstats_output_emitted = Channel.empty()
+			bam_output_emitted = Channel.empty()
 		}
 
 	emit:
 		qualimap_report = qualimap_report_emitted
 		flagstats_output = flagstats_output_emitted
+		bam = bam_output_emitted
 }
\ No newline at end of file
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 9407f9c..5c87fb1 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -68,6 +68,7 @@ include {	TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN;
 			TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS
 						} from "$baseDir/modules/local/module_NGL-Bi.nf"
 include {	MULTIQC		} from "${params.shared_modules}/multiqc.nf"
+include {	GCBIAS as GC_BIAS } from "${params.shared_modules}/gcbias.nf"
 include {	workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf"
 include {	UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf"
 include {	READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW')
@@ -118,7 +119,8 @@ workflow ILLUMINA_QC {
 
 		// DTM process
 		if (params.DTM_mode) {
-			PARSE_REPORTS(CORE.out.fastp_report, DNA_QC.out.qualimap_report)
+			GC_BIAS(DNA_QC.out.bam, params.reference_genome)
+			PARSE_REPORTS(CORE.out.fastp_report, DNA_QC.out.qualimap_report, GC_BIAS.out.summary)
 		}
 
 	} else if (params.data_nature =~ 'RNA-*') {
-- 
GitLab


From a588e8c84a11c2a65abfaf7f45d704d93ec3cbd7 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 5 Sep 2023 09:15:16 +0200
Subject: [PATCH 03/13] New configuration for NGL-bi prod/dev mode

	ref: #77
---
 conf/base.config | 19 +++++--------------
 conf/prod.config | 13 +++++++++++++
 2 files changed, 18 insertions(+), 14 deletions(-)
 create mode 100644 conf/prod.config

diff --git a/conf/base.config b/conf/base.config
index 5c3be4b..03d6f24 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -196,16 +196,7 @@ process {
 	// ----- WithLabel
 	withLabel: littleJob {
 		executor = 'local'
-	}
-
-	withLabel: ngl {
-		beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf"
-		publishDir = [
-            path: { "${params.outdir}/ngl" },
-            mode: 'copy',
-			pattern: "*.{log,created}"
-        ]
-	}
+	}	
 
 	withLabel: samtools {
 		module = toolsModuleHash['SAMTOOLS']
@@ -242,7 +233,7 @@ process {
     withName: GZIP {
 		ext.args = '-f'
         publishDir = [
-            path: { "${params.outdir}/archives" },
+            path: "${params.outdir}/archives",
             mode: 'symlink',
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
             pattern: "*.gz"
@@ -265,7 +256,7 @@ process {
 		module = toolsModuleHash['SEQTK']
 
         publishDir = [
-            path: { "${params.outdir}/subset" },
+            path: "${params.outdir}/subset",
             mode: 'symlink',
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
             pattern: "*.fast{a,q}"
@@ -312,7 +303,7 @@ process {
 		memory = { 10.GB * task.attempt * params.resource_factor }
 
 		publishDir = [
-            path: { "${params.outdir}/MultiQC" },
+            path: "${params.outdir}/MultiQC",
             mode: 'copy',
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
             pattern: "*.html"
@@ -335,7 +326,7 @@ process {
 	withName: MD5SUM {
 		time = { 3.h * task.attempt * params.resource_factor }
 		publishDir = [
-            path: { "${params.outdir}/fastq" },
+            path: "${params.outdir}/fastq",
             mode: 'copy',
             pattern: "*.md5sum"
         ]
diff --git a/conf/prod.config b/conf/prod.config
new file mode 100644
index 0000000..4f9b19c
--- /dev/null
+++ b/conf/prod.config
@@ -0,0 +1,13 @@
+// ========================================
+//				PARAMS
+//=========================================
+process {
+	withLabel: ngl {
+		beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf"
+		publishDir = [
+            path: "${params.outdir}/ngl",
+            mode: 'copy',
+			pattern: "*.{log,created}"
+        ]
+	}
+}
\ No newline at end of file
-- 
GitLab


From acf83435b8775604bc2d5068cce52becda4cc433 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 5 Sep 2023 09:16:13 +0200
Subject: [PATCH 04/13] Put NGL-Bi config in dev profile

	Ref: #77
---
 conf/test.config | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/conf/test.config b/conf/test.config
index 23c2af3..39d54d6 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -1,8 +1,20 @@
 // ========================================
 //				PARAMS
 //=========================================
+System.out.println "Profil dev => on ajuste les paramètres..."
 params {
 	ngl_bi_client = '/home/sbsuser/work/test/jules/VisualStudioSources/ngl-bi_client/'
 	shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules/'
 	is_dev_mode = true
 }
+
+process {
+	withLabel: ngl {
+		beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/dev_illumina_qc.conf"
+		publishDir = [
+            path: "${params.outdir}/ngl",
+            mode: 'copy',
+			pattern: "*.{log,created}"
+        ]
+	}
+}
-- 
GitLab


From 7ecf7e18157b9e290d53d39733a4cf9266730c59 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 5 Sep 2023 09:17:52 +0200
Subject: [PATCH 05/13] Select samples by project and name in Channel

	Ref: #75
---
 nextflow.config         | 19 ++++++++++---------
 workflow/illumina_qc.nf | 11 +++++++----
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 38b1344..0bae274 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -5,6 +5,7 @@ params {
 	// ----- GLOBAL PARAMETERS -----
 	inputdir = ""
 	project = ""
+	select_samples = ""
 	sequencer = ""
 	machine_id = ""
 	fc_id = ""
@@ -86,20 +87,20 @@ params {
 // ========================================
 //			ANALYSIS PARAMETERS
 //=========================================
-import java.text.SimpleDateFormat
-SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss")
-
 import java.nio.file.Files
 import java.nio.file.Paths
-
-params.data_location = params.inputdir.toString() + "/" + params.project.toString()
-def n_read_files = Files.walk(Paths.get(params.data_location))
+def n_read_files = Files.walk(Paths.get(params.inputdir.toString()))
 	.filter(Files::isRegularFile)
 	.filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz"))
 	.count()
 
-params.n_samples = n_read_files / 2
-params.resource_factor = 0.1 * params.n_samples
+// on retire les 2 fichiers undetermined
+def n_samples = ( n_read_files - 2 ) / 2 	
+System.out.println "\nNombre d'échantillons trouvés sur cette lane : $n_samples"
+params.resource_factor = 0.1 * n_samples
+
+import java.text.SimpleDateFormat
+SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss")
 
 params {
 	// Dynamics params, depend on others
@@ -136,7 +137,7 @@ process.container = "$baseDir/template-nf.sif"
 
 profiles {
 	dev		{ includeConfig "$baseDir/conf/test.config" }
-	prod 	{ System.out.println "Mode prod !" }
+	prod 	{ includeConfig "$baseDir/conf/prod.config" }
 }
 
 // Avoid this error:
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 5c87fb1..78ad815 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -31,14 +31,17 @@ ch_ss = Channel.fromPath(params.samplesheet)
 ch_DemuxSummary=Channel.fromPath(params.inputdir+"/Stats/DemuxSummaryF1L*.txt")
 ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml')
 
+// Get samples globPatterns
+def sampleList = []
+params.select_samples.tokenize(',').each { sample ->
+	sampleList.add("${params.inputdir}/${params.project}/**" + sample +'*_R{1,2}{_*,*}.fastq.gz')
+}
+
 // fastq one by one
 ch_read=Channel
-	.fromPath(params.data_location+'/**_R{1,2}{_*,*}.fastq.gz')
+	.fromPath(sampleList)
 	.map{$it -> [$it.simpleName, $it]}
 
-// fastq paired
-//ch_read_merged=Channel.fromFilePairs(params.data_location+'/*_R{1,2}_*.fastq.gz')
-
 // Channel of rRNA databases for sortmerna
 ch_sortmerna_db = Channel.from(
 	params.sortmerna_euk_18s, 
-- 
GitLab


From 5749a07494401ed97cd21443761c6340af063800 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 5 Sep 2023 16:07:57 +0200
Subject: [PATCH 06/13] Move ngl-bi client option to config

	Ref: #78
---
 conf/base.config                   | 13 +++++++++++++
 modules/local/module_NGL-Bi.nf     |  7 ++-----
 sub-workflows/local/begin_nglbi.nf |  2 +-
 workflow/illumina_qc.nf            |  4 ++--
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 03d6f24..657c5ba 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -225,6 +225,19 @@ process {
 //			   SHARED MODULES
 //=========================================
 process {
+	withName: TREATMENT_DEMUX_RUN {
+		ext.args = [
+			workflow.resume ? "--force" : '',
+			"--lane '${params.lane}'",
+		].join(' ')
+	}
+
+	withName: TREATMENT_DEMUX_READSETS {
+		ext.args = [
+			workflow.resume ? "--force" : '',
+		].join(' ')
+	}
+
 	withName: SAMTOOLS_FAIDX {
 		module = toolsModuleHash['SAMTOOLS']
 		beforeScript = "module purge"
diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index e243b2e..2ffd9ad 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -26,21 +26,18 @@ process TREATMENT_DEMUXSTAT {
 	input:
 		val nglCode
 		path csvFile
-		val lane
 
 	output:
 		path("*.log")
 		val 1, emit: ready
 
 	script:
-	laneOption = lane ? "--lane $lane" : ''
-	forceOption = workflow.resume ? "--force" : ''
+	def args = task.ext.args ?: ''
 	"""
 		perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\
 			--code $nglCode \\
 			--stat $csvFile \\
-			${laneOption} \\
-			${forceOption} \\
+			$args \\
 			1> treatment_demux_${lane}.log
 	"""
 }
diff --git a/sub-workflows/local/begin_nglbi.nf b/sub-workflows/local/begin_nglbi.nf
index 0096dc4..c71b11e 100644
--- a/sub-workflows/local/begin_nglbi.nf
+++ b/sub-workflows/local/begin_nglbi.nf
@@ -52,7 +52,7 @@ workflow NGLBI {
 			params.sq_xp_code,
 			params.bi_run_code,
 			'',
-			params.lane,
+			'',
 			UPDATE_STATE_IPRG.out.ready
 		)
 
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index 78ad815..e038cac 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -101,8 +101,8 @@ workflow ILLUMINA_QC {
 
 		if (params.insert_to_ngl){
 			// Add demultiplexStat treatments
-			TREATMENT_DEMUX_RUN(params.bi_run_code, CORE_ILLUMINA.out.demuxStat, params.lane)
-			TREATMENT_DEMUX_READSETS(NGLBI.out.readsetsFile, CORE_ILLUMINA.out.demuxStat, '')
+			TREATMENT_DEMUX_RUN(params.bi_run_code, CORE_ILLUMINA.out.demuxStat)
+			TREATMENT_DEMUX_READSETS(NGLBI.out.readsetsFile, CORE_ILLUMINA.out.demuxStat)
 		}
 	}
 
-- 
GitLab


From 3d7f8b4139cecc0319ad8abf03e325eabce01ef6 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Tue, 5 Sep 2023 16:08:52 +0200
Subject: [PATCH 07/13] Filter readsets to create in config

	Ref: #75
---
 conf/base.config | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/conf/base.config b/conf/base.config
index 657c5ba..8792f37 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -225,6 +225,13 @@ process {
 //			   SHARED MODULES
 //=========================================
 process {
+	withName: CREATE_READSETS {
+		ext.args = [
+			params.run_name ? "--poolName '${params.run_name}'" : '',
+			params.lane ? "--laneNumberToWorkOn '${params.lane}'" : '',
+		].join(' ')
+	}
+
 	withName: TREATMENT_DEMUX_RUN {
 		ext.args = [
 			workflow.resume ? "--force" : '',
-- 
GitLab


From cb0dc6f6a1066c4a21aabc4e6dc757e3c5c47159 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 6 Sep 2023 09:36:56 +0200
Subject: [PATCH 08/13] Recover force option for treatementDemuxStat

	Ref: #78
---
 conf/base.config               | 8 +-------
 modules/local/module_NGL-Bi.nf | 2 ++
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index 8792f37..22bb404 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -158,6 +158,7 @@ process {
 		module = toolsModuleHash['STAR']
 		memory = { 20.GB * task.attempt }
 		cpus = 2
+		time = { 1.d * task.attempt }
 	}
 
 	// ----- 16S/Amplicon ----- //
@@ -234,17 +235,10 @@ process {
 
 	withName: TREATMENT_DEMUX_RUN {
 		ext.args = [
-			workflow.resume ? "--force" : '',
 			"--lane '${params.lane}'",
 		].join(' ')
 	}
 
-	withName: TREATMENT_DEMUX_READSETS {
-		ext.args = [
-			workflow.resume ? "--force" : '',
-		].join(' ')
-	}
-
 	withName: SAMTOOLS_FAIDX {
 		module = toolsModuleHash['SAMTOOLS']
 		beforeScript = "module purge"
diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index 2ffd9ad..416eb72 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -33,10 +33,12 @@ process TREATMENT_DEMUXSTAT {
 
 	script:
 	def args = task.ext.args ?: ''
+	forceOption = workflow.resume ? "--force" : ''
 	"""
 		perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\
 			--code $nglCode \\
 			--stat $csvFile \\
+			${forceOption} \\
 			$args \\
 			1> treatment_demux_${lane}.log
 	"""
-- 
GitLab


From e2b5e6db507ee1ce404f0b7332120e673b6e96fd Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 6 Sep 2023 13:28:06 +0200
Subject: [PATCH 09/13] New outdir_prefix option

	Ref: #75
---
 nextflow.config         | 4 +++-
 workflow/illumina_qc.nf | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 0bae274..d621dab 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -4,6 +4,7 @@
 params {
 	// ----- GLOBAL PARAMETERS -----
 	inputdir = ""
+	outdir_prefix = ""
 	project = ""
 	select_samples = ""
 	sequencer = ""
@@ -106,7 +107,8 @@ params {
 	// Dynamics params, depend on others
 	samplesheet = inputdir.toString() + "/SampleSheet.csv"
 	nf_uniqueness = uniqueness_format.format(new Date())
-	outdir = params.inputdir + "/nextflow/" + project + "_" + run_name + "_" + nf_uniqueness
+	if (outdir_prefix == "") {outdir_prefix = project + "_" + run_name}
+	outdir = inputdir + "/nextflow/" + outdir_prefix + "_" + nf_uniqueness
 
 	subset_seq = miseq_subset_seq	
 	if ( sequencer =~ /NovaSeq.*/ ) {
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index e038cac..619c3c1 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -34,7 +34,7 @@ ch_DemuxStatXML=Channel.fromPath(params.inputdir+'/Stats/DemultiplexingStats.xml
 // Get samples globPatterns
 def sampleList = []
 params.select_samples.tokenize(',').each { sample ->
-	sampleList.add("${params.inputdir}/${params.project}/**" + sample +'*_R{1,2}{_*,*}.fastq.gz')
+	sampleList.add("${params.inputdir}/${params.project}/**" + sample +'_*_R{1,2}{_*,*}.fastq.gz')
 }
 
 // fastq one by one
-- 
GitLab


From 20877b71da676abdda2c49fe780795c80761139a Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 6 Sep 2023 14:07:49 +0200
Subject: [PATCH 10/13] Documentation of options for multipool lane

	Ref : #75
---
 docs/usage.md | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/usage.md b/docs/usage.md
index c967a19..9908d51 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -12,6 +12,9 @@ Name for the pipeline run. If not specified, Nextflow will automatically generat
 Specify this flag when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously.  
 You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names.
 
+- **`-work-dir / -w`** [str]  
+Specify the directory where intermediate result files are stored. The default is `$launchDir/work/`.
+
 - **`-profile`** [str]  
 Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.  
 Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Conda) - see below.  
@@ -34,11 +37,14 @@ Some parameters have not default value, therefore they must be set to run the pi
 Path to the input directory for the data to be analyzed. No default value, MUST be given to the command line. This is the output directory of bcl2fastq. See bellow for the particular structure of it.  
 _Default_ : null
 
-
 - **`--project`** [str]  
 The project name associated with the analysis. The value of this parameter MUST be a directory name found in the `inputdir` path.  
 _Default_ : null
 
+- **`--select_samples`** [list]  
+Comma-separated list of sample names. Each name in this list must exactly match the beginning of a fastq file name in the `project` directory.  
+_Default_ : null
+
 - **`--data_nature`** [str]  
 Nature of the data sequenced. This parameter will be used to automatically select the workflow. Authorized values are : `DNA`, `RNA-*`, `Amplicon`, `16S`, (soon : `10X`, `Emseq-DNA`, `Hi-C`, `sparse`).  
 If value of data_nature is unknown, only the CORE pipeline is executed.  
@@ -82,6 +88,10 @@ _Default_ : null
 
 ### Optionnal parameters
 Some other parameters are only for tracability and have no effect on analysis, there are :  
+- **`--outdir_prefix`** [str]  
+This value becomes part of the output directory name. The actual output directory is constructed as follows: `${inputdir}/nextflow/${outdir_prefix}_${nf_uniqueness}`, where `${nf_uniqueness}` is the current date and time (`yyyyMMddHHmmss`).  
+_Default_ : `${project}_${run_name}`
+
 - **`--machine_id`** [str]  
 The machine identifier, such as `A00318` or `AV232702`.  
 _Default_ : null
-- 
GitLab


From 75483e0eab551f71efe78d49d224bce9b19492ae Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 6 Sep 2023 14:08:24 +0200
Subject: [PATCH 11/13] data_prepare integrates multipool analysis

	Ref: #75
---
 bin/DTM/data_prepare.pl | 85 +++++++++++++++++++++--------------------
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/bin/DTM/data_prepare.pl b/bin/DTM/data_prepare.pl
index 9dfc6fd..39e7a92 100644
--- a/bin/DTM/data_prepare.pl
+++ b/bin/DTM/data_prepare.pl
@@ -67,7 +67,7 @@ GetOptions(
 );
 
 if ($input eq '') {
-	print STDERR ("USAGE : data_prepare.pl -i <dir_path>");
+	print STDERR "USAGE : data_prepare.pl -i <dir_path>";
 	die;
 }
 ##################################################################
@@ -100,22 +100,41 @@ MAIN:
         $lane = $1;
     }
 
-    chdir $input or (print STDERR ("Impossible de se déplacer dans $input\n") and die);
+    chdir $input or (print STDERR "Impossible de se déplacer dans $input\n" and die);
 
     print ("Recherche de la jFlow\n");
     my $regexpSampleSheetjFlow = '^[0-9]{8}_.*_jFlow.*\.toSubmit$';
-    my $file_jflow = `ls -t | egrep $regexpSampleSheetjFlow  | head -1`; $? and (print STDERR("[Erreur]Récup de la derniere jflow\n") and die);
+    my $file_jflow = `ls -t | egrep $regexpSampleSheetjFlow  | head -1`; $? and (print STDERR "[Erreur]Récup de la derniere jflow\n" and die);
     chomp($file_jflow);
     print ("\tjFlow trouvée : $file_jflow\n");
 
     print ("\tParametrage de la commande Nextflow\n");
 	my $submitted_jflow = $file_jflow;
 	my $nb_jflow_lines = `wc -l < $submitted_jflow`;
-	if ( $nb_jflow_lines == 1 ) {
-		# ouverture de la jFlow
-		print ("Ouverture en lecture de : $submitted_jflow\n");
-		open(SJF,"< $submitted_jflow") or (print STDERR("Impossible d'ouvrir le fichier jflow $submitted_jflow\n") and die);
-		my $line = <SJF>;
+
+	# parametrage pour NGL
+	my $regexpRunCreated = '^RunNGL-Bi.created$';
+	my $file_run_bi = `ls -t | egrep $regexpRunCreated  | head -1`; $? and (print STDERR "[Erreur]Récup du fichier $regexpRunCreated\n" and die);
+	chomp($file_run_bi);
+	print ("\tFichier run NGL-Bi trouvé : $file_run_bi\n");
+	open (RUNBI, '<', $file_run_bi) or (print STDERR "Impossible d'ouvrir le fichier $file_run_bi : $!\n" and die);	# 3-arg open; abort if the file found above cannot be read
+	my $ngl_bi_run_name = <RUNBI>;	# only the first line of the file is kept
+	chomp($ngl_bi_run_name);
+	close (RUNBI);
+
+	my $sq_xp_code = `grep 'Experiment Name' SampleSheet.csv | cut -d',' -f2`; $? and print STDERR "[Erreur]Récup du code experience NGL-SQ\n";
+	chomp($sq_xp_code);
+	if ( length($sq_xp_code) <= 3 ) {
+		undef($sq_xp_code);
+	}
+
+	# ouverture de la jFlow
+	print ("Ouverture en lecture de : $submitted_jflow\n");
+	open(SJF,"< $submitted_jflow") or (print STDERR "Impossible d'ouvrir le fichier jflow $submitted_jflow\n" and die);
+	my $jFlowLineNumber = 1;
+	while (my $line = <SJF>){
+		print "Lecture ligne $.\n";
+		next if ($line =~ /(^$)|(^#.*$)/);		# exclusion des lignes vide ou commençant par #
 		my ($dataNature) =  $line =~ m/--data-nature \'?(\S+)\'?/;
 		if ($dataNature eq 'ReadyToLoad') {
 			my @jflow_components = split(' ', $line);
@@ -177,27 +196,13 @@ MAIN:
 			}
 		}
 
-		# parametrage pour NGL
-		my $regexpRunCreated = '^RunNGL-Bi.created$';
-		my $file_run_bi = `ls -t | egrep $regexpRunCreated  | head -1`; $? and (print STDERR("[Erreur]Récup du fichier $regexpRunCreated\n") and die);
-		chomp($file_run_bi);
-		print ("\tFichier run NGL-Bi trouvé : $file_run_bi\n");
-		open (RUNBI, $file_run_bi);
-		my $ngl_bi_run_name = <RUNBI>;
-		chomp($ngl_bi_run_name);
-		close (RUNBI);
-
-		my $sq_xp_code = `grep 'Experiment Name' SampleSheet.csv | cut -d',' -f2`; $? and print STDERR("[Erreur]Récup du code experience NGL-SQ\n");
-		chomp($sq_xp_code);
-		if ( length($sq_xp_code) <= 3 ) {
-			undef($sq_xp_code);
-		}
-
-		my $nf_params_file = "${project}_params.yml";
-		open (NF_PARAMS,"> $nf_params_file") or (print STDERR("Impossible d'ouvrir le fichier $nf_params_file : $!\n") and die);
+		my $nf_file_prefix = "${.}_${project}_${runName}";
+		my $nf_params_file = "${input}/${nf_file_prefix}_params.yml";
+		open (NF_PARAMS,"> $nf_params_file") or (print STDERR "Impossible d'ouvrir le fichier $nf_params_file : $!\n" and die);
 
 		print NF_PARAMS "inputdir: '$input'\n";
 		print NF_PARAMS "project: '$project'\n";
+		print NF_PARAMS "select_samples: '" . join(',', @samples) . "'\n";
 		print NF_PARAMS "is_multiplex: $isMultiplex\n";
 		print NF_PARAMS "data_nature: '$dataNature'\n";
 		print NF_PARAMS "species: '$species'\n";
@@ -225,41 +230,39 @@ MAIN:
 		print ("\tCopie du fichier de config FASTQSCREEN dans le répertoire courant\n" );
 		unless(-e $input."/fastq_screen.conf"){
 			cp($wf_illumina_nf_folder_path."/assets/fastq_screen.conf_example", $input."/fastq_screen.conf") or 
-			print STDERR("Impossible de copier le fichier fastq_screen.conf_n") and die;
+			print STDERR "Impossible de copier le fichier fastq_screen.conf\n" and die;	# abort when the example config cannot be copied
 		}
 		my $local_run_folder = basename($input);
 		unless(-e "${nf_outputs_folder}/${local_run_folder}"){
-			mkdir "${nf_outputs_folder}/${local_run_folder}" or (print STDERR ("Impossible de créer le répertoire ${nf_outputs_folder}/${local_run_folder}") and die);
+			mkdir "${nf_outputs_folder}/${local_run_folder}" or (print STDERR "Impossible de créer le répertoire ${nf_outputs_folder}/${local_run_folder}" and die);
 		}
 		unless(-e "${input}/nextflow"){
-			symlink("${nf_outputs_folder}/${local_run_folder}", "${input}/nextflow" ) or (print STDERR ("Impossible de créer le symlink ${input}/nextflow") and die);
+			symlink("${nf_outputs_folder}/${local_run_folder}", "${input}/nextflow" ) or (print STDERR "Impossible de créer le symlink ${input}/nextflow" and die);
 		}
 		my $nextflow_profile = $checkTest ? 'dev' : 'prod';
 		my $nextflow_time_job = $checkTest ? '3:00:00' : '3-00';
-		my $nextflow_cmd_line_sbatch = "sbatch -p wflowq -t $nextflow_time_job --mem 5GB ";
-		$nextflow_cmd_line_sbatch .= "-J nf-illumina_${barcodeFlowcell}_${lane} --wrap=";
+		my $nextflow_cmd_line_sbatch = "sbatch -p wflowq -t $nextflow_time_job --mem 5GB -o ${nf_file_prefix}.out ";	# trailing space separates -o from the -J option appended below
+		$nextflow_cmd_line_sbatch .= "-J nf-illumina_${barcodeFlowcell}_${lane}_$. --wrap=";
 
 		my $nextflow_cmd_line_wrap = "module load bioinfo/nfcore-Nextflow-v22.12.0-edge; ";
 		$nextflow_cmd_line_wrap .= "cd $input/nextflow; ";
 		$nextflow_cmd_line_wrap .= "nextflow run $wf_illumina_nf_folder_path/main.nf -ansi-log false ";
 		$nextflow_cmd_line_wrap .= "-profile $nextflow_profile ";
-		$nextflow_cmd_line_wrap .= "-params-file ${input}/${project}_params.yml ";
+		$nextflow_cmd_line_wrap .= "-params-file ${input}/${nf_file_prefix}_params.yml ";
+		$nextflow_cmd_line_wrap .= "-w ./work/${nf_file_prefix} ";
 
 		my $nextflow_cmd_line = "$nextflow_cmd_line_sbatch\" $nextflow_cmd_line_wrap \" ";
 		print ("Commande nextflox à lancer : $nextflow_cmd_line\n");
 
-		chdir './nextflow' or (print STDERR("Impossible de se déplacer dans $input/nextflow_n") and die);
-		open(NFC,"> $input/nextflow/${project}_${runName}_nextflow.cmd")
-			and print("La commande nextflow est stockée ici : $input/nextflow/${project}_${runName}_nextflow.cmd\n")
-			or (print STDERR("Impossible d'ouvrir en écriture le fichier : ${project}_${runName}_nextflow.cmd\n") and die);
+		chdir "$input/nextflow" or (print STDERR "Impossible de se déplacer dans $input/nextflow" and die);
+		open(NFC,"> $input/nextflow/${nf_file_prefix}_nextflow.cmd")
+			and print("La commande nextflow est stockée ici : $input/nextflow/${nf_file_prefix}_nextflow.cmd\n")
+			or (print STDERR "Impossible d'ouvrir en écriture le fichier : ${nf_file_prefix}_nextflow.cmd\n" and die);
 		print NFC "$nextflow_cmd_line\n";
 		close NFC;
-	} else {
-		chomp($nb_jflow_lines);
-		print ("jFlow avec $nb_jflow_lines lignes : cas non pris en charge pour le moment._n");
+		print "\n";
 	}
-	close(SJF);
-
+	close SJF;
 }
 #############################################################################################################################
 #																															#
-- 
GitLab


From b6bb1134ada5c1d12f2f47b8c9da0f8a189f25c4 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 6 Sep 2023 17:05:28 +0200
Subject: [PATCH 12/13] Add host in data_prepare

---
 bin/DTM/data_prepare.pl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bin/DTM/data_prepare.pl b/bin/DTM/data_prepare.pl
index 39e7a92..73fac9c 100644
--- a/bin/DTM/data_prepare.pl
+++ b/bin/DTM/data_prepare.pl
@@ -110,7 +110,6 @@ MAIN:
 
     print ("\tParametrage de la commande Nextflow\n");
 	my $submitted_jflow = $file_jflow;
-	my $nb_jflow_lines = `wc -l < $submitted_jflow`;
 
 	# parametrage pour NGL
 	my $regexpRunCreated = '^RunNGL-Bi.created$';
@@ -131,7 +130,6 @@ MAIN:
 	# ouverture de la jFlow
 	print ("Ouverture en lecture de : $submitted_jflow\n");
 	open(SJF,"< $submitted_jflow") or (print STDERR "Impossible d'ouvrir le fichier jflow $submitted_jflow\n" and die);
-	my $jFlowLineNumber = 1;
 	while (my $line = <SJF>){
 		print "Lecture ligne $.\n";
 		next if ($line =~ /(^$)|(^#.*$)/);		# exclusion des lignes vide ou commençant par #
@@ -196,6 +194,9 @@ MAIN:
 			}
 		}
 
+		my $host = hostname;
+		my $nf_host = $host =~ /node\d{3}/ ? 'genologin' : 'genobioinfo';
+
 		my $nf_file_prefix = "${.}_${project}_${runName}";
 		my $nf_params_file = "${input}/${nf_file_prefix}_params.yml";
 		open (NF_PARAMS,"> $nf_params_file") or (print STDERR "Impossible d'ouvrir le fichier $nf_params_file : $!\n" and die);
@@ -222,6 +223,7 @@ MAIN:
 		print NF_PARAMS "min_overlap: $minOverlap\n" if (defined($minOverlap));	# parametre non obligatoire
 		print NF_PARAMS "max_overlap: $maxOverlap\n" if (defined($maxOverlap));	# parametre non obligatoire
 		print NF_PARAMS "email: '$nf_mailRecipient'\n" if (defined($nf_mailRecipient));	# parametre non obligatoire
+		print NF_PARAMS "host: '$nf_host'\n";
 		print NF_PARAMS "description: '$description'\n";
 
 		close NF_PARAMS;
-- 
GitLab


From ea6c3920acb31cc437fbc9ccf1868e75cae9d7fc Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Wed, 6 Sep 2023 17:05:46 +0200
Subject: [PATCH 13/13] update version

---
 conf/report.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/report.config b/conf/report.config
index 2c00805..520c00b 100644
--- a/conf/report.config
+++ b/conf/report.config
@@ -29,5 +29,5 @@ manifest {
 	description = "Workflow for Illumina data quality control"
 	mainScript = 'main.nf'
 	nextflowVersion = '>=0.32.0'
-	version = '1.6.0'
+	version = '1.10.0'
 }
\ No newline at end of file
-- 
GitLab