# HG changeset patch # User ieguinoa # Date 1596639311 0 # Node ID b85d239b1d58f1ef61878f2201d408dbde8cc9f7 # Parent e0f0399888023a9125e17444136d2fbc591ddfd8 Uploaded diff -r e0f039988802 -r b85d239b1d58 .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,12 @@ +categories: + - TODO +description: | + Submits experimental data and respective metadata to the European Nucleotide Archive (ENA). +long_description: | + The program submits experimental data and respective metadata to the European Nucleotide Archive (ENA). + The metadata should be provided in separate tables corresponding to ENA objects STUDY, SAMPLE, EXPERIMENT and RUN +name: ena_upload +owner: iuc +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload +homepage_url: https://github.com/usegalaxy-eu/ena-upload-cli +type: unrestricted \ No newline at end of file diff -r e0f039988802 -r b85d239b1d58 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,2 @@ +# ena-upload-wrapper +Galaxy wrapper for ena-upload-cli diff -r e0f039988802 -r b85d239b1d58 ena_upload.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ena_upload.xml Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,211 @@ + + + 0.1.3 + + + ena-upload-cli + + $output +#end if + + ]]> + + +#import json +#if $action_options.input_format_conditional.input_format == "build_tables": + #set $files_to_upload = list() + #set $studies = list() + #for $study in $action_options.input_format_conditional.rep_study: + #set samples = list() + #for $sample in $study.rep_sample: + #set experiments = list() + #for $experiment in $sample.rep_experiment: + #set runs = list() + #for $run in $experiment.rep_runs: + #set run_files = list() + #for $file in $run.upload_files: + $run_files.append(str($file.element_identifier)) + #end for + $runs.append($run_files) + #end for +
$experiments.append({'title':str($experiment.experiment_title),'experiment_design':str($experiment.experiment_design),'library_strategy':str($experiment.library_strategy),'library_source':str($experiment.library_source),'library_selection':str($experiment.library_selection),'library_layout':str($experiment.library_layout),'insert_size':str($experiment.insert_size),'library_construction_protocol':str($experiment.library_construction_protocol),'platform':str($experiment.platform),'instrument_model':str($experiment.instrument_model),'runs':$runs}) + #end for + $samples.append({'title':str($sample.sample_title),'description':str($sample.sample_description),'tax_name':str($sample.scientific_name),'tax_id':str($sample.tax_id),'experiments':$experiments}) + #end for + $studies.append({'title':str($study.study_title),'type':str($study.study_type),'abstract':str($study.study_abstract),'pubmed_id':str($study.study_pubmed_id),'samples':$samples}) + #end for + #echo $json.dumps($studies) +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + action_options['input_format_conditional']['dry_run'] == True + + + action_options['input_format_conditional']['dry_run'] == True + + + action_options['input_format_conditional']['dry_run'] == True + + + action_options['input_format_conditional']['dry_run'] == True + + + + diff -r e0f039988802 -r b85d239b1d58 extract_tables.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_tables.py Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,51 @@ +"""Read the nested studies JSON given by --studies and write studies.tsv, samples.tsv, experiments.tsv and runs.tsv into --out_dir.""" +import argparse +import json +import os +import pathlib + +parser = argparse.ArgumentParser() +parser.add_argument('--studies',dest='studies_json_path', required=True) +parser.add_argument('--out_dir',dest='out_path', required=True) +args = parser.parse_args() + + +with open(args.studies_json_path,'r') as studies_json_file: + studies_dict = 
json.load(studies_json_file) + +studies_table = open(pathlib.Path(args.out_path) / 'studies.tsv', 'w') +studies_table.write('\t'.join(['alias','status','accession','title','study_type','study_abstract','pubmed_id','submission_date']) + '\n') + +samples_table = open(pathlib.Path(args.out_path) / 'samples.tsv', 'w') +samples_table.write('\t'.join(['alias','status','accession','title','scientific_name','taxon_id','sample_description','submission_date']) + '\n') + +experiments_table = open(pathlib.Path(args.out_path) / 'experiments.tsv', 'w') +experiments_table.write('\t'.join(['alias','status','accession','title','study_alias','sample_alias','design_description','library_name','library_strategy','library_source','library_selection','library_layout','insert_size','library_construction_protocol','platform','instrument_model','submission_date'])+ '\n') + +runs_table = open(pathlib.Path(args.out_path) / 'runs.tsv', 'w') +runs_table.write('\t'.join(['alias','status','accession','experiment_alias','file_name','file_format','file_checksum','submission_date'])+ '\n') + +action = 'add' +for study_index, study in enumerate(studies_dict): + study_alias = 'study_'+str(study_index) + studies_table.write('\t'.join([study_alias,action,'ENA_accession',study['title'], study['type'],study['abstract'],study['pubmed_id'],'ENA_submission_data']) + '\n')  # newline-terminate each study row, as the other three tables do + for sample_index,sample in enumerate(study['samples']): + sample_alias = 'sample_'+str(sample_index)  # NOTE(review): sample_index restarts for every study, so sample and experiment aliases repeat across studies; confirm multi-study input is intended + samples_table.write('\t'.join([sample_alias,action,'ena_accession',sample['title'],sample['tax_name'], sample['tax_id'],sample['description'],'ENA_submission_date'])+ '\n') + for exp_index,exp in enumerate(sample['experiments']): + exp_alias = 'experiment_'+str(exp_index)+'_'+str(sample_index) + lib_alias = 'library_'+str(exp_index)+'_'+str(sample_index) +
experiments_table.write('\t'.join([exp_alias,action,'accession_ena',exp['title'],study_alias,sample_alias,exp['experiment_design'],lib_alias,exp['library_strategy'],exp['library_source'],exp['library_selection'],exp['library_layout'],exp['insert_size'],exp['library_construction_protocol'],exp['platform'],exp['instrument_model'],'submission_date_ENA']) + '\n') + run_index = 0 + # exp['runs'] is a list of lists + for run in exp['runs']: + run_index += 1 + run_alias = '_'.join(['run',str(exp_index),str(sample_index),str(run_index)]) + for file_entry in run: + file_format = 'fastq.gz' if os.path.splitext(file_entry)[-1] == '.gz' else 'fastq.bz2'  # any suffix other than .gz is reported as fastq.bz2 + runs_table.write('\t'.join([run_alias,action,'ena_run_accession',exp_alias,file_entry,file_format,'file_checksum','submission_date_ENA']) + '\n') + +studies_table.close() +samples_table.close() +experiments_table.close() +runs_table.close() diff -r e0f039988802 -r b85d239b1d58 tool-data/instrument_model.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/instrument_model.txt Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,58 @@ +minION +GridION +PromethION +454 GS +454 GS 20 +454 GS FLX +454 GS FLX+ +454 GS FLX Titanium +454 GS Junior +Illumina Genome Analyzer +Illumina Genome Analyzer II +Illumina Genome Analyzer IIx +Illumina HiSeq 1000 +Illumina HiSeq 1500 +Illumina HiSeq 2000 +Illumina HiSeq 2500 +Illumina HiSeq 3000 +Illumina HiSeq 4000 +Illumina iSeq 100 +Illumina HiScanSQ +Illumina NextSeq 500 +Illumina NextSeq 550 +Illumina NovaSeq 6000 +Illumina HiSeq X Five +Illumina HiSeq X Ten +Illumina MiSeq +Illumina MiniSeq +AB SOLiD System +AB SOLiD System 2.0 +AB SOLiD System 3.0 +AB SOLiD 3 Plus System +AB SOLiD 4 System +AB SOLiD 4hq System +AB SOLiD PI System +AB 5500 Genetic Analyzer +AB 5500xl Genetic Analyzer +AB 5500xl-W Genetic Analysis System +Ion Torrent PGM +Ion Torrent Proton +Ion Torrent S5 +Ion Torrent S5 XL +Complete Genomics +PacBio RS +PacBio RS II +Sequel +Sequel II +AB 3730xL Genetic Analyzer +AB
3730 Genetic Analyzer +AB 3500xL Genetic Analyzer +AB 3500 Genetic Analyzer +AB 3130xL Genetic Analyzer +AB 3130 Genetic Analyzer +AB 310 Genetic Analyzer +BGISEQ-500 +DNBSEQ-T7 +DNBSEQ-G400 +DNBSEQ-G50 +DNBSEQ-G400 FAST diff -r e0f039988802 -r b85d239b1d58 tool-data/library_layout.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/library_layout.txt Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,2 @@ +SINGLE +PAIRED diff -r e0f039988802 -r b85d239b1d58 tool-data/library_selection.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/library_selection.txt Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,27 @@ +RANDOM +PCR +RANDOM PCR +RT-PCR +HMPR +MF +repeat fractionation +size fractionation +MSLL +cDNA +ChIP +MNase +DNase +Hybrid Selection +Reduced Representation +Restriction Digest +5-methylcytidine antibody +MBD2 protein methyl-CpG binding domain +CAGE +RACE +MDA +padlock probes capture method +Oligo-dT +Inverse rRNA selection +ChIP-Seq +other +unspecified diff -r e0f039988802 -r b85d239b1d58 tool-data/library_source.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/library_source.txt Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,7 @@ +GENOMIC +TRANSCRIPTOMIC +METAGENOMIC +METATRANSCRIPTOMIC +SYNTHETIC +VIRAL RNA +OTHER diff -r e0f039988802 -r b85d239b1d58 tool-data/library_strategy.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/library_strategy.txt Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,38 @@ +WGS +WGA +WXS +RNA-Seq +ssRNA-seq +miRNA-Seq +ncRNA-Seq +FL-cDNA +EST +Hi-C +ATAC-seq +WCS +RAD-Seq +CLONE +POOLCLONE +AMPLICON +CLONEEND +FINISHING +ChIP-Seq +MNase-Seq +DNase-Hypersensitivity +Bisulfite-Seq +CTS +MRE-Seq +MeDIP-Seq +MBD-Seq +Tn-Seq +VALIDATION +FAIRE-seq +SELEX +RIP-Seq +ChIA-PET +Synthetic-Long-Read +Targeted-Capture +Tethered Chromatin Conformation Capture +ChM-Seq +GBS +OTHER diff -r e0f039988802 -r b85d239b1d58 tool-data/study_type.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/tool-data/study_type.txt Wed Aug 05 14:55:11 2020 +0000 @@ -0,0 +1,14 @@ +Whole Genome Sequencing +Metagenomics +Transcriptome Analysis +Resequencing +Epigenetics +Synthetic Genomics +Forensic or Paleo-genomics +Gene Regulation Study +Cancer Genomics +Population Genomics +RNASeq +Exome Sequencing +Pooled Clone Sequencing +Transcriptome Sequencing