-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit d90c108
Showing
188 changed files
with
253,102 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
# File configuration for Annocript 0.2 | ||
# It contains parameters that are used very often by Annocript but very rarely by the user. | ||
# The parameters are grouped into RARELY CHANGED, which an expert user may decide to change, | ||
# and VERY RARELY CHANGED, which a user will very rarely want to change. | ||
# READ CAREFULLY!!! | ||
# This file has been written with a specific syntax. | ||
# The variables MUST stay in the form: variable = value | ||
# A series of hashes (#########) closes the parameters to read | ||
# When you want to execute something you have to write YES (in upper case), otherwise write NO; | ||
# any other string will give an error. | ||
# THIS FILE SHOULD BE CHANGED ONLY IF YOU KNOW PERFECTLY WHAT YOU ARE DOING!!! | ||
############################## | ||
|
||
|
||
#RARELY CHANGED PARAMETERS | ||
#Links to Files | ||
#Uniprot databases are also present on expasy and ebi. Please use these domains if you want. | ||
swissprotDBLink = ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz | ||
tremblDBLink = ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz | ||
uniprotVerLink = ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/reldate.txt | ||
GODBLink = ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping_selected.tab.gz | ||
GOTermsLink = http://www.geneontology.org/doc/GO.terms_alt_ids | ||
enzymeDBLink = ftp://ftp.expasy.org/databases/enzyme/enzyme.dat | ||
cdDBLink = ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/Cdd_LE.tar.gz | ||
rfamDBLink = ftp://ftp.sanger.ac.uk/pub/databases/Rfam/CURRENT/Rfam.fasta.gz | ||
unirefDBLink = ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/uniref/uniref90/uniref90.fasta.gz | ||
unirefVerLink = ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/uniref/uniref90/uniref90.release_note | ||
cdTableLink = ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/cddid_all.tbl.gz | ||
pathwaysTableLink = http://www.uniprot.org/docs/pathway.txt | ||
uniprotOrgListLink = http://www.uniprot.org/docs/speclist.txt | ||
pfam2GOLink = http://geneontology.org/external2go/pfam2go | ||
silvaLSULink = ftp://ftp.arb-silva.de/current/Exports/SILVA_119_LSUParc_tax_silva.fasta.gz | ||
silvaSSULink = ftp://ftp.arb-silva.de/current/Exports/SILVA_119_SSUParc_tax_silva.fasta.gz | ||
|
||
#If active (YES), keeps in memory all the tables that have been used to generate the database | ||
#You can use them to check that the construction was done correctly | ||
keepTempFiles = YES | ||
|
||
#This is a threshold. If you have more than this memory Annocript will use more and be faster. | ||
bigMemory = 20 | ||
|
||
#Maximum number of attempts (tentatives) when downloading or using some system function | ||
max_tentatives = 5 | ||
|
||
#A separator for different results in the same field of the table | ||
separator = ]---[ | ||
|
||
#Columns of Uniref ids in the idmapping.tab file | ||
UniRef100PosIdMap = 8 | ||
UniRef90PosIdMap = 9 | ||
UniRef50PosIdMap = 10 | ||
|
||
#FASTA CONSTRAINTS | ||
#Maximum number of chars a FASTA file can have | ||
maxFastaSeqLen = 80 | ||
#Regexes containing IUPAC allowed chars | ||
nuclIUPAC = ACGTURYSWKMBDHVN\.\- | ||
protIUPAC = ACDEFGHIKLMNPQRSTVWY | ||
|
||
#PORTRAIT CONSTRAINTS | ||
#to avoid stochastic interruption of Portrait split the transcriptome in N files | ||
split_num = 5000 | ||
#Minimum and maximum lengths for a transcriptome to work with Portrait | ||
max_port_seq_length = 100000 | ||
min_port_seq_length = 100 | ||
#Maximum percentage of Ns that can be present or Portrait will not work | ||
max_perc_ns = 20 | ||
|
||
|
||
##VERY RARELY CHANGED | ||
#Names given to the output files. Other information will be concatenated by Annocript | ||
outFileName = ann_out.txt | ||
outFiltered = filt_ann_out.txt | ||
ORFFastaFileName = orf_info.fasta | ||
NCOutFile = lncRNAseqs.fasta | ||
codingOutFile = codingSeqs.fasta | ||
gcContentFile = out.geecee | ||
|
||
#Name of the file with non coding sequences | ||
ncDB = ncRNA.fa | ||
|
||
#Modify this value if you want to get plots showing the ids of a different domain database | ||
#Possible values: cdd, cog, kog, pfam, prk, smart, tigr | ||
cdName4Expression = pfam | ||
|
||
#Names of the folders used by Annocript (A change will compromise Annocript) | ||
DBCreationFolder = DB_CREATION | ||
ProgExecFolder = PROGRAMS_EXEC | ||
GFFAndOutputFolder = GFF3_AND_OUTPUT | ||
usefulFolder = USEFUL | ||
guideFolder = GUIDE | ||
configFolder = CONFIGURATION | ||
|
||
#Database info | ||
platform = mysql | ||
host = localhost | ||
port = 3306 | ||
|
||
#Programs output names | ||
rpstblastnOut = rpstblastnOut | ||
blastxSPOut = blastxSPOut | ||
blastxTROut = blastxTROut | ||
blastxUnirefOut = blastxUnirefOut | ||
blastnOut = blastnOut | ||
portraitOut = portraitOut | ||
dna2pepOut = dna2pepOut | ||
outCount = countOut | ||
|
||
#Other names | ||
outHashFile = outHashFile.hash | ||
parsingFilePath = db_headers.txt | ||
filtMappingFile = filtMappingFile.txt | ||
R_barplot_script = tables_2_barplot.R | ||
R_piechart_script = tables_2_pie_chart.R | ||
R_seqs_stats_script = ann_seqs_stats.R | ||
R_log_file = R.log | ||
|
||
#Database names | ||
gffDB = gffDB | ||
|
||
#Stats output files names | ||
bpStatsFile = GO_bp | ||
mfStatsFile = GO_mf | ||
ccStatsFile = GO_cc | ||
bpStatsFileDom = GO_dom_bp | ||
mfStatsFileDom = GO_dom_mf | ||
ccStatsFileDom = GO_dom_cc | ||
cdStatsFile = CD | ||
closerOSFile = closer_os_table.txt | ||
pathwaysL1File = pwl1 | ||
pathwaysL2File = pwl2 | ||
pathwaysL3File = pwl3 | ||
ATGCPercFile = ATGC_Percentages.txt | ||
|
||
#Maximum length of descriptions in plots | ||
maxLengthDescs = 50 | ||
|
||
#BLAST databases names | ||
swissProtDB = uniprot_sprot.fasta | ||
tremblDB = uniprot_trembl.fasta | ||
|
||
#BLAST output type (you can use only: blast) | ||
blastOutType = blast | ||
|
||
#GFF3 Output file names | ||
blastxGFF3FileName = blastx_out.gff | ||
blastxTRGFF3FileName = blastxTremble_out.gff | ||
blastxSPGFF3FileName = blastxSprot_out.gff | ||
blastxUnirefGFF3FileName = blastxUniref_out.gff | ||
rpstblastnGFF3FileName = rpstblastn_out.gff | ||
blastnGFF3FileName = blastn_out.gff | ||
nonCodingAlgorithm = portrait | ||
nonCodingGFF3FileName = portait_out.gff | ||
orfAlgorithm = dna2pep | ||
orfGFF3FileName = dna2pep_out.gff | ||
|
||
#LOG FILES | ||
headParsNAValues = headParsNAValues.log | ||
uniprotGenesNotPresent = genesNotPresent.log | ||
|
||
#HTML PAGES | ||
htmlHome = index.html | ||
htmlAnnotStats = annot_stats.html | ||
htmlSeqStats = seq_stats.html | ||
AnnocriptLogo = a_logo.png | ||
|
||
#EXTERNAL PROGRAMS | ||
faSomeRecords = faSomeRecords | ||
############################# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
# File configuration for the user of Annocript 0.2 | ||
# READ CAREFULLY!!! | ||
# This file has been written with a specific syntax. | ||
# The variables MUST stay in the format: variable = value | ||
# A series of hashes (#########) closes the parameters to read | ||
# Parameters of BLAST programs without a value assigned will not be used (i.e. word_sizeX = ) | ||
# When you want to execute something you have to write YES (in upper case), otherwise write NO; | ||
# any other string will give an error. | ||
############################## | ||
|
||
#Allowed characters [A-Za-z0-9\_\-]. Allowed extensions (fa|fasta). Please use a dot only to separate the extension! | ||
fastaSeqs = trial_transcriptome.fasta | ||
|
||
#organisms to blast ('all' means all the organisms in UniProt are taken) | ||
#please use 'all' or a file name with organisms names | ||
#Such file must be placed in your working directory (i.e. ann_works) | ||
#Selection of the organisms works only if the TrEMBL database is used | ||
blastedOrganism = all | ||
|
||
#How to extract GO terms: you can choose to extract | ||
#for proteins ('proteins'), domains ('domains') or for both ('both') | ||
goTermsAss = proteins | ||
|
||
#What Annocript has to do | ||
doDbCreation = YES | ||
doExecutePrograms = YES | ||
doBuildOutput = YES | ||
extractStatistics = YES | ||
|
||
#What programs to execute | ||
doBlastxSP = YES | ||
doBlastxTRorUf = YES | ||
doRpstblastn = YES | ||
doBlastn = YES | ||
doPortrait = YES | ||
doDna2Pep = YES | ||
|
||
#Permits the use of the GFF database. Use YES only if you need it. GFF output files will always be printed. | ||
useGFFDB = NO | ||
|
||
#BLASTX and BLASTP PARAMETERS (use word_size 4 and threshold 18 to reduce computational time) | ||
#(outfmt can be only 0 with this version of Annocript) | ||
word_sizeX = 4 | ||
evalueX = 1E-5 | ||
num_descriptionsX = 5 | ||
num_alignmentsX = 5 | ||
max_target_seqsX = | ||
num_threadsX = 10 | ||
thresholdX = 18 | ||
matrixX = | ||
|
||
#BLASTN PARAMETERS | ||
word_sizeN = | ||
evalueN = 0.00001 | ||
num_descriptionsN = 1 | ||
num_alignmentsN = 1 | ||
max_target_seqsN = | ||
num_threadsN = 4 | ||
thresholdN = | ||
|
||
#RPSBLAST and RPSTBLASTN PARAMETERS | ||
word_sizeRPS = | ||
evalueRPS = 0.00001 | ||
num_descriptionsRPS = 20 | ||
num_alignmentsRPS = 20 | ||
max_target_seqsRPS = | ||
thresholdRPS = | ||
|
||
#BLAST results with evalue lower than evalMax will be shown in the tabular output | ||
evalMax = 0.00001 | ||
|
||
#Number of threads for parallel executions (Used only for RPSBLAST) | ||
threads4Parallel = 10 | ||
|
||
#DNA2PEP PARAMETERS (default is 'none') | ||
d2pMode = none | ||
|
||
#PLOTS | ||
#Number of top scored elements to show in the plots (maximum is 50) | ||
topToShow = 20 | ||
#Type of plot to show [piechart|barplot] | ||
plotType = barplot | ||
|
||
#Thresholds to be non-coding. They guide the heuristic in Annocript | ||
#Minimum Portrait score | ||
NCThresh = 0.95 | ||
#Maximum length of the ORF | ||
NCORFLength = 100 | ||
#Minimum length of the transcript | ||
NCSeqLength = 200 | ||
|
||
#FIXED PARAMETERS (You should set these only once)# | ||
|
||
#Database account info | ||
mySqlUser = [INSERT_MYSQL_USERID] | ||
mySqlPass = [INSERT_MYSQL_PASSWORD] | ||
|
||
#UNIPROT access information | ||
uniprotWebUser = anonymous | ||
uniprotWebPass = [INSERT_YOUR_EMAIL_ADDRESS] | ||
|
||
#Programs Paths | ||
blastPath = /home/francesco/bin/ncbi-blast-2.2.30+/bin/ | ||
portraitPath = /home/francesco/bin/portrait-1.1/portrait-1.1.pl | ||
dna2pepPath = /home/francesco/bin/dna2pep-1.1/dna2pep.py | ||
############################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
ncDB;cdName4Expression;DBCreationFolder;ProgExecFolder;GFFAndOutputFolder;usefulFolder;guideFolder;configFolder;platform;host;port;rpstblastnOut;blastxSPOut;blastxTROut;blastxUnirefOut;blastnOut;portraitOut;dna2pepOut;outCount;outHashFile;parsingFilePath;filtMappingFile;R_barplot_script;R_piechart_script;R_seqs_stats_script;R_log_file;gffDB;keepTempFiles;bigMemory;separator;maxFastaSeqLen;nuclIUPAC;protIUPAC;outFileName;outFiltered;ORFFastaFileName;NCOutFile;codingOutFile;gcContentFile;bpStatsFile;mfStatsFile;ccStatsFile;bpStatsFileDom;mfStatsFileDom;ccStatsFileDom;cdStatsFile;closerOSFile;pathwaysL1File;pathwaysL2File;pathwaysL3File;UniRef100PosIdMap;UniRef90PosIdMap;UniRef50PosIdMap;split_num;max_port_seq_length;min_port_seq_length;max_perc_ns;ATGCPercFile;maxLengthDescs;swissProtDB;tremblDB;blastOutType;blastxGFF3FileName;blastxTRGFF3FileName;blastxSPGFF3FileName;blastxUnirefGFF3FileName;rpstblastnGFF3FileName;blastnGFF3FileName;nonCodingAlgorithm;nonCodingGFF3FileName;orfAlgorithm;orfGFF3FileName;swissprotDBLink;tremblDBLink;unirefDBLink;unirefVerLink;uniprotVerLink;GODBLink;GOTermsLink;enzymeDBLink;cdDBLink;rfamDBLink;cdTableLink;pathwaysTableLink;uniprotOrgListLink;pfam2GOLink;silvaLSULink;silvaSSULink;headParsNAValues;uniprotGenesNotPresent;htmlHome;htmlAnnotStats;htmlSeqStats;AnnocriptLogo;faSomeRecords;max_tentatives | ||
fastaSeqs;blastedOrganism;goTermsAss;doDbCreation;doExecutePrograms;doBuildOutput;mySqlUser;mySqlPass;uniprotWebUser;uniprotWebPass;doBlastxSP;doBlastxTRorUf;doRpstblastn;doBlastn;doPortrait;doDna2Pep;useGFFDB;word_sizeX;evalueX;num_descriptionsX;num_alignmentsX;max_target_seqsX;num_threadsX;thresholdX;matrixX;word_sizeN;evalueN;num_descriptionsN;num_alignmentsN;max_target_seqsN;num_threadsN;thresholdN;word_sizeRPS;evalueRPS;num_descriptionsRPS;num_alignmentsRPS;max_target_seqsRPS;thresholdRPS;threads4Parallel;evalMax;d2pMode;topToShow;plotType;extractStatistics;NCThresh;NCORFLength;NCSeqLength;blastPath;portraitPath;dna2pepPath |
Oops, something went wrong.