#!/usr/bin/env bash
#
# Provenance notes and reformatting commands for the Glycine max diversity
# collection Wm82.gnm2.div.FL3Z, which is renamed here to
# Wm82.gnm2.div.Zhang_Jiang_2020.
#
# ---
# Initial work, by Annie Brown, in fall 2020 (commands recorded as given):
#   file_transformation:
#     - filtered original VCF file for MAF
#       - bcftools_concatCommand=concat /project/shared_files/legume_project/* .vcf | bcftools_viewCommand=view -i MAF[0]>0.01 --output-type z --compression-level 9 --threads 20 -o /home/anne.brown/An_FUll_filtered1.vcf
#     - extracted non-Synonymous SNPs from GTF files.
#       - zcat *.gtf.gz | grep "non-Synonymous" > AnLab_all_nonSyn_SNPs.gft
#
# Final formatting by S. Cannon, in Oct 2021:
# Change to new naming convention, and add prefixes.
#
# What the commands below do:
#   1. Copy the accession list into the new directory under the new name.
#   2. Decompress the GTF/VCF files, rewriting "Chr" to "glyma.Wm82.gnm2.Gm".
#   3. Rewrite the collection key in the MANIFEST and README files.
#   4. bgzip the new txt/gtf/vcf files, then tabix-index the VCFs.
#   5. Apply the same prefix to the AnLab PROVEAN VCF/GTF files.
#
# The decompression, bgzip and tabix stages each run their jobs in parallel,
# but every stage waits for the previous one to finish: compressing a file
# that is still being written, or indexing a file that is still being
# compressed, yields truncated or invalid output.
#
# nohup is deliberately not applied to individual commands: when stderr is a
# terminal, nohup redirects it to stdout, which inside a pipeline would send
# error messages into the data stream. To survive a logout, run this whole
# script under nohup instead.

set -euo pipefail

basedir="/usr/local/www/data/private/Glycine/max/diversity"
newdir="$basedir/Wm82.gnm2.div.Zhang_Jiang_2020"
olddir="$basedir/Wm82.gnm2.div.FL3Z"
base="glyma.Wm82.gnm2.div.Zhang_Jiang_2020"
old_base="glyma.Wm82.gnm2.div.FL3Z"

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Filter stdin to stdout, replacing the first "Chr" on every line with the
# prefixed form "glyma.Wm82.gnm2.Gm".
add_chr_prefix() {
  perl -pe 's/Chr/glyma.Wm82.gnm2.Gm/'
}

# Decompress a gzipped file and write the prefixed, uncompressed copy.
# Arguments: $1 - gzipped input file, $2 - output file
# Returns:   non-zero if either stage of the pipeline fails (pipefail).
convert_gz() {
  local src=$1 dest=$2
  gzcat "$src" | add_chr_prefix > "$dest"
}

# Wait for each PID given as an argument.
# Returns: 0 if all of them succeeded, 1 if at least one failed.
wait_all() {
  local pid status=0
  for pid in "$@"; do
    wait "$pid" || status=1
  done
  return "$status"
}

main() {
  local -a pids=()
  local file

  [[ -d "$olddir" ]] || die "source collection not found: $olddir"
  cd "$basedir" || die "cannot cd to $basedir"
  mkdir -p "$newdir"

  cat "$olddir/$old_base.Accessions.txt" > "$newdir/$base.info_accessions.txt"

  # Stage 1: decompress and add prefixes, one background job per file.
  convert_gz "$olddir/$old_base.nonSyn_SNPs.gtf.gz" \
    "$newdir/$base.nonSyn_SNPs.gtf" &
  pids+=("$!")
  convert_gz "$olddir/$old_base.SNPData_Full.vcf.gz" \
    "$newdir/$base.SNPdata_full.vcf" &
  pids+=("$!")
  convert_gz "$olddir/$old_base.SNPData_filtered.01.vcf.gz" \
    "$newdir/$base.SNPdata_maf01.vcf" &
  pids+=("$!")
  convert_gz "$olddir/$old_base.SNPData_filtered.05.vcf.gz" \
    "$newdir/$base.SNPdata_maf05.vcf" &
  pids+=("$!")

  # The small metadata files are rewritten while the large jobs run.
  perl -pe 's/FL3Z/Zhang_Jiang_2020/' \
    < "$olddir/MANIFEST.FL3Z.correspondence.yml" \
    > "$newdir/MANIFEST.Wm82.gnm2.div.Zhang_Jiang_2020.correspondence.yml"

  perl -pe 's/FL3Z/Zhang_Jiang_2020/; s/SNPData_filtered./SNPdata_maf/' \
    < "$olddir/MANIFEST.FL3Z.descriptions.yml" \
    > "$newdir/MANIFEST.Wm82.gnm2.div.Zhang_Jiang_2020.descriptions.yml"

  # NOTE(review): the first substitution presumably turns a "%YAML ..."
  # directive line into a plain "---" document marker - confirm.
  perl -pe 's/.YAML.+/---/; s/FL3Z/Zhang_Jiang_2020/' \
    < "$olddir/README.FL3Z.yml" \
    > "$newdir/README.Wm82.gnm2.div.Zhang_Jiang_2020.yml"

  # Barrier: every decompressed file must be complete before compression.
  wait_all "${pids[@]}" || die "decompressing/prefixing failed"

  cd "$newdir" || die "cannot cd to $newdir"

  # Stage 2: block-compress the new files in parallel. An unmatched glob
  # stays literal, so bgzip fails on it and the failure is reported below.
  pids=()
  for file in *txt *gtf *vcf; do
    bgzip "$file" &
    pids+=("$!")
  done
  # Barrier: tabix needs complete .vcf.gz files.
  wait_all "${pids[@]}" || die "bgzip failed"

  # Stage 3: index the compressed VCFs in parallel.
  pids=()
  for file in *.vcf.gz; do
    tabix -p vcf "$file" &
    pids+=("$!")
  done
  wait_all "${pids[@]}" || die "tabix failed"

  # Stage 4: PROVEAN-filtered non-synonymous SNP files.
  # NOTE(review): as in the original commands, these inputs are read from
  # the current directory ($newdir) and the outputs are left uncompressed -
  # confirm where the AnLab_* files live and whether they should also be
  # compressed and indexed.
  add_chr_prefix \
    < AnLab_1.5K.NS_SNPs_minusd2.5proveanAlleleFreq.01.vcf \
    > "$newdir/$base.NonsynSNPs_provean_freq01.vcf"
  add_chr_prefix \
    < AnLab_1.5K.NS_SNPs_minusd2.5proveanAlleleFreq.01.gtf \
    > "$newdir/$base.NonsynSNPs_provean_freq01.gtf"
}

main "$@"