disp(['Running VisualizeNextGen2011.m']) ; % Does visualization of Next Gen Sequencing Data % % Data are In File: % counts.csv % from Matt Wilkerson, Oct. 2011 % 1st column is % Remaining Columns are Reads % Rows are chromosome location % % This also requires Exon information, from the file: % exons.csv % which had some errors, it has been edited to: % exonsMarron.csv % by deleting the lines with the 1st column = % 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 54 % Look at P10, Case Number 38 % ipart = 344 ; % 0 - Read in Raw Data, and Save as .mat file % 1 - Individual Raw Data curves % 2 - Overlay of Raw Data curves % 3 - Standard PCA - Raw Data Curves % 4 - Study distributions of total reads % % 11 - Individual Raw Data curves Projected to Sphere % 12 - Overlays of Curves Projected to Sphere % 13 - PCA Analysis of Curves Projected to Sphere % 14 - Selected Brushing of PCA on Sphere % 15 - Clustering of Curves Projected to Sphere % % 21 - Individual log10 Data curves % 22 - Overlay of log10 Data curves % 23 - Standard PCA - log10 Data Curves % 24 - Selected Brushing of log10 PCA % 25 - Run SigClust on log10 Data Curves % 26 - Run SigClust on log10 Data Curves, iCovEst = 2 % 27 - PCA plot colored with 2-means clusters % % 31 - Individual log10 Data curves Projected to Sphere % 32 - Overlays of log10 Curves Projected to Sphere % 33 - PCA Analysis of log10 Curves Projected to Sphere % 34 - Selected Brushing of log10 PCA on Sphere % 35 - Clustering of log10 Curves Projected to Sphere % % 41 - Truncate to active genomic region, % Median Smooth (window = 121), Look at Residuals, % Highlight some common minimizers, % Also define "high expression cases", % Used in the remaining parts here % 42 - Standard curvdat & scatplot analyses % 43 - Zoom in on commom minimizers: % In windows of 100 on each side, make overlays % and compare with average and median curves % for detailed view of location of min'rs % In windows of 200 on each side, apply SiZer % 44 - Zoom in on common minimizers: SiZer on original data % % % % ipart bigger than 100 for those outside of % individual gene centered analysis % % 51 - Truncate to active genomic region, % Median Smooth (window = 31), Look at Residuals, % Highlight some common minimizers, % Also define "high expression cases", % Used in the remaining parts here % 52 - Standard curvdat & scatplot analyses % 53 - Zoom in on commom minimizers: % In windows of 100 on each side, make overlays % and compare with average and median curves % for detailed view of location of min'rs % In windows of 200 on each side, apply SiZer % % % % ipart bigger than 100 for those outside of % individual gene centered analysis % % 101 - Scatterplot matrix of all 4 total reads % 102 - Brushed Scatterplot matrix of all 4 total reads, % With gene log10 overlays % % 200s: % Investigate same thing, using David Marron's % version of this data % Caution: Case Names are now wrong % Have also delted last column of .csv file, since had bad data % % 200 - Read in Raw Data, and Save as .mat file % 204 - Study distributions of total reads % % 241 - Truncate to active genomic region, % Median Smooth (window = 121), Look at Residuals, % Highlight some common minimizers, % Also define "high expression cases", % Used in the remaining parts here % 242 - Standard curvdat & scatplot analyses % 243 - Zoom in on commom minimizers: % In windows of 100 on each side, make overlays % and compare with average and median curves % for detailed view of location of min'rs % In windows of 200 on each side, apply SiZer % 244 - Zoom in on common minimizers: SiZer on original data % % 300s: % Investigate same thing, using David Marron's % corrected data % Caution: Case Names are now wrong % Have also delted last column of .csv file, since had bad data % % 300 - Read in Raw Data, and Save as .mat file % 304 - Study distributions of total reads % % 341 - Truncate to active genomic region, % Median Smooth (window = 121), Look at Residuals, % Highlight some common minimizers, % Also define "high expression cases", % Used in the remaining parts here % 342 - Standard curvdat & scatplot analyses % 343 - Zoom in on commom minimizers: % In windows of 100 on each side, make overlays % and compare with average and median curves % for detailed view of location of min'rs % In windows of 200 on each side, apply SiZer % 344 - Zoom in on common minimizers: SiZer on original data % % To Do: % Find out about number of 0s in median residulas (all data) % Flat spots???? if ipart < 200 ; datfilename = 'VisualizeNextGen2011' ; elseif ipart < 300 ; datfilename = 'VisualizeNextGen2011newUnCorr' ; elseif ipart < 400 ; datfilename = 'VisualizeNextGen2011newCorr' ; end ; outfilepre = 'GraphicalOutput/VisNG' ; figure(1) ; clf ; if ipart == 0 | ... ipart == 200 | ... ipart == 300 ; % Read in Raw Data, and Save as 4 .mat files % First Read in Exon Info % % filestr = 'exons.csv' ; filestr = 'exonsMarron.csv' ; [numeric,txt]=xlsread(filestr) ; vchromall = numeric(:,2) ; vexleft = numeric(:,3) ; vexright = numeric(:,4) ; ng = 4 ; % Number of genes in input spreadsheet vchrom = unique(vchromall) ; disp(' Check chromosome numbers: 10 19 3 9') ; vchrom' GeneNamesS = {'PIK3CA'; 'CDK2A'; 'P10'; 'STK11'; } % Structure with Gene Names % Manually copied from spreadsheet % Put in same order as in vchrom disp(' Check gene names against entries in file:') ; for ig = 1:ng ; disp([' For Chromosome ' num2str(vchrom(ig)) ... ' Gene Name is ' GeneNamesS{ig}]) ; end ; disp(' ') ; % Now create structure of exon lefts and rights % vexleftS = {} ; vexrightS = {} ; for ig = 1:ng ; flag = (vchromall == vchrom(ig)) ; vexleftS{ig} = unique(vexleft(flag)) ; vexrightS{ig} = unique(vexright(flag)) ; end ; disp(' Check 1st exon left is 178866311 by showing difference is 0') ; abs(vexleftS{1}(1) - 178866311) disp(' Check last exon left is 1227592 by showing difference is 0') ; abs(vexleftS{end}(end) - 1227592) disp(' Check 1st exon right is 178866391 by showing difference is 0') ; abs(vexrightS{1}(1) - 178866391) disp(' Check last exon right is 1228434 by showing difference is 0') ; abs(vexrightS{end}(end) - 1228434) % Now Read in Main Data % if ipart == 0 ; filestr = 'counts.csv' ; elseif ipart == 200 ; filestr = 'uncorrected_coverage_oldformat.csv' ; elseif ipart == 300 ; filestr = 'corrected_coverage_oldformat.csv' ; end ; [numeric,txt]=xlsread(filestr) ; vbpnall = numeric(:,1) ; % all base pair numbers in input file mctsall = numeric(:,2:end) ; % matrix of all counts CaseNamesS = txt(1,2:end) ; % Structure of Case Names numeric = [] ; txt = [] ; % to save space % Check Inputs % disp(' ') ; disp('Check 1st Base Pair is 1205795 by showing difference is 0') ; abs(vbpnall(1) - 1205795) disp('Check last Base Pair is 89728533 by showing difference is 0') ; abs(vbpnall(end) - 89728533) disp('Check Upper Left Count is 0') ; mctsall(1,1) disp('Check Upper Right Count is 0') ; mctsall(1,end) disp('Check Lower Left Count is 29') ; mctsall(end,1) disp('Check Lower Right Count is 7') ; mctsall(end,end) disp('Check 1st Case Name is preview.20110919/110107_UNC9-SN296_0118_B815C0ABXX7TCGA-66-2756-11A-01R-A') ; CaseNamesS(1) disp('Check last Case Name is preview.20110919/110630_UNC11-SN627_0112_AD0CVJABXX8TCGA-22-5482-01A-01R-1635-07') ; CaseNamesS(end) % Put Data into Structure % DataS = {} ; for ig = 1:ng ; vexleft = vexleftS{ig} ; vexright = vexrightS{ig} ; % Check vectors have the same length % if length(vexleft) ~= length(vexright) ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; disp('!!! Error from VisualizeNextGen2011: !!!') ; disp('!!! Uneven length of Exon !!!') ; disp('!!! Boundary Vectors !!!') ; disp('!!! Terminating Execution !!!') ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; return ; end ; % Check each left exon end is larger than each right exon end % if sum(vexright <= vexleft) > 0 ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; disp('!!! Error from VisualizeNextGen2011: !!!') ; disp('!!! A left exon boundary is smaller !!!') ; disp('!!! than the right boundar !!!') ; disp('!!! Terminating Execution !!!') ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; return ; end ; vbpn = [] ; mcts = [] ; for il = 1:length(vexleft) ; flag = (vexleft(il) <= vbpnall) & (vbpnall <= vexright(il)) ; vbpn = [vbpn; vbpnall(flag)] ; mcts = [mcts; mctsall(flag,:)] ; end ; DataS{ig,1} = vbpn ; DataS{ig,2} = mcts ; end ; vbpnall = [] ; mctsall = [] ; % to save space % Recreate first page of Matt's file: stk11.summary.pdf % ig = 3 ; % for Chromosome 10 = P10 plot(DataS{ig,2}(:,1),'k-') ; title(CaseNamesS{1}) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; vaxh = axisSM([1:size(DataS{ig,2},1)]) ; vaxv = [0; 610] ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.9 * (vaxv(2) - vaxv(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; % Recreate last page of Matt's file: stk11.summary.pdf % figure(2) ; clf ; ig = 2 ; % for Chromosome 9 = CDK2A plot(DataS{ig,2}(:,end),'k-') ; title(CaseNamesS{end}) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; vaxh = axisSM([1:size(DataS{ig,2},1)]) ; vaxv = [0; 2010] ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.9 * (vaxv(2) - vaxv(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; % Save Data as .mat File % save(datfilename,'DataS','CaseNamesS','vchrom','GeneNamesS') ; % Save variables: % DataS % CaseNamesS % vchrom % GeneNamesS else ; % Load data from previously saved .mat file % Load data from .mat file % load(datfilename) ; % Loads variables: % DataS % CaseNamesS % vchrom % GeneNamesS ng = size(DataS,1) ; if ipart < 200 ; n = length(CaseNamesS) ; else ; n = 176 ; end ; vh = [200; 100; 400; 200] ; % vector of bandwidths, one for each gene % Create Projections of Data Curves onto Unit Sphere % DataSps = {} ; mflag0 = [] ; for ig = 1:ng ; mdat = DataS{ig,2} ; % raw data vlength = [] ; for i = 1:n ; vlength = [vlength norm(mdat(:,i))] ; % vector of lengths end ; vflag0 = (vlength <= 10^(-12)) ; if sum(vflag0) > 0 ; vn = 1:n ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; disp('!!! Warning from VisualizeNextGen2011: !!!') ; disp(['!!! All reads are 0, for ig = ' num2str(ig) ' ' GeneNamesS{ig}]) ; disp(['!!! Case # ' num2str(vn(vflag0))]) ; disp('!!! Will turn into constant vector !!!') ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; mdat(:,vflag0) = ones(size(mdat,1),sum(vflag0)) ; vlength(vflag0) = ones(1,sum(vflag0)) * sqrt(size(mdat,1)) ; end ; mdatps = mdat ./ vec2matSM(vlength,size(mdat,1)) ; % matrix of projections onto unit sphere DataSps{ig} = mdatps ; end ; % Create log10 version of data % DataSl = {} ; for ig = 1:ng ; DataSl{ig} = log10(DataS{ig,2} + 1) ; end ; % Create Projections of log10 Data Curves onto Unit Sphere % DataSlps = {} ; mflag0 = [] ; for ig = 1:ng ; mdat = DataSl{ig} ; % log10 data vlength = [] ; for i = 1:n ; vlength = [vlength norm(mdat(:,i))] ; % vector of lengths end ; vflag0 = (vlength <= 10^(-12)) ; if sum(vflag0) > 0 ; vn = 1:n ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; disp('!!! Warning from VisualizeNextGen2011: !!!') ; disp(['!!! All reads are 0, for ig = ' num2str(ig) ' ' GeneNamesS{ig}]) ; disp(['!!! Case # ' num2str(vn(vflag0))]) ; disp('!!! Will turn into constant vector !!!') ; disp('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') ; mdat(:,vflag0) = ones(size(mdat,1),sum(vflag0)) ; vlength(vflag0) = ones(1,sum(vflag0)) * sqrt(size(mdat,1)) ; end ; mdatps = mdat ./ vec2matSM(vlength,size(mdat,1)) ; % matrix of projections onto unit sphere DataSlps{ig} = mdatps ; end ; if ipart < 100 | ... 200 < ipart ; % Do analysis over each of the 4 genes for ig = 1:ng ; % Do everything for the ng genes genefilepre = [outfilepre 'Gene' num2str(ig) GeneNamesS{ig}] ; disp(' ') ; disp([' Working on gene #' num2str(ig) ' ' GeneNamesS{ig} ... ' on Chromosome ' num2str(vchrom(ig))]) ; if ipart == 1 ; % Individual Raw Data curves vaxh = axisSM([1:size(DataS{ig,2},1)]) ; vaxv = axisSM(DataS{ig,2}) ; vaxv(1) = 0 ; for i = 1:n ; % Loop through cases disp([' Working on curve ' num2str(i) ' of ' num2str(n)]) ; plot(DataS{ig,2}(:,i),'k-') ; title(CaseNamesS{i}) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.9 * (vaxv(2) - vaxv(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip0' num2str(ipart) 'RawDat'] ; if i == 1 ; print('-dpsc2',savestr) ; else ; print('-dpsc2',savestr,'-append') ; end ; end ; elseif ipart == 2 ; % Overlay of Raw Data curves nbp = size(DataS{ig,2}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataS{ig,2}) ; vaxv(1) = 0 ; plot(DataS{ig,2},'-') ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip0' num2str(ipart) 'RawDatOverlay'] ; print('-dpsc2',savestr) ; elseif ipart == 3 ; % Standard PCA - Raw Data Curves legendcellstr = {{['Chromosome ' num2str(vchrom(ig))] ... ['Gene = ' GeneNamesS{ig}]}} ; savestr = [genefilepre 'ip0' num2str(ipart) 'RawCurvDat'] ; paramstruct = struct('viout',[1; 3], ... 'legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(DataS{ig,2},paramstruct) ; elseif ipart == 4 | ... ipart == 204 | ... ipart == 304 ; % Study distributions of total reads figure(1) ; clf ; vtotread = [] ; for i = 1:n ; % Loop through cases vtotread = [vtotread; sum(DataS{ig,2}(:,i))] ; end ; vlogread = log10(vtotread + 1) ; titstr = ['Distribution of Total Reads for Chromosome ' ... num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}] ; paramstruct = struct('titlestr',titstr,... 'xlabelstr','log_{10}(Total Number of Reads)',... 'ylabelstr','Density') ; kdeSM(vlogread,paramstruct) ; savestr = [genefilepre 'ip0' num2str(ipart) 'ReadTotalsKDE'] ; orient landscape ; print('-dpsc2',savestr) ; figure(2) ; clf ; savestr = [genefilepre 'ip0' num2str(ipart) 'ReadTotalsSiZer'] ; paramstruct = struct('iout',2, ... 'imovie',0, ... 'savestr',savestr, ... 'xlabelstr','log_{10}(Total Number of Reads)', ... 'famoltitle',titstr, ... 'iscreenwrite',1, ... 'nsh',41, ... 'nfh',41) ; sizerSM(vlogread,paramstruct) ; elseif ipart == 11 ; % Individual Raw Data curves Projected to Sphere vaxh = axisSM([1:size(DataSps{ig},1)]) ; vaxv = axisSM(DataSps{ig}) ; vaxv(1) = 0 ; for i = 1:n ; % Loop through cases disp([' Working on curve ' num2str(i) ' of ' num2str(n)]) ; plot(DataSps{ig}(:,i),'k-') ; title(CaseNamesS{i}) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.9 * (vaxv(2) - vaxv(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.8 * (vaxv(2) - vaxv(1)), ... 'Projected to Unit Sphere') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'SphProjDat'] ; if i == 1 ; print('-dpsc2',savestr) ; else ; print('-dpsc2',savestr,'-append') ; end ; end ; elseif ipart == 12 ; % Overlays of Curves Projected to Sphere nbp = size(DataSps{ig}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSps{ig}) ; vaxv(1) = 0 ; plot(DataSps{ig},'-') ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ', Projected to Unit Sphere']) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'SphProjDatOverlay'] ; print('-dpsc2',savestr) ; elseif ipart == 13 ; % PCA Analysis of Curves Projected to Sphere legendcellstr = {{['Chromosome ' num2str(vchrom(ig))] ... ['Gene = ' GeneNamesS{ig}] ... 'Projected to Unit Sphere'}} ; savestr = [genefilepre 'ip' num2str(ipart) 'SphProjCurvDat'] ; paramstruct = struct('viout',[1; 3], ... 'legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(DataSps{ig},paramstruct) ; elseif ipart == 14 ; % Selected Brushing of PCA on Sphere % Do PCA % paramstruct = struct('npc',4,... 'iscreenwrite',1,... 'viout',[0 0 0 0 1]) ; outstruct = pcaSM(DataSps{ig},paramstruct) ; mpc = getfield(outstruct,'mpc') ; % Set up Color Matrix % mcolor = ones(n,1) * [0 0 0] ; % Start with all black and update if ig == 1 ; vflag = (mpc(2,:) > 0.5)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(3,:) > 0.15)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 2 ; vflag = (mpc(2,:) > 0.1)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(4,:) > 0.5)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 3 ; vflag = (mpc(2,:) > 0.4)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(4,:) > 0.3)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 4 ; vflag = (mpc(2,:) > 0.5)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(3,:) > 0.3)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; vflag = (mpc(4,:) < -0.4)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 1 0] ; end ; % Make brushed PC plot - Sphere Projected Data % titlecellstr = {{['Chromosome ' num2str(vchrom(ig))] ... ['Gene = ' GeneNamesS{ig}] ... 'Projected to Unit Sphere'}} ; savestr = [genefilepre 'ip' num2str(ipart) 'SphProjBrushPCA'] ; paramstruct = struct('npcadiradd',4, ... 'icolor',mcolor, ... 'titlecellstr',titlecellstr, ... 'labelcellstr',{{'PC 1'; 'PC 2'; 'PC 3'; 'PC 4'}}, ... 'savestr',savestr, ... 'iscreenwrite',1) ; scatplotSM(DataSps{ig},[],paramstruct) ; % Make data overlay - Sphere Projected Data % figure(2) ; clf ; nbp = size(DataSps{ig}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSps{ig}) ; vaxv(1) = 0 ; plot(DataSps{ig},'k-') ; hold on ; for i = 1:n ; if sum(mcolor(i,:)) > 0.5 ; plot(DataSps{ig}(:,i),'-','Color',mcolor(i,:)) ; end ; end ; hold off ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ', Projected to Unit Sphere, Brushed by PCA']) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'SphProjBrushCurves'] ; print('-dpsc2',savestr) ; % Make data overlay - Raw Data % figure(3) ; clf ; nbp = size(DataS{ig,2}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataS{ig,2}) ; vaxv(1) = 0 ; plot(DataS{ig,2},'k-') ; hold on ; for i = 1:n ; if sum(mcolor(i,:)) > 0.5 ; plot(DataS{ig,2}(:,i),'-','Color',mcolor(i,:)) ; end ; end ; hold off ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ', Projected to Unit Sphere, Brushed by PCA']) ; xlabel('exonic nt number, not genomic position') ; ylabel('RNA read depth') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'RawBrushCurves'] ; print('-dpsc2',savestr) ; elseif ipart == 15 ; % Clustering of Curves Projected to Sphere elseif ipart == 21 ; % Individual log10 Data curves vaxh = axisSM([1:size(DataSl{ig},1)]) ; vaxv = axisSM(DataSl{ig}) ; vaxv(1) = 0 ; for i = 1:n ; % Loop through cases disp([' Working on curve ' num2str(i) ' of ' num2str(n)]) ; plot(DataSl{ig}(:,i),'k-') ; title(CaseNamesS{i}) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.9 * (vaxv(2) - vaxv(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logDat'] ; if i == 1 ; print('-dpsc2',savestr) ; else ; print('-dpsc2',savestr,'-append') ; end ; end ; elseif ipart == 22 ; % Overlay of log10 Data curves nbp = size(DataSl{ig}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSl{ig}) ; vaxv(1) = 0 ; plot(DataSl{ig},'-') ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logDatOverlay'] ; print('-dpsc2',savestr) ; elseif ipart == 23 ; % Standard PCA - log10 Data Curves legendcellstr = {{['Chromosome ' num2str(vchrom(ig))] ... ['Gene = ' GeneNamesS{ig}]}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logCurvDat'] ; paramstruct = struct('viout',[1; 3], ... 'legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(DataSl{ig},paramstruct) ; elseif ipart == 24 ; % Selected Brushing of log10 PCA % Do PCA % paramstruct = struct('npc',4,... 'iscreenwrite',1,... 'viout',[0 0 0 0 1]) ; outstruct = pcaSM(DataSl{ig},paramstruct) ; mpc = getfield(outstruct,'mpc') ; % Set up Color Matrix % mcolor = ones(n,1) * [0 0 0] ; % Start with all black and update if ig == 1 ; vflag = (mpc(2,:) < -10)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(2,:) > 11)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 2 ; vflag = (mpc(1,:) > 0)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(2,:) > 4)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 3 ; vflag = (mpc(1,:) > 70)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(3,:) < -11)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 4 ; vflag = (mpc(2,:) > 4)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; vflag = (mpc(1,:) > 60)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; end ; % Make brushed PC plot - log10 Data % figure(1) ; clf ; titlecellstr = {{['Chromosome ' num2str(vchrom(ig))] ... ['Gene = ' GeneNamesS{ig}]}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logBrushPCA'] ; paramstruct = struct('npcadiradd',4, ... 'icolor',mcolor, ... 'titlecellstr',titlecellstr, ... 'labelcellstr',{{'PC 1'; 'PC 2'; 'PC 3'; 'PC 4'}}, ... 'savestr',savestr, ... 'iscreenwrite',1) ; scatplotSM(DataSl{ig},[],paramstruct) ; % Make data overlay - log10 Data % figure(2) ; clf ; nbp = size(DataS{ig,2}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSl{ig}) ; vaxv(1) = 0 ; plot(DataSl{ig},'k-') ; hold on ; for i = 1:n ; if sum(mcolor(i,:)) > 0.5 ; plot(DataSl{ig}(:,i),'-','Color',mcolor(i,:)) ; end ; end ; hold off ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ', log_{10} Transformed, Brushed by PCA']) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logRawBrushCurves'] ; print('-dpsc2',savestr) ; % Output files listing Brushed Case Names % savestr = [genefilepre 'ip' num2str(ipart) 'BrushedCaseNames.txt'] ; fid = fopen(savestr,'wt') ; % 'wt' is for "delete contents of this file and open % for writing" (with 't' for "text"). titlstr1 = 'Output from the MATLAB Script VisualizeNextGen2011.m, ' ; titlstr1 = [titlstr1,date] ; cntbytes = fprintf(fid,'%1s\n\n',titlstr1) ; % '%1s/n' says "print string, followed by a line feed" % (Note, this only goes to next line) titlstr2 = ' Output listing Brushed Case Names, ipart = 24, ' ; cntbytes = fprintf(fid,'%1s\n\n\n',titlstr2) ; % 3 line feeds gives two blank lines titlstr3 = [' Gene = ' GeneNamesS{ig}] ; cntbytes = fprintf(fid,'%1s\n\n\n',titlstr3) ; colorstr = ' Case Names Brushed Red: ' ; cntbytes = fprintf(fid,'%1s\n\n',colorstr) ; for i = 1:n ; if mcolor(i,:) == [1 0 0] ; cntbytes = fprintf(fid,'%1s\n',CaseNamesS{i}) ; end ; end ; colorstr = ' Case Names Brushed Blue: ' ; cntbytes = fprintf(fid,'\n\n\n%1s\n\n',colorstr) ; for i = 1:n ; if mcolor(i,:) == [0 0 1] ; cntbytes = fprintf(fid,'%1s\n',CaseNamesS{i}) ; end ; end ; colorstr = ' Case Names Brushed Black: ' ; cntbytes = fprintf(fid,'\n\n\n%1s\n\n',colorstr) ; for i = 1:n ; if mcolor(i,:) == [0 0 0] ; cntbytes = fprintf(fid,'%1s\n',CaseNamesS{i}) ; end ; end ; fclose('all') ; % !!!! Careful !!!! Need to do this to be able work with files after % running matlab. Alternative is: % fclose(fid) ; elseif ipart == 25 ; % SigClust on log10 Data Curves paramstruct = struct('datastr',['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}], ... 'BGSDsavestr',[genefilepre 'ip' num2str(ipart) 'SigClustAllPixels'], ... 'CovEsavestr',[genefilepre 'ip' num2str(ipart) 'SigClustCovEst'], ... 'pValsavestr',[genefilepre 'ip' num2str(ipart) 'SigClustPVal'], ... 'iscreenwrite',2) ; SigClustSM(DataSl{ig},paramstruct) ; elseif ipart == 26 ; % SigClust on log10 Data Curves, iCovEst = 2 paramstruct = struct('iCovEst',2, ... 'datastr',['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}], ... 'BGSDsavestr',[genefilepre 'ip' num2str(ipart) 'SigClustAllPixels'], ... 'CovEsavestr',[genefilepre 'ip' num2str(ipart) 'SigClustCovEst'], ... 'pValsavestr',[genefilepre 'ip' num2str(ipart) 'SigClustPVal'], ... 'iscreenwrite',2) ; SigClustSM(DataSl{ig},paramstruct) ; elseif ipart == 27 ; % PCA plots colored with 2-means results savestr = [genefilepre 'ip' num2str(ipart) '2MeansClusterView'] ; paramstruct = struct('maxstep',5, ... 'titlestr',['Chr. ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}], ... 'savestr',savestr, ... 'iscreenwrite',1) ; SigClust2meanFastSM(DataSl{ig},paramstruct) ; elseif ipart == 31 ; % Individual log10 Data curves Projected to Sphere vaxh = axisSM([1:size(DataSlps{ig},1)]) ; vaxv = axisSM(DataSlps{ig}) ; vaxv(1) = 0 ; for i = 1:n ; % Loop through cases disp([' Working on curve ' num2str(i) ' of ' num2str(n)]) ; plot(DataSlps{ig}(:,i),'k-') ; title(CaseNamesS{i}) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.9 * (vaxv(2) - vaxv(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; text(vaxh(1) + 0.1 * (vaxh(2) - vaxh(1)), ... vaxv(1) + 0.8 * (vaxv(2) - vaxv(1)), ... 'log_{10} Curves Projected to Unit Sphere') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logSphProjDat'] ; if i == 1 ; print('-dpsc2',savestr) ; else ; print('-dpsc2',savestr,'-append') ; end ; end ; elseif ipart == 32 ; % Overlays of log10 Curves Projected to Sphere nbp = size(DataSlps{ig}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSlps{ig}) ; vaxv(1) = 0 ; plot(DataSlps{ig},'-') ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ', log_{10} Curves Projected to Unit Sphere']) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logSphProjDatOverlay'] ; print('-dpsc2',savestr) ; elseif ipart == 33 ; % PCA Analysis of log10 Curves Projected to Sphere legendcellstr = {{['Chromosome ' num2str(vchrom(ig))] ... ['Gene = ' GeneNamesS{ig}] ... 'Projected to Unit Sphere'}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logSphProjCurvDat'] ; paramstruct = struct('viout',[1; 3], ... 'legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(DataSlps{ig},paramstruct) ; elseif ipart == 34 ; % Selected Brushing of log10 PCA on Sphere % Do PCA % paramstruct = struct('npc',4,... 'iscreenwrite',1,... 'viout',[0 0 0 0 1]) ; outstruct = pcaSM(DataSlps{ig},paramstruct) ; mpc = getfield(outstruct,'mpc') ; % Set up Color Matrix % mcolor = ones(n,1) * [0 0 0] ; % Start with all black and update if ig == 1 ; vflag = (mpc(2,:) > 0.1)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(2,:) < -0.15)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 2 ; vflag = (mpc(1,:) + mpc(2,:) > 0.02)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(4,:) > 0.04)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 3 ; vflag = (mpc(1,:) > 0.4)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = (mpc(3,:) > 0.03)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; elseif ig == 4 ; vflag = (mpc(4,:) < -0.05)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; vflag = (mpc(1,:) + mpc(2,:) > 0.2)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; end ; % Make brushed PC plot - Sphere Projected Data % figure(1) ; clf ; titlecellstr = {{['Chromosome ' num2str(vchrom(ig))] ... ['Gene = ' GeneNamesS{ig}] ... 'Projected to Unit Sphere'}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logSphProjBrushPCA'] ; paramstruct = struct('npcadiradd',4, ... 'icolor',mcolor, ... 'titlecellstr',titlecellstr, ... 'labelcellstr',{{'PC 1'; 'PC 2'; 'PC 3'; 'PC 4'}}, ... 'savestr',savestr, ... 'iscreenwrite',1) ; scatplotSM(DataSlps{ig},[],paramstruct) ; % Make data overlay - Sphere Projected Data % figure(2) ; clf ; nbp = size(DataSlps{ig}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSlps{ig}) ; vaxv(1) = 0 ; plot(DataSlps{ig},'k-') ; hold on ; for i = 1:n ; if sum(mcolor(i,:)) > 0.5 ; plot(DataSlps{ig}(:,i),'-','Color',mcolor(i,:)) ; end ; end ; hold off ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ', log_{10} Projected to Unit Sphere, Brushed by PCA']) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logSphProjBrushCurves'] ; print('-dpsc2',savestr) ; % Make data overlay - log10 Data % figure(3) ; clf ; nbp = size(DataS{ig,2}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSl{ig}) ; vaxv(1) = 0 ; plot(DataSl{ig},'k-') ; hold on ; for i = 1:n ; if sum(mcolor(i,:)) > 0.5 ; plot(DataSl{ig}(:,i),'-','Color',mcolor(i,:)) ; end ; end ; hold off ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ', log_{10} Transformed, Brushed by PCA']) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logRawBrushCurves'] ; print('-dpsc2',savestr) ; elseif ipart == 35 ; % Clustering of log10 Curves Projected to Sphere elseif (41 <= ipart) & (ipart < 50) | ... (241 <= ipart) & (ipart < 250) | ... (341 <= ipart) & (ipart < 350) ; % Truncate to active genomic region, % Median Smooth, Compute Residuals % Calculate running k-medians % d = size(DataSl{ig},1) ; k = 60 ; disp(' ') ; disp([' Calculating running medians of ' num2str(2 * k + 1)]) ; DataSlmed = [] ; for i = 1:n ; vDataSl = DataSl{ig}(:,i) ; vDataSlmed = [] ; for j = (k + 1):(d - k) ; vDataSlmed = [vDataSlmed; median(vDataSl((j - k):(j + k)))] ; end ; DataSlmed = [DataSlmed vDataSlmed] ; end ; vimed = ((k + 1):(d - k))' ; % Truncating to Central Regions, and finding starting % index of "high expression" cases % if ig == 1 ; % PIK3CA istart = 300 ; iend = 3300 ; hestart = 19 ; % index of start of "high expression" cases vcm = [737; 1327; 2080; 2763] ; % vector of common "minimizer" base pair locations elseif ig == 2 ; % CDK2A istart = 700 ; iend = 980 ; hestart = 73 ; % index of start of "high expression" cases vcm = [842] ; % vector of common "minimizer" base pair locations elseif ig == 3 ; % P10 istart = 2200 ; iend = 5200 ; hestart = 19 ; % index of start of "high expression" cases vcm = [2805; 3685; 4627] ; % vector of common "minimizer" base pair locations elseif ig == 4 ; % STK11 istart = 900 ; iend = 2200 ; hestart = 19 ; % index of start of "high expression" cases vcm = [1177; 1490; 1733] ; % vector of common "minimizer" base pair locations end ; disp(['For Gene: ' GeneNamesS{ig} ', with overall end = ' num2str(d)]) ; disp([' Truncating to istart = ' num2str(istart) ... ', iend = ' num2str(iend)]) ; dit = iend - istart + 1 ; DataSlt = DataSl{ig}(istart:iend,:) ; DataSlmedt = DataSlmed((istart - k):(iend - k),:) ; vimedt = vimed((istart - k):(iend - k),:) ; DataSlmedtres = DataSlt - DataSlmedt ; % Sorting on truncated log area % vilarea = [] ; for i = 1:n ; vilarea = [vilarea; sum(DataSlt(:,i))] ; end ; [temp,visort] = sort(vilarea) ; DataSlmedtressort = DataSlmedtres(:,visort) ; if ipart == 41 | ... ipart == 241 | ... ipart == 341 ; % Look at Residuals, Study Truncation vaxall = axisSM(DataSl{ig}) ; vaxtres = axisSM(DataSlmedtres) ; for ii = 1:n ; % Loop through cases i = visort(ii) ; disp([' Plotting curve ' num2str(i) ' of ' num2str(n)]) ; subplot(2,1,1) ; plot(DataSl{ig}(:,i),'k-') ; axis([0 (d + 1) vaxall(1) vaxall(2)]) ; hold on ; plot(vimed,DataSlmed(:,i),'r--','Linewidth',2) ; plot([istart; istart],[vaxall(1); vaxall(2)],'g-') ; plot([iend; iend],[vaxall(1); vaxall(2)],'g-') ; for icm = 1:length(vcm) ; cm = vcm(icm) ; plot([cm; cm],[vaxall(1); vaxall(2)],'b--') ; end ; hold off ; title(CaseNamesS{i},'Interpreter', 'none') ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; text(0 + 0.1 * ((d + 1) - 0), ... vaxall(1) + 0.9 * (vaxall(2) - vaxall(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ' Sorted by Area under Truncated Log Counts']) ; text(0 + 0.1 * ((d + 1) - 0), ... vaxall(1) + 0.8 * (vaxall(2) - vaxall(1)), ... ['Running median of ' num2str(2 * k + 1)],'Color','r') ; text(0 + 0.7 * ((d + 1) - 0), ... vaxall(1) + 0.8 * (vaxall(2) - vaxall(1)), ... ['Case Number ' num2str(i)]) ; subplot(2,1,2) ; plot(vimedt,DataSlmedtres(:,i),'k-') ; hold on ; plot([istart; istart],[vaxtres(1); vaxtres(2)],'g-') ; plot([iend; iend],[vaxtres(1); vaxtres(2)],'g-') ; for icm = 1:length(vcm) ; cm = vcm(icm) ; plot([cm; cm],[vaxtres(1) vaxtres(2)],'b--') ; end ; hold off ; title(['Residuals from Running Median of ' num2str(2 * k + 1)]) ; xlabel('Truncated exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([(istart - 1) (iend + 1) vaxtres(1) vaxtres(2)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logDatTruncMediannResid'] ; if ii == 1 ; print('-dpsc2',savestr) ; else ; print('-dpsc2',savestr,'-append') ; end ; end ; elseif ipart == 42 | ... ipart == 242 | ... ipart == 342 ; % Standard curvdat & scatplot analyses legendcellstr = {{['Gene = ' GeneNamesS{ig}] ['Cases ' num2str(hestart) ' to 180']}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logDatTruncMediannResidCurvDat'] ; paramstruct = struct('legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(DataSlmedtressort(:,hestart:end),paramstruct) ; titlecellstr = {{['Gene = ' GeneNamesS{ig}] ['Cases ' num2str(hestart) ' to 180']}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logDatTruncMediannResidScatPlot'] ; paramstruct = struct('npcadiradd',4, ... 'titlecellstr',titlecellstr, ... 'labelcellstr',{{'PC 1','PC 2','PC 3','PC 4'}}, ... 'savestr',savestr, ... 'iscreenwrite',1) ; scatplotSM(DataSlmedtressort(:,hestart:end),[],paramstruct) ; elseif ipart == 43 | ... ipart == 243 | ... ipart == 343 ; % Zoom in on commom minimizers: % In windows of 100 on each side, make overlays % and compare with average and median curves % for detailed view of location of min'rs % In windows of 200 on each side, apply SiZer for icm = 1:length(vcm) ; % Loop through Common Minimizers % Work in windows of 100 around this common minimizer % cm = vcm(icm) ; winflag = abs(vimedt - cm) <= 100 ; mwdata = DataSlmedtressort(winflag,hestart:end) ; vwi = vimedt(winflag) ; vavg = mean(mwdata,2) ; vmd = median(mwdata,2) ; % Plot curves and minimizers % figure(1) ; clf ; vax = axisSM(mwdata) ; pleft = vwi(1) - 1 ; pright = vwi(end) + 1 ; plot(vwi,mwdata,'k-') ; title(['Chromosome ' num2str(vchrom(ig)) ', Gene = ' GeneNamesS{ig} ... ', Zoomed into Minimum at ' num2str(cm)]) ; axis([pleft pright vax(1) vax(2)]) ; hold on ; plot(vwi,vavg,'g--','LineWidth',3) ; plot(vwi,vmd,'m--','LineWidth',3) ; plot([cm; cm],[vax(1); vax(2)],'b--') ; text(pleft + 0.1 * (pright - pleft), ... vax(1) + 0.2 * (vax(2) - vax(1)), ... 'Average','Color','g') ; text(pleft + 0.1 * (pright - pleft), ... vax(1) + 0.1 * (vax(2) - vax(1)), ... 'Pointwise Median','Color','m') ; hold off ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'Curves'] ; print('-dpsc2',savestr) ; % Do curvdatSM analysis % figure(2) ; clf ; legendcellstr = {{['Chromosome ' num2str(vchrom(ig))] ['Gene = ' GeneNamesS{ig}]}} ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'CurvDat'] ; paramstruct = struct('legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(mwdata,paramstruct) ; % Work in windows of 200 around this common minimizer cm = vcm(icm) ; winflag = abs(vimedt - cm) <= 200 ; mwdata = DataSlmedtressort(winflag,hestart:end) ; vwi = vimedt(winflag) ; vavg = mean(mwdata,2) ; figure(3) ; clf ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'SiZer'] ; famoltitle = ['Chromosome ' num2str(vchrom(ig)) ', Gene = ' GeneNamesS{ig} ... ', Zoomed into Minimum at ' num2str(cm)] ; paramstruct = struct('imovie',0,... 'savestr',savestr,... 'famoltitle',famoltitle,... 'iscreenwrite',1,... 'nfh',21,... 'nsh',21) ; sizerSM([vwi vavg],paramstruct) ; end ; elseif ipart == 44 | ... ipart == 244 | ... ipart == 344 ; % Zoom in on commom minimizers: % In windows of 100 on each side, make overlays % and compare with average and median curves % for detailed view of location of min'rs % In windows of 200 on each side, apply SiZer to Original Data for icm = 1:length(vcm) ; % Loop through Common Minimizers % Work in windows of 200 around this common minimizer cm = vcm(icm) ; winflag = abs(vimedt - cm) <= 200 ; mwdata = DataSlt(winflag,hestart:end) ; vwi = vimedt(winflag) ; vavg = mean(mwdata,2) ; figure(3) ; clf ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'SiZer'] ; famoltitle = ['Chromosome ' num2str(vchrom(ig)) ', Gene = ' GeneNamesS{ig} ... ', Zoomed into Minimum at ' num2str(cm)] ; paramstruct = struct('imovie',0,... 'savestr',savestr,... 'famoltitle',famoltitle,... 'iscreenwrite',1,... 'nfh',21,... 'nsh',21) ; sizerSM([vwi vavg],paramstruct) ; end ; end ; elseif (51 <= ipart) & (ipart < 60) ; % Truncate to active genomic region, % Median Smooth, Compute Residuals % Calculate running k-medians % d = size(DataSl{ig},1) ; k = 15 ; disp(' ') ; disp([' Calculating running medians of ' num2str(2 * k + 1)]) ; DataSlmed = [] ; for i = 1:n ; vDataSl = DataSl{ig}(:,i) ; vDataSlmed = [] ; for j = (k + 1):(d - k) ; vDataSlmed = [vDataSlmed; median(vDataSl((j - k):(j + k)))] ; end ; DataSlmed = [DataSlmed vDataSlmed] ; end ; vimed = ((k + 1):(d - k))' ; % d-2*k vector, locations in median coordinates, % of indices in original data coordinates % Truncating to Central Regions, and finding starting % index of "high expression" cases % if ig == 1 ; % PIK3CA istart = 300 ; iend = 3300 ; hestart = 70 ; % index of start of "high expression" cases vcm = [737; 1327; 2080; 2763] ; % vector of common "minimizer" base pair locations maxrunthresh = 40 ; % threshold for plotting max number of runs elseif ig == 2 ; % CDK2A istart = 700 ; iend = 980 ; hestart = 90 ; % index of start of "high expression" cases vcm = [842] ; % vector of common "minimizer" base pair locations maxrunthresh = 50 ; % threshold for plotting max number of runs elseif ig == 3 ; % P10 istart = 2200 ; iend = 5200 ; hestart = 40 ; % index of start of "high expression" cases vcm = [2805; 3685; 4627] ; % vector of common "minimizer" base pair locations maxrunthresh = 60 ; % threshold for plotting max number of runs elseif ig == 4 ; % STK11 istart = 900 ; iend = 2200 ; hestart = 20 ; % index of start of "high expression" cases vcm = [1177; 1490; 1733] ; % vector of common "minimizer" base pair locations maxrunthresh = 40 ; % threshold for plotting max number of runs end ; disp(['For Gene: ' GeneNamesS{ig} ', with overall end = ' num2str(d)]) ; disp([' Truncating to istart = ' num2str(istart) ... ', iend = ' num2str(iend)]) ; dit = iend - istart + 1 ; % number of grid points after truncation DataSlt = DataSl{ig}(istart:iend,:) ; DataSlmedt = DataSlmed((istart - k):(iend - k),:) ; % shift to put into med locations vimedt = vimed((istart - k):(iend - k),:) ; % vector of indices (original coordinate system) after truncation % locations in median coordinates, of indices in original data coordinates DataSlmedtres = DataSlt - DataSlmedt ; % dit vector of median residulas % Sorting on truncated log area % vilarea = [] for i = 1:n ; vilarea = [vilarea; sum(DataSlt(:,i))] ; end ; [vilareas,visort] = sort(vilarea) ; DataSlmedtressort = DataSlmedtres(:,visort) ; if ipart == 51 ; % Look at Residuals, Study Truncation vaxall = axisSM(DataSl{ig}) ; vaxtres = axisSM(DataSlmedtres) ; vnum0med = [] ; vmaxrun0 = [] ; for ii = 1:n ; % Loop through cases i = visort(ii) ; figure(1) ; clf ; disp([' Plotting curve ' num2str(i) ' of ' num2str(n)]) ; subplot(2,1,1) ; plot(DataSl{ig}(:,i),'k-') ; axis([0 (d + 1) vaxall(1) vaxall(2)]) ; hold on ; plot(vimed,DataSlmed(:,i),'r--','Linewidth',2) ; plot([istart; istart],[vaxall(1); vaxall(2)],'g-') ; plot([iend; iend],[vaxall(1); vaxall(2)],'g-') ; for icm = 1:length(vcm) ; cm = vcm(icm) ; plot([cm; cm],[vaxall(1); vaxall(2)],'b--') ; end ; hold off ; title(CaseNamesS{i},'Interpreter', 'none') ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; text(0 + 0.1 * ((d + 1) - 0), ... vaxall(1) + 0.9 * (vaxall(2) - vaxall(1)), ... ['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig} ... ' Sorted by Area under Truncated Log Counts']) ; text(0 + 0.1 * ((d + 1) - 0), ... vaxall(1) + 0.8 * (vaxall(2) - vaxall(1)), ... ['Running median of ' num2str(2 * k + 1)],'Color','r') ; text(0 + 0.7 * ((d + 1) - 0), ... vaxall(1) + 0.8 * (vaxall(2) - vaxall(1)), ... ['Case Number ' num2str(i)]) ; subplot(2,1,2) ; plot(vimedt,DataSlmedtres(:,i),'k-') ; hold on ; plot([istart; istart],[vaxtres(1); vaxtres(2)],'g-') ; plot([iend; iend],[vaxtres(1); vaxtres(2)],'g-') ; for icm = 1:length(vcm) ; cm = vcm(icm) ; plot([cm; cm],[vaxtres(1) vaxtres(2)],'b--') ; end ; hold off ; title(['Residuals from Running Median of ' num2str(2 * k + 1)]) ; xlabel('Truncated exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([(istart - 1) (iend + 1) vaxtres(1) vaxtres(2)]) ; flag0med = (DataSlmedtres(:,i) == 0) ; num0med = sum(flag0med) ; maxrun0 = 0 ; currun0 = 0 ; for imedt = 1:length(vimedt) ; % Calculate Length of Maximum Run of 0s if DataSlmedtres(imedt,i) == 0 ; currun0 = currun0 + 1 ; else ; maxrun0 = max(maxrun0,currun0) ; currun0 = 0 ; end ; end ; text((istart - 1) + 0.1 * ((iend + 1) - (istart - 1)), ... vaxtres(1) + 0.9 * (vaxtres(2) - vaxtres(1)), ... ['Number of 0 Median Residuals = ' num2str(num0med)]) ; text((istart - 1) + 0.5 * ((iend + 1) - (istart - 1)), ... vaxtres(1) + 0.9 * (vaxtres(2) - vaxtres(1)), ... ['Longest Run of 0 Median Residuals = ' num2str(maxrun0)]) ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'logDatTruncMediannResid'] ; if ii == 1 ; print('-dpsc2',savestr) ; else ; print('-dpsc2',savestr,'-append') ; end ; vnum0med = [vnum0med; num0med] ; vmaxrun0 = [vmaxrun0; maxrun0] ; end ; % of ii loop through cases % Plot Summaries % figure(2) ; clf ; subplot(2,2,1) ; vaxnum = axisSM(vnum0med) ; plot([1:n]',vnum0med,'k-') ; axis([0 (n + 1) vaxnum(1) vaxnum(2)]) ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; xlabel('Sorted Case Number') ; ylabel('Number of 0 Median Residuals') ; subplot(2,2,2) ; vaxmax = axisSM(vmaxrun0) ; plot([1:n]',vmaxrun0,'k-') ; axis([0 (n + 1) vaxmax(1) min(vaxmax(2),maxrunthresh)]) ; title('Truncated to Central Region Between Green Bars') ; xlabel('Sorted Case Number') ; ylabel('Length of Max Run of 0 Median Residuals') ; subplot(2,2,3) ; plot(vnum0med,vmaxrun0,'ko-') ; axis([vaxnum(1) vaxnum(2) vaxmax(1) vaxmax(2)]) ; xlabel('Number of 0 Median Residuals') ; ylabel('Length of Max Run of 0 Median Residuals') ; subplot(2,2,4) ; vaxarea = axisSM(vilareas) ; plot(vilareas,vnum0med,'ko-') ; axis([vaxarea(1) vaxarea(2) vaxnum(1) vaxnum(2)]) ; xlabel('Area unded Truncated log_{10} Counts Curve') ; ylabel('Number of 0 Median Residuals') ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'MedianResidSummaries'] ; print('-dpsc2',savestr) ; elseif ipart == 52 ; % Standard curvdat & scatplot analyses legendcellstr = {{['Gene = ' GeneNamesS{ig}] ['Cases ' num2str(hestart) ' to 180']}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logDatTruncMediannResidCurvDat'] ; paramstruct = struct('legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(DataSlmedtressort(:,hestart:end),paramstruct) ; titlecellstr = {{['Gene = ' GeneNamesS{ig}] ['Cases ' num2str(hestart) ' to 180']}} ; savestr = [genefilepre 'ip' num2str(ipart) 'logDatTruncMediannResidScatPlot'] ; paramstruct = struct('npcadiradd',4, ... 'titlecellstr',titlecellstr, ... 'labelcellstr',{{'PC 1','PC 2','PC 3','PC 4'}}, ... 'savestr',savestr, ... 'iscreenwrite',1) ; scatplotSM(DataSlmedtressort(:,hestart:end),[],paramstruct) ; elseif ipart == 53 ; % Zoom in on common minimizers: % In windows of 100 on each side, make overlays % and compare with average and median curves % for detailed view of location of min'rs % In windows of 200 on each side, apply SiZer for icm = 1:length(vcm) ; % Loop through Common Minimizers % Work in windows of 100 around this common minimizer % cm = vcm(icm) ; winflag = abs(vimedt - cm) <= 100 ; mwdata = DataSlmedtressort(winflag,hestart:end) ; dwin = sum(winflag) ; % number of points in this window vwi = vimedt(winflag) ; % vector of original coordinate indices in this window vavg = mean(mwdata,2) ; vmd = median(mwdata,2) ; % Plot curves and minimizers % figure(1) ; clf ; vax = axisSM(mwdata) ; pleft = vwi(1) - 1 ; pright = vwi(end) + 1 ; plot(vwi,mwdata,'k-') ; title(['Chromosome ' num2str(vchrom(ig)) ', Gene = ' GeneNamesS{ig} ... ', Zoomed into Minimum at ' num2str(cm)]) ; axis([pleft pright vax(1) vax(2)]) ; hold on ; plot(vwi,vavg,'g--','LineWidth',3) ; plot(vwi,vmd,'m--','LineWidth',3) ; plot([cm; cm],[vax(1); vax(2)],'b--') ; text(pleft + 0.1 * (pright - pleft), ... vax(1) + 0.2 * (vax(2) - vax(1)), ... 'Average','Color','g') ; text(pleft + 0.1 * (pright - pleft), ... vax(1) + 0.1 * (vax(2) - vax(1)), ... 'Pointwise Median','Color','m') ; hold off ; orient landscape ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'Curves'] ; print('-dpsc2',savestr) ; % Do curvdatSM analysis % figure(2) ; clf ; legendcellstr = {{['Chromosome ' num2str(vchrom(ig))] ['Gene = ' GeneNamesS{ig}]}} ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'CurvDat'] ; paramstruct = struct('legendcellstr',legendcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; curvdatSM(mwdata,paramstruct) ; % Find case with longest run of 0 medians, and study medians % vmaxrun0 = [] ; for ic = hestart:n ; iic = ic - hestart + 1 ; flag0med = (mwdata(:,iic) == 0) ; num0med = sum(flag0med) ; maxrun0 = 0 ; currun0 = 0 ; for iit = 1:dwin ; % Calculate Length of Maximum Run of 0s it = vwi(iit) ; if mwdata(iit,iic) == 0 ; currun0 = currun0 + 1 ; else ; maxrun0 = max(maxrun0,currun0) ; currun0 = 0 ; end ; end ; vmaxrun0 = [vmaxrun0 ; maxrun0] ; end ; [maxrun0, imax] = max(vmaxrun0) ; imaxrun0sort = hestart:n ; imaxrun0sort = imaxrun0sort(imax) ; % Index among sorted data set imaxrun0ori = 1:n ; imaxrun0ori = imaxrun0ori(visort) ; imaxrun0ori = imaxrun0ori(hestart:n) ; imaxrun0ori = imaxrun0ori(imax) ; % Index among sorted data set figure(3) ; clf ; subplot(2,1,1) ; vaxraw = axisSM(DataSl{ig}(vwi,imaxrun0ori)) ; plot(vwi,DataSl{ig}(vwi,imaxrun0ori),'k-') ; hold on ; plot(vwi,DataSlmedt(winflag,imaxrun0ori),'ro') ; text((vwi(1) - 1) + 0.85 * (vwi(end) - vwi(1) + 2), ... vaxraw(1) + 0.2 * (vaxraw(2) - vaxraw(1)), ... ['Case # ' num2str(imaxrun0ori)],'Color','k') ; text((vwi(1) - 1) + 0.75 * (vwi(end) - vwi(1) + 2), ... vaxraw(1) + 0.1 * (vaxraw(2) - vaxraw(1)), ... ['length of max run = ' num2str(maxrun0)],'Color','k') ; hold off ; title(['Chromosome ' num2str(vchrom(ig)) ', Gene = ' GeneNamesS{ig} ... ', Window of +-100 near ' num2str(cm)]) ; xlabel('Truncated exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([(vwi(1) - 1) (vwi(end) + 1) vaxraw(1) vaxraw(2)]) ; legend('Raw','Median','Location','Southwest') ; subplot(2,1,2) ; vaxdif = axisSM(mwdata(:,imax)) ; plot(vwi,mwdata(:,imax),'b-') ; title(['Corresponding Median Residuals, for Longest Run of 0 Residuals,' ... ' Case # ' num2str(imaxrun0ori)]) ; xlabel('Truncated exonic nt number, not genomic position') ; axis([(vwi(1) - 1) (vwi(end) + 1) vaxdif(1) vaxdif(2)]) ; orient tall ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'Raw-Median'] ; print('-dpsc2',savestr) ; % Work in windows of 200 around this common minimizer % cm = vcm(icm) ; winflag = abs(vimedt - cm) <= 200 ; dwin = sum(winflag) ; % number of points in this window mwdata = DataSlmedtressort(winflag,hestart:end) ; ntrunc = size(mwdata,2) ; vwi = vimedt(winflag) ; % vector of original coordinate indices in this window vavg = mean(mwdata,2) ; figure(4) ; clf ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'SiZer'] ; famoltitle = ['Chromosome ' num2str(vchrom(ig)) ', Gene = ' GeneNamesS{ig} ... ', Zoomed into Minimum at ' num2str(cm)] ; paramstruct = struct('imovie',0,... 'savestr',savestr,... 'famoltitle',famoltitle,... 'iscreenwrite',1,... 'nfh',21,... 'nsh',21) ; sizerSM([vwi vavg],paramstruct) ; % Do SiZer based on all data points, not just averages % vvwi = reshape(vec2matSM(vwi,ntrunc),dwin * ntrunc,1) ; valltdat = reshape(mwdata,dwin * ntrunc,1) ; figure(5) ; clf ; savestr = [genefilepre 'ip' num2str(ipart) 'ZoomMin' num2str(icm) 'SiZerAll'] ; famoltitle = ['Chromosome ' num2str(vchrom(ig)) ', Gene = ' GeneNamesS{ig} ... ', Zoomed into Minimum at ' num2str(cm)] ; paramstruct = struct('imovie',0,... 'savestr',savestr,... 'famoltitle',famoltitle,... 'iscreenwrite',1,... 'nfh',21,... 'nsh',21) ; sizerSM([vvwi valltdat],paramstruct) ; end ; end ; end ; % of inner ipart if-block end ; % of ig loop else ; % ipart > 1, for analyses that are not gene centric if ipart == 101 ; % Scatterplot matrix of all 4 total reads mlogread = [] ; for ig = 1:ng ; vtotread = [] ; for i = 1:n ; % Loop through cases vtotread = [vtotread; sum(DataS{ig,2}(:,i))] ; end ; vlogread = log10(vtotread + 1) ; mlogread = [mlogread vlogread] ; end ; % of ig loop titlecellstr = {{'log_{10} Total Reads' 'For Comparison' 'Across Genes'}} ; labelcellstr = {{['Gene = ' GeneNamesS{1}] ; ... ['Gene = ' GeneNamesS{2}] ; ... ['Gene = ' GeneNamesS{3}] ; ... ['Gene = ' GeneNamesS{4}]}} ; savestr = [outfilepre 'ip' num2str(ipart) 'TotalReadScatPlot'] ; paramstruct = struct('irecenter',0, ... 'titlecellstr',titlecellstr, ... 'labelcellstr',labelcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; scatplotSM(mlogread',eye(4),paramstruct) ; elseif ipart == 102 ; % Brushed Scatterplot matrix of all 4 total reads, % With gene log10 overlays mlogread = [] ; for ig = 1:ng ; vtotread = [] ; for i = 1:n ; % Loop through cases vtotread = [vtotread; sum(DataS{ig,2}(:,i))] ; end ; vlogread = log10(vtotread + 1) ; mlogread = [mlogread vlogread] ; end ; % of ig loop % Set up Color Matrix % mcolor = ones(n,1) * [0 0 0] ; % Start with all black and update vflag = (mlogread(:,1) < 4.2)' ; mcolor(vflag,:) = ones(sum(vflag),1) * [1 0 0] ; vflag = ((4.2 <= mlogread(:,1)) & (mlogread(:,1) < 4.5))' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 1 0] ; vflag = ((3 <= mlogread(:,2)) & (mlogread(:,4) < 4.3))' ; mcolor(vflag,:) = ones(sum(vflag),1) * [0 0 1] ; titlecellstr = {{'log_{10} Total Reads' 'For Comparison' 'Across Genes'}} ; labelcellstr = {{['Gene = ' GeneNamesS{1}] ; ... ['Gene = ' GeneNamesS{2}] ; ... ['Gene = ' GeneNamesS{3}] ; ... ['Gene = ' GeneNamesS{4}]}} ; savestr = [outfilepre 'ip' num2str(ipart) 'TotalReadScatPlotBrushed'] ; paramstruct = struct('irecenter',0, ... 'icolor',mcolor, ... 'titlecellstr',titlecellstr, ... 'labelcellstr',labelcellstr, ... 'savestr',savestr, ... 'iscreenwrite',1) ; scatplotSM(mlogread',eye(4),paramstruct) ; for ig = 1:ng ; figure(ig + 1) ; clf ; nbp = size(DataSl{ig}) ; vibp = (1:nbp)' ; vaxh = axisSM(vibp) ; vaxv = axisSM(DataSl{ig}) ; vaxv(1) = 0 ; plot(DataSl{ig},'k-') ; for i = 1:n ; if sum(mcolor(i,:)) > 0.5 ; % then replot with this color hold on ; plot(DataSl{ig}(:,i),'-','Color',mcolor(i,:)) ; hold off ; end ; end ; title(['Chromosome ' num2str(vchrom(ig)) ' Gene = ' GeneNamesS{ig}]) ; xlabel('exonic nt number, not genomic position') ; ylabel('log_{10}(RNA read depth + 1)') ; axis([vaxh(1) vaxh(2) vaxv(1) vaxv(2)]) ; orient landscape ; savestr = [outfilepre 'ip' num2str(ipart) 'logDataOverlayGene' GeneNamesS{ig}] ; print('-dpsc2',savestr) ; end ; % of ig loop end ; % of inner ipart if-block end ; % of middle ipart if-block end ; % of outer ipart if-block