diff --git a/.travis.yml b/.travis.yml index 94b006e9..7801f32c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -72,9 +72,10 @@ script: - time python test/config_XSectionConfig.py - time python test/cross_section_measurement_00_pick_bins.py - time python test/fix_overflow.py - - time python test/RooFitFit.py - - time python test/TMinuitFit.py - time python test/tools_Calculation.py + - time python test/tools_Fitting_FitData.py + - time python test/tools_Fitting_Minuit.py + - time python test/tools_Fitting_RooFitFit.py - time python test/tools_hist_utilities.py - time python test/tools_Unfolding.py - time python test/Integral_GetBinContent_consistency.py diff --git a/bin/plot_fit_tests b/bin/plot_fit_tests new file mode 100755 index 00000000..6f4c572f --- /dev/null +++ b/bin/plot_fit_tests @@ -0,0 +1,18 @@ +#!/bin/bash +echo "Run this only after run_fit_tests" +echo "This will take a while ... grab a coffee/tea/water." +mkdir -p logs +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/closure_test/ -o 'plots/fitchecks/closure_test' &> logs/plot_fit_checks_closure_test.log & +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/no_merging/closure_test/ -o 'plots/fitchecks/no_merging/closure_test' &> logs/plot_fit_checks_closure_test_no_merging.log & +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/no_constraints/closure_test/ -o 'plots/fitchecks/no_constraints/closure_test' &> logs/plot_fit_checks_closure_test_no_constraints.log & +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/no_constraints_no_merging/closure_test/ -o 'plots/fitchecks/no_constraints_no_merging/closure_test' &> logs/plot_fit_checks_closure_test_NCNM.log & +wait +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/ -o 'plots/fitchecks/' &> logs/plot_fit_checks.log & +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/no_merging/ -o 'plots/fitchecks/no_merging/' &> logs/plot_fit_checks_no_merging.log & +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/no_constraints/ -o 'plots/fitchecks/no_constraints/' &> logs/plot_fit_checks_no_constraints.log & +nohup python src/cross_section_measurement/98b_fit_cross_checks.py -p data/no_constraints_no_merging/ -o 'plots/fitchecks/no_constraints_no_merging/' &> logs/plot_fit_checks_NCNM.log & +wait +DATE=`date +%d.%m.%Y_%H.%M.%S` +tar -czf fit_check_plots_$DATE.tar.gz plots/fitchecks +echo "Created archive to upload and share: fit_check_plots_$DATE.tar.gz" +echo "All done!" diff --git a/bin/run_fit_tests b/bin/run_fit_tests new file mode 100755 index 00000000..a799197b --- /dev/null +++ b/bin/run_fit_tests @@ -0,0 +1,19 @@ +#!/bin/bash +echo "This will take a while ... 
grab a coffee/tea/water" +mkdir -p logs +for var in absolute_eta M_bl M3 angle_bl absolute_eta,M_bl absolute_eta,M3 absolute_eta,angle_bl absolute_eta,M_bl,M3 absolute_eta,M3,angle_bl absolute_eta,M_bl,angle_bl absolute_eta,M_bl,M3,angle_bl; do + nicevar=`echo $var | sed 's/,/_/g'` + echo "Doing variable set: $nicevar" + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --closure_test &> logs/fit_closure_test_$nicevar.log & + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --closure_test --no_combined_signal -p data/no_merging &> logs/fit_closure_test_no_merging_$nicevar.log & + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --closure_test --disable-constraints -p data/no_constraints &> logs/fit_closure_test_no_constraints_$nicevar.log & + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --closure_test --disable-constraints --no_combined_signal -p data/no_constraints_no_merging &> logs/fit_closure_test_NCNM_$nicevar.log & + wait + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --test &> logs/fit_test_$nicevar.log & + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --test --no_combined_signal -p data/no_merging &> logs/fit_test_no_merging_$nicevar.log & + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --test --disable-constraints -p data/no_constraints &> logs/fit_test_no_constraints_$nicevar.log & + nohup time python src/cross_section_measurement/01_get_fit_results.py --fit-variables $var --test --disable-constraints --no_combined_signal -p data/no_constraints_no_merging &> logs/fit_test_NCNM_$nicevar.log & + wait; +done + +echo "All done!" diff --git a/bin/run_x_section_measurement b/bin/run_x_section_measurement new file mode 100755 index 00000000..a4ed75e6 --- /dev/null +++ b/bin/run_x_section_measurement @@ -0,0 +1,14 @@ +#!/bin/bash +echo "Running differential cross-section analysis for 7 and 8 TeV data" +bash x_01_all_vars +wait +bash x_02_all_vars +wait +bash x_03_all_vars +wait +bash x_04_all_vars +wait +bash x_05_all_vars +wait + +echo "Everything is done. Time to run make_analysis_note and go home!" diff --git a/bin/x_01_all_vars b/bin/x_01_all_vars index 28df3f82..15d61198 100755 --- a/bin/x_01_all_vars +++ b/bin/x_01_all_vars @@ -1,13 +1,24 @@ #!/bin/bash +echo "This will take a while ... 
grab a coffee/tea/water" mkdir -p logs -python src/cross_section_measurement/01_get_fit_results.py -V &> logs/MET_fit_8TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -v HT &> logs/HT_fit_8TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -v ST &> logs/ST_fit_8TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -v MT &> logs/MT_fit_8TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -v WPT &> logs/WPT_fit_8TeV.log & -# 7 TeV -python src/cross_section_measurement/01_get_fit_results.py -V -c 7&> logs/MET_fit_7TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -c 7 -v HT &> logs/HT_fit_7TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -c 7 -v ST &> logs/ST_fit_7TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -c 7 -v MT &> logs/MT_fit_7TeV.log & -python src/cross_section_measurement/01_get_fit_results.py -V -c 7 -v WPT &> logs/WPT_fit_7TeV.log & +fit_var="absolute_eta,M3,angle_bl" +nice_fit_var=`echo $fit_var | sed 's/,/_/g'` +N_JOBS=4 + +echo "Using the fit variable(s): $fit_var" + +i=0 +for var in MET HT ST WPT MT; do + echo "Fitting distribution: $var" + nohup time python src/cross_section_measurement/01_get_fit_results.py -V -v $var --no_combined_signal -c 7 --fit-variables $fit_var &> logs/${var}_fit_7TeV_${nice_fit_var}.log & + let i+=1 + nohup time python src/cross_section_measurement/01_get_fit_results.py -V -v $var --no_combined_signal -c 8 --fit-variables $fit_var &> logs/${var}_fit_8TeV_${nice_fit_var}.log & + let i+=1 + if (( $i % N_JOBS == 0 )) + then + echo "Waiting on the above to finish." + wait; + fi +done + +echo "All done! Time to run x_02_all_vars." diff --git a/bin/x_02_all_vars b/bin/x_02_all_vars index 80396155..1fd8190b 100755 --- a/bin/x_02_all_vars +++ b/bin/x_02_all_vars @@ -1,12 +1,24 @@ #!/bin/bash -python src/cross_section_measurement/02_unfold_and_measure.py &> logs/MET_unfold_8TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -v HT &> logs/HT_unfold_8TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -v ST &> logs/ST_unfold_8TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -v MT &> logs/MT_unfold_8TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -v WPT &> logs/WPT_unfold_8TeV.log & -# 7 TeV -python src/cross_section_measurement/02_unfold_and_measure.py -c 7 &> logs/MET_unfold_7TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -c 7 -v HT &> logs/HT_unfold_7TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -c 7 -v ST &> logs/ST_unfold_7TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -c 7 -v MT &> logs/MT_unfold_7TeV.log & -python src/cross_section_measurement/02_unfold_and_measure.py -c 7 -v WPT &> logs/WPT_unfold_7TeV.log & +echo "This will take a while ... 
grab a coffee/tea/water" +mkdir -p logs +fit_var="absolute_eta,M3,angle_bl" +nice_fit_var=`echo $fit_var | sed 's/,/_/g'` +N_JOBS=4 + +echo "Using the fit variable(s): $fit_var" + +i=0 +for var in MET HT ST MT WPT; do + echo "Unfolding distribution: $var" + nohup time python src/cross_section_measurement/02_unfold_and_measure.py -v $var -c 7 -p data/$nice_fit_var &> logs/${var}_unfold_7TeV_${nice_fit_var}.log & + let i+=1 + nohup time python src/cross_section_measurement/02_unfold_and_measure.py -v $var -c 8 -p data/$nice_fit_var &> logs/${var}_unfold_8TeV_${nice_fit_var}.log & + let i+=1 + if (( $i % N_JOBS == 0 )) + then + echo "Waiting on the above to finish." + wait; + fi +done + +echo "All done! Time to run x_03_all_vars." diff --git a/bin/x_03_all_vars b/bin/x_03_all_vars index 15ef4eb0..33bfd553 100755 --- a/bin/x_03_all_vars +++ b/bin/x_03_all_vars @@ -1,12 +1,24 @@ #!/bin/bash -python src/cross_section_measurement/03_calculate_systematics.py -s &> logs/MET_calculate_8TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -s -v HT &> logs/HT_calculate_8TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -s -v ST &> logs/ST_calculate_8TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -s -v MT &> logs/MT_calculate_8TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -s -v WPT &> logs/WPT_calculate_8TeV.log & -# 7 TeV -python src/cross_section_measurement/03_calculate_systematics.py -c 7 -s &> logs/MET_calculate_7TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -c 7 -s -v HT &> logs/HT_calculate_7TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -c 7 -s -v ST &> logs/ST_calculate_7TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -c 7 -s -v MT &> logs/MT_calculate_7TeV.log & -python src/cross_section_measurement/03_calculate_systematics.py -c 7 -s -v WPT &> logs/WPT_calculate_7TeV.log & \ No newline at end of file +echo "This will take a while ... grab a coffee/tea/water" +mkdir -p logs +fit_var="absolute_eta,M3,angle_bl" +nice_fit_var=`echo $fit_var | sed 's/,/_/g'` +N_JOBS=4 + +echo "Using the fit variable(s): $fit_var" + +i=0 +for var in MET HT ST MT WPT; do + echo "Calculating diff. x-section for distribution: $var" + nohup time python src/cross_section_measurement/03_calculate_systematics.py -s -v $var -c 7 -p data/$nice_fit_var &> logs/${var}_calculate_7TeV_${nice_fit_var}.log & + let i+=1 + nohup time python src/cross_section_measurement/03_calculate_systematics.py -s -v $var -c 8 -p data/$nice_fit_var &> logs/${var}_calculate_8TeV_${nice_fit_var}.log & + let i+=1 + if (( $i % N_JOBS == 0 )) + then + echo "Waiting on the above to finish." + wait; + fi +done + +echo "All done! Time to run x_04_all_vars and x_05_all_vars." diff --git a/bin/x_04_all_vars b/bin/x_04_all_vars index ceb5a8f1..c3dca13f 100755 --- a/bin/x_04_all_vars +++ b/bin/x_04_all_vars @@ -1,13 +1,25 @@ #!/bin/bash +echo "This will take a while ... 
grab a coffee/tea/water" +mkdir -p logs mkdir -p plots -python src/cross_section_measurement/04_make_plots_matplotlib.py &> logs/MET_plot_8TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -v HT &> logs/HT_plot_8TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -v ST &> logs/ST_plot_8TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -v MT &> logs/MT_plot_8TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -v WPT &> logs/WPT_plot_8TeV.log & -# 7 TeV -python src/cross_section_measurement/04_make_plots_matplotlib.py -c 7 &> logs/MET_plot_7TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -c 7 -v HT &> logs/HT_plot_7TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -c 7 -v ST &> logs/ST_plot_7TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -c 7 -v MT &> logs/MT_plot_7TeV.log & -python src/cross_section_measurement/04_make_plots_matplotlib.py -c 7 -v WPT &> logs/WPT_plot_7TeV.log & +fit_var="absolute_eta,M3,angle_bl" +nice_fit_var=`echo $fit_var | sed 's/,/_/g'` +N_JOBS=4 + +echo "Using the fit variable(s): $fit_var" + +i=0 +for var in MET HT ST MT WPT; do + echo "Plotting diff. x-section for distribution: $var" + nohup time python src/cross_section_measurement/04_make_plots_matplotlib.py -v $var -c 7 -p data/$nice_fit_var &> logs/${var}_plot_7TeV_${nice_fit_var}.log & + let i+=1 + nohup time python src/cross_section_measurement/04_make_plots_matplotlib.py -v $var -c 8 -p data/$nice_fit_var &> logs/${var}_plot_8TeV_${nice_fit_var}.log & + let i+=1 + if (( $i % N_JOBS == 0 )) + then + echo "Waiting on the above to finish." + wait; + fi +done + +echo "All done!" diff --git a/bin/x_05_all_vars b/bin/x_05_all_vars index 75534b37..47f1584a 100755 --- a/bin/x_05_all_vars +++ b/bin/x_05_all_vars @@ -1,12 +1,25 @@ #!/bin/bash -python src/cross_section_measurement/05_make_tables.py &> logs/MET_table_8TeV.log & -python src/cross_section_measurement/05_make_tables.py -v HT &> logs/HT_table_8TeV.log & -python src/cross_section_measurement/05_make_tables.py -v ST &> logs/ST_table_8TeV.log & -python src/cross_section_measurement/05_make_tables.py -v MT &> logs/MT_table_8TeV.log & -python src/cross_section_measurement/05_make_tables.py -v WPT &> logs/WPT_table_8TeV.log & -# 7 TeV -python src/cross_section_measurement/05_make_tables.py -c 7 &> logs/MET_table_7TeV.log & -python src/cross_section_measurement/05_make_tables.py -c 7 -v HT &> logs/HT_table_7TeV.log & -python src/cross_section_measurement/05_make_tables.py -c 7 -v ST &> logs/ST_table_7TeV.log & -python src/cross_section_measurement/05_make_tables.py -c 7 -v MT &> logs/MT_table_7TeV.log & -python src/cross_section_measurement/05_make_tables.py -c 7 -v WPT &> logs/WPT_table_7TeV.log & +echo "This will take a while ... grab a coffee/tea/water" +mkdir -p logs +mkdir -p plots +fit_var="absolute_eta,M3,angle_bl" +nice_fit_var=`echo $fit_var | sed 's/,/_/g'` +N_JOBS=4 + +echo "Using the fit variable(s): $fit_var" + +i=0 +for var in MET HT ST MT WPT; do + echo "Tabulating diff. 
x-section for distribution: $var" + nohup time python src/cross_section_measurement/05_make_tables.py -v $var -c 7 -p data/$nice_fit_var &> logs/${var}_table_7TeV_${nice_fit_var}.log & + let i+=1 + nohup time python src/cross_section_measurement/05_make_tables.py -v $var -c 8 -p data/$nice_fit_var &> logs/${var}_table_8TeV_${nice_fit_var}.log & + let i+=1 + if (( $i % N_JOBS == 0 )) + then + echo "Waiting on the above to finish." + wait; + fi +done + +echo "All done!" diff --git a/config/__init__.py b/config/__init__.py index 7a973c87..f82a11f3 100644 --- a/config/__init__.py +++ b/config/__init__.py @@ -3,7 +3,7 @@ __all__ = [ 'XSectionConfig', ] - + class XSectionConfig(): current_analysis_path = '/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_2nd_draft/' known_centre_of_mass_energies = [7, 8] @@ -17,6 +17,9 @@ class XSectionConfig(): 'data_muon_category_templates', 'electron_QCD_MC_file', 'electron_control_region', 'electron_control_region_systematic', + 'fit_boundaries', + 'fit_variable_bin_width', + 'fit_variable_unit', 'general_category_templates', 'generator_systematic_ttbar_templates', 'generator_systematic_vjets_templates', @@ -43,7 +46,7 @@ class XSectionConfig(): 'unfolding_scale_up', 'unfolding_scale_up_raw', 'vjets_theory_systematic_prefix' ] - + def __init__( self, centre_of_mass_energy ): if not centre_of_mass_energy in self.known_centre_of_mass_energies: raise AttributeError( 'Uknown centre of mass energy' ) @@ -55,9 +58,9 @@ def __fill_defaults__( self ): self.path_to_unfolding_histograms = self.path_to_files + 'unfolding/' path_to_files = self.path_to_files path_to_unfolding_histograms = self.path_to_unfolding_histograms - + self.luminosity = self.luminosities[self.centre_of_mass_energy] - + # general self.met_systematics_suffixes = [ "ElectronEnUp", @@ -94,20 +97,47 @@ def __fill_defaults__( self ): 'pf':'PFMET', 'type1':'patType1CorrectedPFMet', } + + self.fit_boundaries = { + 'absolute_eta' : ( 0., 2.4 ), + 'M3' : ( 0, 900 ), + 'M_bl' : ( 0, 400 ), + 'angle_bl' : ( 0, 4 ), + } + # dependent on rebin + self.fit_variable_bin_width = { + 'absolute_eta' : 0.2, + 'M3' : 20, + 'M_bl' : 10, + 'angle_bl' : 0.2, + } + # relates to fit_variable_bin_width + self.rebin = { + 'absolute_eta' : 2, # 2 -> 0.2 + 'M3' : 5, # 5 -> 25 GeV + 'M_bl' : 4, # 2 -> 20 GeV + 'angle_bl' : 2, # 2 -> 0.2 + } + self.fit_variable_unit = { + 'absolute_eta' : '', + 'M3' : 'GeV', + 'M_bl' : 'GeV', + 'angle_bl' : '', + } self.ttbar_theory_systematic_prefix = 'TTJets_' self.vjets_theory_systematic_prefix = 'VJets_' # files self.middle = '_' + str( self.luminosity ) + 'pb_PFElectron_PFMuon_PF2PATJets_PFMET' middle = self.middle - + self.data_file_muon = path_to_files + 'central/SingleMu' + middle + '.root' self.muon_QCD_file = path_to_files + 'QCD_data_mu.root' self.SingleTop_file = path_to_files + 'central/SingleTop' + middle + '.root' self.electron_QCD_MC_file = path_to_files + 'central/QCD_Electron' + middle + '.root' self.muon_QCD_MC_file = path_to_files + 'central/QCD_Muon' + middle + '.root' self.higgs_file = path_to_files + 'central/TTH_Inclusive_M-125' + middle + '.root' - + self.categories_and_prefixes = { 'central':'', 'Electron_down':'_minusElectron', @@ -123,62 +153,62 @@ def __fill_defaults__( self ): 'LightJet_down':'_minusLightJet', 'LightJet_up':'_plusLightJet', } - + # now fill in the centre of mass dependent values # this position is crucial if self.centre_of_mass_energy == 7: self.__fill_defaults_7TeV__() if self.centre_of_mass_energy == 8: self.__fill_defaults_8TeV__() - + 
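# Illustrative usage sketch (hypothetical, not part of the patch; 'histogram' stands for any
# ROOT/rootpy histogram): the four per-fit-variable dictionaries added above are keyed by the
# fit variable name. fit_boundaries and rebin are consumed like this in 01_get_fit_results.py
# further down; fit_variable_bin_width and fit_variable_unit are presumably for plot labelling.
#     config = XSectionConfig( 8 )
#     low, high = config.fit_boundaries['M3']      # fit range, here (0, 900)
#     histogram.Rebin( config.rebin['M3'] )        # coarser binning before the fit
#     width = config.fit_variable_bin_width['M3']  # 20, in units of config.fit_variable_unit['M3'] ('GeV')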
self.generator_systematics = [ 'matchingup', 'matchingdown', 'scaleup', 'scaledown'] self.ttbar_generator_systematics = [ 'matchingup', 'matchingdown', 'scaleup', 'scaledown', 'mcatnlo', 'ptreweight'] self.central_general_template = path_to_files + 'central/%s' + middle + '.root' self.generator_systematic_ttbar_templates = { systematic: path_to_files + 'central/TTJets-%s_%dpb_PFElectron_PFMuon_PF2PATJets_PFMET.root' % ( systematic, self.luminosity ) for systematic in self.ttbar_generator_systematics} self.generator_systematic_vjets_templates = { systematic: path_to_files + 'central/VJets-%s_%dpb_PFElectron_PFMuon_PF2PATJets_PFMET.root' % ( systematic, self.luminosity ) for systematic in self.generator_systematics} self.pdf_uncertainty_template = path_to_files + 'PDFWeights/TTJet' + middle + '_PDFWeights_%d.root' - + categories_and_prefixes = self.categories_and_prefixes - + self.general_category_templates = {category: path_to_files + category + '/%s' + middle + prefix + '.root' for category, prefix in categories_and_prefixes.iteritems()} self.ttbar_category_templates = {category: path_to_files + category + '/TTJet' + middle + prefix + '.root' for category, prefix in categories_and_prefixes.iteritems()} self.SingleTop_category_templates = {category: path_to_files + category + '/SingleTop' + middle + prefix + '.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} self.VJets_category_templates = {category: path_to_files + category + '/VJets' + middle + prefix + '.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} self.higgs_category_templates = {category: path_to_files + category + '/TTH_Inclusive_M-125' + middle + prefix + '.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} self.muon_QCD_MC_category_templates = {category: path_to_files + category + '/QCD_Muon' + middle + prefix + '.root' for ( category, prefix ) in categories_and_prefixes.iteritems()} - + self.data_muon_category_templates = { 'central': self.data_file_muon, 'JES_up': path_to_files + 'JES_up/SingleMu' + middle + self.categories_and_prefixes['JES_up'] + '.root', 'JES_down': path_to_files + 'JES_down/SingleMu' + middle + self.categories_and_prefixes['JES_down'] + '.root' } - + self.unfolding_madgraph_raw = path_to_unfolding_histograms + 'unfolding_merged.root' self.unfolding_powheg_raw = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_powheg.root' % self.centre_of_mass_energy self.unfolding_mcatnlo_raw = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_mcatnlo.root' % self.centre_of_mass_energy - + self.unfolding_scale_down_raw = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_scaledown.root' % self.centre_of_mass_energy self.unfolding_scale_up_raw = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_scaleup.root' % self.centre_of_mass_energy self.unfolding_matching_down_raw = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_matchingdown.root' % self.centre_of_mass_energy self.unfolding_matching_up_raw = path_to_unfolding_histograms + 'unfolding_TTJets_%dTeV_matchingup.root' % self.centre_of_mass_energy - + self.unfolding_madgraph = self.unfolding_madgraph_raw.replace( '.root', '_asymmetric.root' ) self.unfolding_powheg = self.unfolding_powheg_raw.replace( '.root', '_asymmetric.root' ) self.unfolding_mcatnlo = self.unfolding_mcatnlo_raw.replace( '.root', '_asymmetric.root' ) - + self.unfolding_scale_down = self.unfolding_scale_down_raw.replace( '.root', '_asymmetric.root' ) self.unfolding_scale_up = self.unfolding_scale_up_raw.replace( 
'.root', '_asymmetric.root' ) self.unfolding_matching_down = self.unfolding_matching_down_raw.replace( '.root', '_asymmetric.root' ) self.unfolding_matching_up = self.unfolding_matching_up_raw.replace( '.root', '_asymmetric.root' ) - + self.histogram_path_templates = { - 'MET' : 'TTbar_plus_X_analysis/%s/Ref selection/Binned_MET_Analysis/%s_bin_%s/%s_absolute_eta', - 'HT' : 'TTbar_plus_X_analysis/%s/Ref selection/Binned_HT_Analysis/HT_bin_%s/%s_absolute_eta', - 'ST': 'TTbar_plus_X_analysis/%s/Ref selection/Binned_ST_Analysis/ST_with_%s_bin_%s/%s_absolute_eta', - 'MT': 'TTbar_plus_X_analysis/%s/Ref selection/Binned_MT_Analysis/MT_with_%s_bin_%s/%s_absolute_eta', - 'WPT': 'TTbar_plus_X_analysis/%s/Ref selection/Binned_WPT_Analysis/WPT_with_%s_bin_%s/%s_absolute_eta' + 'MET' : 'TTbar_plus_X_analysis/%s/Ref selection/Binned_MET_Analysis/%s_bin_%s/%s', + 'HT' : 'TTbar_plus_X_analysis/%s/Ref selection/Binned_HT_Analysis/HT_bin_%s/%s', + 'ST': 'TTbar_plus_X_analysis/%s/Ref selection/Binned_ST_Analysis/ST_with_%s_bin_%s/%s', + 'MT': 'TTbar_plus_X_analysis/%s/Ref selection/Binned_MT_Analysis/MT_with_%s_bin_%s/%s', + 'WPT': 'TTbar_plus_X_analysis/%s/Ref selection/Binned_WPT_Analysis/WPT_with_%s_bin_%s/%s' } - + # optimal regularisation parameters self.k_values_electron = { 'MET' : 3, @@ -209,19 +239,18 @@ def __fill_defaults__( self ): self.muon_control_region = 'QCD non iso mu+jets ge3j' self.muon_control_region_systematic = 'QCD non iso mu+jets ge3j' # no systematic yet - self.rebin = 2 self.include_higgs = False - + self.luminosity_scale = self.new_luminosity / self.luminosity - + def __fill_defaults_7TeV__( self ): middle = self.middle path_to_files = self.path_to_files self.new_luminosity = self.luminosity # pb^-1 self.ttbar_xsection = 164 # pb - + self.data_file_electron = path_to_files + 'central/ElectronHad' + middle + '.root' self.rate_changing_systematics = { 'luminosity': 0.022, # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupSystematicErrors @@ -230,9 +259,9 @@ def __fill_defaults_7TeV__( self ): } self.categories_and_prefixes['PU_down'] = '_PU_64600mb' self.categories_and_prefixes['PU_up'] = '_PU_71400mb' - + self.special_muon_histogram = 'etaAbs_ge2j_data' - + self.data_electron_category_templates = {'central': self.data_file_electron, 'JES_up': path_to_files + 'JES_up/ElectronHad' + middle + self.categories_and_prefixes['JES_up'] + '.root', 'JES_down': path_to_files + 'JES_down/ElectronHad' + middle + self.categories_and_prefixes['JES_down'] + '.root' @@ -244,7 +273,7 @@ def __fill_defaults_8TeV__( self ): self.new_luminosity = 19712 # pb^-1 self.ttbar_xsection = 245.8 # pb - + self.data_file_electron = path_to_files + 'central/SingleElectron' + middle + '.root' self.rate_changing_systematics = { 'luminosity': 0.026, # https://hypernews.cern.ch/HyperNews/CMS/get/physics-announcements/2526.html @@ -253,9 +282,9 @@ def __fill_defaults_8TeV__( self ): } self.categories_and_prefixes['PU_down'] = '_PU_65835mb' self.categories_and_prefixes['PU_up'] = '_PU_72765mb' - + self.special_muon_histogram = 'muon_AbsEta_0btag' - + self.data_electron_category_templates = {'central': self.data_file_electron, 'JES_up': path_to_files + 'JES_up/SingleElectron' + middle + self.categories_and_prefixes['JES_up'] + '.root', 'JES_down': path_to_files + 'JES_down/SingleElectron' + middle + self.categories_and_prefixes['JES_down'] + '.root' diff --git a/config/latex_labels.py b/config/latex_labels.py index 4114b580..d2ead6e9 100644 --- a/config/latex_labels.py +++ b/config/latex_labels.py @@ -81,5 +81,13 @@ 
'WJets':'W $\\rightarrow \ell\\nu$', 'ZJets':'Z/$\gamma^*$ + jets', 'TTJet':'$\mathrm{t}\\bar{\mathrm{t}}$', - 'SingleTop':'Single-Top' + 'SingleTop':'Single-Top' , + 'V+Jets' : 'W/Z + jets' } + +fit_variables_latex = { + 'absolute_eta' : r'lepton $|\eta|$', + 'M3' : r'$M3$', + 'M_bl' : r'$M(b,l)$', + 'angle_bl' : r'angle$(b,l)$', + } diff --git a/config/variable_binning.py b/config/variable_binning.py index 61aa20c5..511d88a2 100644 --- a/config/variable_binning.py +++ b/config/variable_binning.py @@ -1,5 +1,9 @@ - -eta_bin_edges = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0] +fit_variable_bin_edges = { + 'absolute_eta' : [round( i * 0.2, 2 ) for i in range ( int( 3 / 0.2 ) + 1 )], + 'M3' : [i * 25 for i in range ( int( 1000 / 25 ) + 1 )], + 'M_bl' : [i * 10 for i in range ( int( 1000 / 10 ) + 1 )], + 'angle_bl' : [round( i * 0.2, 2 ) for i in range ( int( 4 / 0.2 ) + 1 )], + } bin_edges = { 'MET':[0.0, 31.0, 58.0, 96.0, 142.0, 191.0, 300], diff --git a/src/cross_section_measurement/01_get_fit_results.py b/src/cross_section_measurement/01_get_fit_results.py index b2d69522..6f879f3d 100644 --- a/src/cross_section_measurement/01_get_fit_results.py +++ b/src/cross_section_measurement/01_get_fit_results.py @@ -6,7 +6,7 @@ from __future__ import division from optparse import OptionParser import sys -# rootpy +# rootpy from rootpy.io import File # DailyPythonScripts from config.summations_common import b_tag_summations @@ -14,293 +14,381 @@ from config import XSectionConfig from tools.Calculation import decombine_result, combine_complex_results -from tools.Fitting import TMinuitFit, RooFitFit +from tools.Fitting import Minuit, RooFitFit, FitData, FitDataCollection from tools.file_utilities import write_data_to_JSON from tools.ROOT_utililities import set_root_defaults +from tools.hist_utilities import clean_control_region, adjust_overflow_to_limit +from lib import closure_tests - -def get_histograms(channel, input_files, variable, met_type, variable_bin, b_tag_bin, rebin=1): - global b_tag_bin_VJets +def get_histograms( channel, input_files, variable, met_type, variable_bin, + b_tag_bin, rebin = 1, fit_variable = 'absolute_eta', + scale_factors = None ): + global b_tag_bin_VJets, fit_variables global electron_control_region, muon_control_region + boundaries = measurement_config.fit_boundaries[fit_variable] histograms = {} if not variable in measurement_config.histogram_path_templates.keys(): print 'Fatal Error: unknown variable ', variable sys.exit() - - abs_eta = '' - abs_eta_data = '' - abs_eta_template = measurement_config.histogram_path_templates[variable] + + fit_variable_name = '' + fit_variable_name_data = '' + fit_variable_template = measurement_config.histogram_path_templates[variable] + ht_fill_list, other_fill_list = None, None + if fit_variable == 'absolute_eta': + ht_fill_list = ( analysis_type[channel], variable_bin, channel + '_' + fit_variable ) + other_fill_list = ( analysis_type[channel], met_type, variable_bin, channel + '_' + fit_variable ) + else: + ht_fill_list = ( analysis_type[channel], variable_bin, fit_variable ) + other_fill_list = ( analysis_type[channel], met_type, variable_bin, fit_variable ) if variable == 'HT': - abs_eta = abs_eta_template % (analysis_type[channel], variable_bin, channel) - abs_eta_data = abs_eta + fit_variable_name = fit_variable_template % ht_fill_list + fit_variable_name_data = fit_variable_name else: - abs_eta = abs_eta_template % (analysis_type[channel], met_type, variable_bin, channel) + fit_variable_name 
= fit_variable_template % other_fill_list if 'JetRes' in met_type: - abs_eta_data = abs_eta.replace('JetResDown', '') - abs_eta_data = abs_eta_data.replace('JetResUp', '') + fit_variable_name_data = fit_variable_name.replace( 'JetResDown', '' ) + fit_variable_name_data = fit_variable_name_data.replace( 'JetResUp', '' ) if 'patPFMet' in met_type: - abs_eta = abs_eta.replace('patPFMet', 'PFMET') + fit_variable_name = fit_variable_name.replace( 'patPFMet', 'PFMET' ) else: - abs_eta_data = abs_eta - + fit_variable_name_data = fit_variable_name + for sample, file_name in input_files.iteritems(): if not file_name: continue - h_abs_eta = None + h_fit_variable = None if sample == 'data': - h_abs_eta = get_histogram(file_name, abs_eta_data, b_tag_bin) + h_fit_variable = get_histogram( file_name, fit_variable_name_data, b_tag_bin ) elif sample == 'V+Jets': - #extracting the V+Jets template from its specific b-tag bin (>=0 by default) and scaling it to analysis b-tag bin - h_abs_eta = get_histogram(file_name, abs_eta, b_tag_bin) - h_abs_eta_VJets_specific_b_tag_bin = get_histogram(file_name, abs_eta, b_tag_bin_VJets) - try: - h_abs_eta_VJets_specific_b_tag_bin.Scale(h_abs_eta.Integral()/h_abs_eta_VJets_specific_b_tag_bin.Integral()) - h_abs_eta = h_abs_eta_VJets_specific_b_tag_bin - except: - print 'WARNING: V+Jets template from ' + str(file_name) + ', histogram ' + abs_eta + ' in ' + b_tag_bin_VJets +\ - ' b-tag bin is empty. Using central bin (' + b_tag_bin + '), integral = ' + str(h_abs_eta.Integral()) + # extracting the V+Jets template from its specific b-tag bin (>=0 by default) and scaling it to analysis b-tag bin + h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin ) + if not '_bl' in fit_variable: + # this procedure is not valid for fit variables requiring at least one b-tag + h_fit_variable_VJets_specific_b_tag_bin = get_histogram( file_name, fit_variable_name, b_tag_bin_VJets ) + try: + scale = h_fit_variable.integral( overflow = True ) / h_fit_variable_VJets_specific_b_tag_bin.integral( overflow = True ) + h_fit_variable_VJets_specific_b_tag_bin.Scale( scale ) + h_fit_variable = h_fit_variable_VJets_specific_b_tag_bin + except: + print 'WARNING: V+Jets template from ' + str( file_name ) + ', histogram ' + fit_variable_name + ' in ' + b_tag_bin_VJets + \ + ' b-tag bin is empty. Using central bin (' + b_tag_bin + '), integral = ' + str( h_fit_variable.Integral() ) else: - h_abs_eta = get_histogram(file_name, abs_eta, b_tag_bin) - h_abs_eta.Rebin(rebin) - histograms[sample] = h_abs_eta + h_fit_variable = get_histogram( file_name, fit_variable_name, b_tag_bin ) + h_fit_variable.Rebin( rebin ) + # this is not working. 
M3 has fewer data events than absolute_eta + h_fit_variable = adjust_overflow_to_limit( h_fit_variable, boundaries[0], boundaries[1] ) + histograms[sample] = h_fit_variable + + h_qcd = get_qcd_histograms( input_files, variable, variable_bin, + channel, fit_variable_name, rebin ) + histograms['QCD'] = adjust_overflow_to_limit( h_qcd, + boundaries[0], boundaries[1] ) + # normalise histograms + if not measurement_config.luminosity_scale == 1.0: + for sample, histogram in histograms.iteritems(): + if sample == 'data': + continue + histogram.Scale( measurement_config.luminosity_scale ) + + # apply normalisation scale factors for rate-changing systematics + if scale_factors: + for source, factor in scale_factors.iteritems(): + if 'luminosity' in source: + for sample, histogram in histograms.iteritems(): + if sample == 'data': + continue + histogram.Scale( factor ) + for sample, histogram in histograms.iteritems(): + if sample in source: + histogram.Scale( factor ) + return histograms + +def get_qcd_histograms( input_files, variable, variable_bin, channel, + fit_variable_hist_name, rebin = 1 ): + ''' + Retrieves the data-driven QCD template and normalises it to MC prediction. + It uses the inclusive template (across all variable bins) and removes other processes + before normalising the QCD template. + ''' + global electron_QCD_MC_file, muon_QCD_MC_file, analysis_type, \ + electron_control_region, muon_control_region, b_tag_bin + + control_region = '' + control_region_btag = '0btag' + if 'M_bl' in fit_variable_hist_name or 'angle_bl' in fit_variable_hist_name: + control_region_btag = '1orMoreBtag' + qcd_file = '' + samples = ['data', 'V+Jets', 'SingleTop', 'TTJet'] + if channel == 'electron': - global electron_QCD_MC_file - h_abs_eta_mc = get_histogram(electron_QCD_MC_file, abs_eta, b_tag_bin) - h_abs_eta_mc.Rebin(rebin) - # data-driven QCD template extracted from all-inclusive eta distributions - abs_eta = 'TTbar_plus_X_analysis/%s/Ref selection/Electron/electron_AbsEta' % (analysis_type[channel]) - abs_eta = abs_eta.replace('Ref selection', electron_control_region) - h_abs_eta = get_histogram(input_files['data'], abs_eta, '0btag') - h_abs_eta = h_abs_eta - get_histogram(input_files['V+Jets'], abs_eta, '0btag') - h_abs_eta = h_abs_eta - get_histogram(input_files['TTJet'], abs_eta, '0btag') - h_abs_eta = h_abs_eta - get_histogram(input_files['SingleTop'], abs_eta, '0btag') - electron_QCD_normalisation_factor = 1 - h_abs_eta.Rebin(20) - if measurement_config.centre_of_mass_energy == 8: - electron_QCD_normalisation_factor = h_abs_eta_mc.Integral() / h_abs_eta.Integral() - if electron_QCD_normalisation_factor == 0: - electron_QCD_normalisation_factor = 1 / h_abs_eta.Integral() - if measurement_config.centre_of_mass_energy == 7: - # scaling to 10% of data - electron_QCD_normalisation_factor = 0.1 * histograms['data'].Integral() / h_abs_eta.Integral() - - h_abs_eta.Scale(electron_QCD_normalisation_factor) - histograms['QCD'] = h_abs_eta - + control_region = electron_control_region + qcd_file = electron_QCD_MC_file if channel == 'muon': - # data-driven QCD template extracted from all-inclusive eta distributions - global muon_QCD_MC_file - h_abs_eta_mc = get_histogram(muon_QCD_MC_file, abs_eta, b_tag_bin) - h_abs_eta_mc.Rebin(rebin) - abs_eta = 'TTbar_plus_X_analysis/%s/Ref selection/Muon/muon_AbsEta' % (analysis_type[channel]) - abs_eta = abs_eta.replace('Ref selection', muon_control_region) -# abs_eta = measurement_config.special_muon_histogram -# h_abs_eta = get_histogram(muon_QCD_file, abs_eta, '') 
- h_abs_eta = get_histogram(input_files['data'], abs_eta, '0btag') - h_abs_eta = h_abs_eta - get_histogram(input_files['TTJet'], abs_eta, '0btag') - h_abs_eta = h_abs_eta - get_histogram(input_files['V+Jets'], abs_eta, '0btag') - h_abs_eta = h_abs_eta - get_histogram(input_files['SingleTop'], abs_eta, '0btag') - muon_QCD_normalisation_factor = 1 - h_abs_eta.Rebin(20) - if measurement_config.centre_of_mass_energy == 8: - muon_QCD_normalisation_factor = h_abs_eta_mc.Integral() / h_abs_eta.Integral() - if muon_QCD_normalisation_factor == 0: - muon_QCD_normalisation_factor = 1 / h_abs_eta.Integral() - if measurement_config.centre_of_mass_energy == 7: - muon_QCD_normalisation_factor = 0.05 * histograms['data'].Integral() / h_abs_eta.Integral() - - h_abs_eta.Scale(muon_QCD_normalisation_factor) - histograms['QCD'] = h_abs_eta - - return histograms + control_region = muon_control_region + qcd_file = muon_QCD_MC_file + inclusive_control_region_hists = {} + + for var_bin in variable_bins_ROOT[variable]: + hist_name = fit_variable_hist_name.replace( variable_bin, var_bin ) + + control_region_hist_name = hist_name.replace( 'Ref selection', + control_region ) + for sample in samples: + if not inclusive_control_region_hists.has_key( sample ): + inclusive_control_region_hists[sample] = get_histogram( + input_files[sample], + control_region_hist_name, + control_region_btag, + ) + else: + inclusive_control_region_hists[sample] += get_histogram( + input_files[sample], + control_region_hist_name, + control_region_btag, + ) + for sample in samples: + inclusive_control_region_hists[sample].Rebin( rebin ) + + inclusive_control_region_hists['data'] = clean_control_region( inclusive_control_region_hists, + subtract = ['TTJet', 'V+Jets', 'SingleTop'] ) + # now apply proper normalisation + QCD_normalisation_factor = 1 + signal_region_mc = get_histogram( qcd_file, fit_variable_hist_name, b_tag_bin ) + n_mc = signal_region_mc.Integral() + n_control = inclusive_control_region_hists['data'].Integral() + if not n_control == 0: # scale to MC prediction + if not n_mc == 0: + QCD_normalisation_factor = 1 / n_control * n_mc + else: + QCD_normalisation_factor = 1 / n_control + inclusive_control_region_hists['data'].Scale( QCD_normalisation_factor ) + + return inclusive_control_region_hists['data'] -def get_histogram(input_file, histogram_path, b_tag_bin=''): +def get_histogram( input_file, histogram_path, b_tag_bin = '' ): if not input_file: return None - + b_tag_bin_sum_rules = b_tag_summations histogram = None if b_tag_bin in b_tag_bin_sum_rules.keys(): # summing needed b_tag_bins_to_sum = b_tag_bin_sum_rules[b_tag_bin] - histogram = input_file.Get(histogram_path + '_' + b_tag_bins_to_sum[0]).Clone() + histogram = input_file.Get( histogram_path + '_' + b_tag_bins_to_sum[0] ).Clone() for bin_i in b_tag_bins_to_sum[1:]: - histogram += input_file.Get(histogram_path + '_' + bin_i) + histogram += input_file.Get( histogram_path + '_' + bin_i ) else: if b_tag_bin == '': - histogram = input_file.Get(histogram_path) + histogram = input_file.Get( histogram_path ) else: - histogram = input_file.Get(histogram_path + '_' + b_tag_bin) + histogram = input_file.Get( histogram_path + '_' + b_tag_bin ) return histogram.Clone() -def get_fitted_normalisation(channel, input_files, variable, met_type, b_tag_bin, JSON=False, scale_factors = None): - if JSON: - return get_fitted_normalisation_from_JSON(channel, input_files, variable, met_type) # no b_tag_bin as files are specific - else: - return get_fitted_normalisation_from_ROOT(channel, 
input_files, variable, met_type, b_tag_bin, scale_factors) - -def get_fitted_normalisation_from_JSON(channel, input_files, variable, met_type): - pass - -def get_fitted_normalisation_from_ROOT(channel, input_files, variable, met_type, b_tag_bin, scale_factors): - global use_fitter, measurement_config, verbose +def get_fitted_normalisation_from_ROOT( channel, input_files, variable, met_type, b_tag_bin, scale_factors = None ): + ''' + Retrieves the number of ttbar events from fits to one or more distribution + (fit_variables) for each bin in the variable. + ''' + global use_fitter, measurement_config, verbose, fit_variables, options + # results and initial values are the same across different fit variables + # templates are not results = {} initial_values = {} - templates = {} + templates = {fit_variable: {} for fit_variable in fit_variables} for variable_bin in variable_bins_ROOT[variable]: - histograms = get_histograms(channel, - input_files, - variable=variable, - met_type=met_type, - variable_bin=variable_bin, - b_tag_bin=b_tag_bin, - rebin=measurement_config.rebin - ) - # prepare histograms - # normalise histograms - if not measurement_config.luminosity_scale == 1.0: - for sample, histogram in histograms.iteritems(): - if sample == 'data': - continue - histogram.Scale(measurement_config.luminosity_scale) - - # apply normalisation scale factors for rate-changing systematics - if scale_factors: - for source, factor in scale_factors.iteritems(): - if 'luminosity' in source: - for sample, histogram in histograms.iteritems(): - if sample == 'data': - continue - histogram.Scale(factor) - for sample, histogram in histograms.iteritems(): - if sample in source: - histogram.Scale(factor) - - # create signal histograms - h_eta_signal = None - if measurement_config.include_higgs: - h_eta_signal = histograms['TTJet'] + histograms['SingleTop'] + histograms['Higgs'] - else: - h_eta_signal = histograms['TTJet'] + histograms['SingleTop'] - fit_histograms = { - 'data':histograms['data'], - 'signal':h_eta_signal, -# 'background':histograms['V+Jets']+histograms['QCD'] - 'V+Jets':histograms['V+Jets'], - 'QCD':histograms['QCD'], - } fitter = None - if use_fitter == 'TMinuit': - fitter = TMinuitFit( histograms = fit_histograms, verbose = verbose ) - elif use_fitter == 'RooFit': - fitter = RooFitFit( histograms = fit_histograms, fit_boundries = (0., 2.4) ) - else: #not recognised - sys.stderr.write('Do not recognise fitter "%s". 
Using default (TMinuit).\n' % fitter) - fitter = TMinuitFit ( histograms=fit_histograms, verbose = verbose ) + fit_data_collection = FitDataCollection() - fitter.set_fit_constraints({'QCD': 2.0, 'V+Jets': 0.5}) + for fit_variable in fit_variables: + + histograms = get_histograms( channel, + input_files, + variable = variable, + met_type = met_type, + variable_bin = variable_bin, + b_tag_bin = b_tag_bin, + rebin = measurement_config.rebin[fit_variable], + fit_variable = fit_variable, + ) + # create data sets + h_fit_variable_signal = None + mc_histograms = None + if options.make_combined_signal: + if measurement_config.include_higgs: + h_fit_variable_signal = histograms['TTJet'] + histograms['SingleTop'] + histograms['Higgs'] + else: + h_fit_variable_signal = histograms['TTJet'] + histograms['SingleTop'] + mc_histograms = { + 'signal' : h_fit_variable_signal, + 'V+Jets': histograms['V+Jets'], + 'QCD': histograms['QCD'], + } + else: + mc_histograms = { + 'TTJet': histograms['TTJet'], + 'SingleTop': histograms['SingleTop'], + 'V+Jets': histograms['V+Jets'], + 'QCD': histograms['QCD'], + } + h_data = histograms['data'] + if options.closure_test: + ct_type = options.closure_test_type + ct_norm = closure_tests[ct_type] + h_data = histograms['TTJet'] * ct_norm['TTJet'] + histograms['SingleTop'] * ct_norm['SingleTop'] + histograms['V+Jets'] * ct_norm['V+Jets'] + histograms['QCD'] * ct_norm['QCD'] + fit_data = FitData( h_data, + mc_histograms, + fit_boundaries = measurement_config.fit_boundaries[fit_variable] ) + fit_data_collection.add( fit_data, name = fit_variable ) + if options.enable_constraints: + fit_data_collection.set_normalisation_constraints( {'QCD': 2.0, 'V+Jets': 0.5} ) + + if use_fitter == 'RooFit': + fitter = RooFitFit( fit_data_collection ) + elif use_fitter == 'Minuit': + fitter = Minuit( fit_data_collection ) + else: # not recognised + sys.stderr.write( 'Do not recognise fitter "%s". Using default (Minuit).\n' % fitter ) + fitter = Minuit ( fit_data_collection ) if verbose: print "FITTING: " + channel + '_' + variable + '_' + variable_bin + '_' + met_type + '_' + b_tag_bin fitter.fit() fit_results = fitter.readResults() - normalisation = fitter.normalisation - normalisation_errors = fitter.normalisation_errors - N_ttbar_before_fit = histograms['TTJet'].Integral() - N_SingleTop_before_fit = histograms['SingleTop'].Integral() - N_ttbar_error_before_fit = sum(histograms['TTJet'].yerravg()) - N_SingleTop_error_before_fit = sum(histograms['SingleTop'].yerravg()) - N_Higgs_before_fit = 0 - N_Higgs_error_before_fit = 0 - if measurement_config.include_higgs: - N_Higgs_before_fit = histograms['Higgs'].Integral() - N_Higgs_error_before_fit = sum(histograms['Higgs'].yerravg()) - - if (N_SingleTop_before_fit != 0): - TTJet_SingleTop_ratio = (N_ttbar_before_fit + N_Higgs_before_fit) / N_SingleTop_before_fit - else: - print 'Bin ', variable_bin, ': ttbar/singleTop ratio undefined for %s channel! Setting to 0.' 
% channel - TTJet_SingleTop_ratio = 0 + normalisation = fit_data_collection.mc_normalisation( fit_variables[0] ) + normalisation_errors = fit_data_collection.mc_normalisation_errors( fit_variables[0] ) + if options.make_combined_signal: + N_ttbar_before_fit = histograms['TTJet'].Integral() + N_SingleTop_before_fit = histograms['SingleTop'].Integral() + N_ttbar_error_before_fit = sum(histograms['TTJet'].yerravg()) + N_SingleTop_error_before_fit = sum(histograms['SingleTop'].yerravg()) + N_Higgs_before_fit = 0 + N_Higgs_error_before_fit = 0 + if measurement_config.include_higgs: + N_Higgs_before_fit = histograms['Higgs'].Integral() + N_Higgs_error_before_fit = sum(histograms['Higgs'].yerravg()) + + if (N_SingleTop_before_fit != 0): + TTJet_SingleTop_ratio = (N_ttbar_before_fit + N_Higgs_before_fit) / N_SingleTop_before_fit + else: + print 'Bin ', variable_bin, ': ttbar/singleTop ratio undefined for %s channel! Setting to 0.' % channel + TTJet_SingleTop_ratio = 0 + + N_ttbar_all, N_SingleTop = decombine_result(fit_results['signal'], TTJet_SingleTop_ratio) + if (N_Higgs_before_fit != 0): + TTJet_Higgs_ratio = N_ttbar_before_fit/ N_Higgs_before_fit + else: + # print 'Bin ', variable_bin, ': ttbar/higgs ratio undefined for %s channel! Setting to 0.' % channel + TTJet_Higgs_ratio = 0 + + N_ttbar, N_Higgs = decombine_result(N_ttbar_all, TTJet_Higgs_ratio) + + fit_results['TTJet'] = N_ttbar + fit_results['SingleTop'] = N_SingleTop + fit_results['Higgs'] = N_Higgs + + normalisation['TTJet'] = N_ttbar_before_fit + normalisation['SingleTop'] = N_SingleTop_before_fit + normalisation['Higgs'] = N_Higgs_before_fit + normalisation_errors['TTJet'] = N_ttbar_error_before_fit + normalisation_errors['SingleTop'] = N_SingleTop_error_before_fit + normalisation_errors['Higgs'] = N_Higgs_error_before_fit - N_ttbar_all, N_SingleTop = decombine_result(fit_results['signal'], TTJet_SingleTop_ratio) - if (N_Higgs_before_fit != 0): - TTJet_Higgs_ratio = N_ttbar_before_fit/ N_Higgs_before_fit - else: -# print 'Bin ', variable_bin, ': ttbar/higgs ratio undefined for %s channel! Setting to 0.' 
% channel - TTJet_Higgs_ratio = 0 - - N_ttbar, N_Higgs = decombine_result(N_ttbar_all, TTJet_Higgs_ratio) - - fit_results['TTJet'] = N_ttbar - fit_results['SingleTop'] = N_SingleTop - fit_results['Higgs'] = N_Higgs - - normalisation['TTJet'] = N_ttbar_before_fit - normalisation['SingleTop'] = N_SingleTop_before_fit - normalisation['Higgs'] = N_Higgs_before_fit - normalisation_errors['TTJet'] = N_ttbar_error_before_fit - normalisation_errors['SingleTop'] = N_SingleTop_error_before_fit - normalisation_errors['Higgs'] = N_Higgs_error_before_fit - if results == {}: # empty - initial_values['data'] = [(normalisation['data'], normalisation_errors['data'])] - templates['data'] = [fitter.vectors['data']] + initial_values['data'] = [( normalisation['data'], normalisation_errors['data'] )] + for fit_variable in fit_variables: + templates[fit_variable]['data'] = [fit_data_collection.vectors( fit_variable )['data']] for sample in fit_results.keys(): results[sample] = [fit_results[sample]] - initial_values[sample] = [(normalisation[sample], normalisation_errors[sample])] + initial_values[sample] = [( normalisation[sample], normalisation_errors[sample] )] if not sample in ['TTJet', 'SingleTop', 'Higgs']: - templates[sample] = [fitter.vectors[sample]] + for fit_variable in fit_variables: + templates[fit_variable][sample] = [fit_data_collection.vectors( fit_variable )[sample]] else: - initial_values['data'].append([normalisation['data'], normalisation_errors['data']]) - templates['data'].append(fitter.vectors['data']) + initial_values['data'].append( [normalisation['data'], normalisation_errors['data']] ) + for fit_variable in fit_variables: + templates[fit_variable]['data'].append( fit_data_collection.vectors( fit_variable )['data'] ) for sample in fit_results.keys(): - results[sample].append(fit_results[sample]) - initial_values[sample].append([normalisation[sample], normalisation_errors[sample]]) + results[sample].append( fit_results[sample] ) + initial_values[sample].append( [normalisation[sample], normalisation_errors[sample]] ) if not sample in ['TTJet', 'SingleTop', 'Higgs']: - templates[sample].append(fitter.vectors[sample]) - + for fit_variable in fit_variables: + templates[fit_variable][sample].append( fit_data_collection.vectors( fit_variable )[sample] ) + return results, initial_values, templates - -def write_fit_results_and_initial_values(channel, category, fit_results, initial_values, templates): - global variable, met_type, output_path - output_folder = output_path + '/' + str(measurement_config.centre_of_mass_energy) + 'TeV/' + variable + '/fit_results/' + category + '/' - - write_data_to_JSON(fit_results, output_folder + 'fit_results_' + channel + '_' + met_type + '.txt') - write_data_to_JSON(initial_values, output_folder + 'initial_values_' + channel + '_' + met_type + '.txt') - write_data_to_JSON(templates, output_folder + 'templates_' + channel + '_' + met_type + '.txt') -def write_fit_results(channel, category, fit_results): +def write_fit_results_and_initial_values( channel, category, fit_results, initial_values, templates ): global variable, met_type, output_path - output_folder = output_path + '/' + str(measurement_config.centre_of_mass_energy) + 'TeV/' + variable + '/fit_results/' + category + '/' - - write_data_to_JSON(fit_results, output_folder + 'fit_results_' + channel + '_' + met_type + '.txt') - - + folder_template = '%(path)s/%(fit_vars)s/%(CoM)dTeV/%(variable)s/fit_results/%(category)s/' + inputs = { + 'path':output_path, + 'CoM': measurement_config.centre_of_mass_energy, 
+ 'variable': variable, + 'category': category, + 'fit_vars': '_'.join( fit_variables ) + } + output_folder = folder_template % inputs + + write_data_to_JSON( fit_results, output_folder + 'fit_results_' + channel + '_' + met_type + '.txt' ) + write_data_to_JSON( initial_values, output_folder + 'initial_values_' + channel + '_' + met_type + '.txt' ) + write_data_to_JSON( templates, output_folder + 'templates_' + channel + '_' + met_type + '.txt' ) + +def write_fit_results( channel, category, fit_results ): + global variable, met_type, output_path + folder_template = '%(path)s/%(fit_vars)s/%(CoM)dTeV/%(variable)s/fit_results/%(category)s/' + inputs = { + 'path':output_path, + 'CoM': measurement_config.centre_of_mass_energy, + 'variable': variable, + 'category': category, + 'fit_vars': '_'.join( fit_variables ) + } + output_folder = folder_template % inputs + + write_data_to_JSON( fit_results, output_folder + 'fit_results_' + channel + '_' + met_type + '.txt' ) + + if __name__ == '__main__': set_root_defaults() # setup parser = OptionParser() - parser.add_option("-p", "--path", dest="path", default='data', - help="set output path for JSON files") - parser.add_option("-v", "--variable", dest="variable", default='MET', - help="set the variable to analyse (MET, HT, ST, MT)") - parser.add_option("-b", "--bjetbin", dest="bjetbin", default='2m', - help="set b-jet multiplicity for analysis. Options: exclusive: 0-3, inclusive (N or more): 0m, 1m, 2m, 3m, 4m") - parser.add_option("--bjetbin-vjets", dest="bjetbin_VJets", default='0m', - help="set b-jet multiplicity for V+Jets samples. Options: exclusive: 0-3, inclusive (N or more): 0m, 1m, 2m, 3m, 4m") - parser.add_option("-m", "--metType", dest="metType", default='type1', - help="set MET type for analysis of MET, ST or MT") - parser.add_option("-c", "--centre-of-mass-energy", dest="CoM", default=8, type=int, - help="set the centre of mass energy for analysis. Default = 8 [TeV]") - parser.add_option('--fitter', dest = "use_fitter", default='TMinuit', - help = 'Fitter to be used: TMinuit|RooFit. Default = TMinuit.') - parser.add_option('-V', '--verbose', dest = "verbose", action="store_true", - help="Print the fit info and correlation matrix") + parser.add_option( "-p", "--path", dest = "path", default = 'data', + help = "set output path for JSON files" ) + parser.add_option( "-v", "--variable", dest = "variable", default = 'MET', + help = "set the variable to analyse (MET, HT, ST, MT)" ) + parser.add_option( "-f", "--fit-variables", dest = "fit_variables", default = 'absolute_eta', + help = "set the fit variable to use in the minimisation" + + " (absolute_eta, M3, M_bl, angle_bl) or any" + + " combination separated by commas" ) + parser.add_option( "-b", "--bjetbin", dest = "bjetbin", default = '2m', + help = "set b-jet multiplicity for analysis. Options: exclusive: 0-3, inclusive (N or more): 0m, 1m, 2m, 3m, 4m" ) + parser.add_option( "--bjetbin-vjets", dest = "bjetbin_VJets", default = '0m', + help = "set b-jet multiplicity for V+Jets samples. Options: exclusive: 0-3, inclusive (N or more): 0m, 1m, 2m, 3m, 4m" ) + parser.add_option( "-m", "--metType", dest = "metType", default = 'type1', + help = "set MET type for analysis of MET, ST or MT" ) + parser.add_option( "-c", "--centre-of-mass-energy", dest = "CoM", default = 8, type = int, + help = "set the centre of mass energy for analysis. Default = 8 [TeV]" ) + parser.add_option( '--fitter', dest = "use_fitter", default = 'Minuit', + help = 'Fitter to be used: Minuit|RooFit. Default = Minuit.' 
) + parser.add_option( '-V', '--verbose', dest = "verbose", action = "store_true", + help = "Print the fit info and correlation matrix" ) + parser.add_option( '--closure_test', dest = "closure_test", action = "store_true", + help = "Perform fit on data == sum(MC) * scale factor (MC process)" ) + parser.add_option( '--closure_test_type', dest = "closure_test_type", default = 'simple', + help = "Type of closure test (relative normalisation):" + '|'.join( closure_tests.keys() ) ) + parser.add_option( '--no_combined_signal', dest = "make_combined_signal", + action = "store_false", default=True, + help = "Do not make a combined template from TTbar and single top" ) + parser.add_option( '--test', dest = "test", action = "store_true", + help = "Just run the central measurement" ) + parser.add_option( '--disable-constraints', dest = "enable_constraints", + action = "store_false", default=True, + help = "Do not constrain the QCD and V+Jets normalisations in the fit" ) translate_options = { '0':'0btag', @@ -316,11 +404,11 @@ def write_fit_results(channel, category, fit_results): 'pf':'PFMET', 'type1':'patType1CorrectedPFMet' } - - - (options, args) = parser.parse_args() - - measurement_config = XSectionConfig(options.CoM) + + + ( options, args ) = parser.parse_args() + + measurement_config = XSectionConfig( options.CoM ) # caching of variables for shorter access ttbar_theory_systematic_prefix = measurement_config.ttbar_theory_systematic_prefix vjets_theory_systematic_prefix = measurement_config.vjets_theory_systematic_prefix @@ -328,131 +416,139 @@ def write_fit_results(channel, category, fit_results): categories_and_prefixes = measurement_config.categories_and_prefixes met_systematics_suffixes = measurement_config.met_systematics_suffixes analysis_type = measurement_config.analysis_types - + run_just_central = options.closure_test or options.test + variable = options.variable met_type = translate_options[options.metType] b_tag_bin = translate_options[options.bjetbin] b_tag_bin_VJets = translate_options[options.bjetbin_VJets] path_to_files = measurement_config.path_to_files output_path = options.path - + if options.closure_test: + output_path += '/closure_test/' use_fitter = options.use_fitter verbose = options.verbose - + fit_variables = options.fit_variables.split( ',' ) + # possible options: # --continue : continue from saved - skips ROOT files, reads from JSON? 
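# Example invocations with the new options, as exercised by bin/run_fit_tests and
# bin/x_01_all_vars (a usage sketch, not an exhaustive list):
#     python src/cross_section_measurement/01_get_fit_results.py -V -v MET -c 8 \
#         --fit-variables absolute_eta,M3,angle_bl --no_combined_signal
#     python src/cross_section_measurement/01_get_fit_results.py --fit-variables M_bl \
#         --closure_test --disable-constraints -p data/no_constraints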
- + # get data from histograms or JSON files - # data and muon_QCD file with SFs are the same for central measurement and all systematics - data_file_electron = File(measurement_config.data_file_electron) - data_file_muon = File(measurement_config.data_file_muon) - - SingleTop_file = File(measurement_config.SingleTop_file) - muon_QCD_MC_file = File(measurement_config.muon_QCD_MC_file) - electron_QCD_MC_file = File(measurement_config.electron_QCD_MC_file) - TTJet_file = File(measurement_config.ttbar_category_templates['central']) - VJets_file = File(measurement_config.VJets_category_templates['central']) + # data and muon_QCD file with SFs are the same for central measurement and all systematics + data_file_electron = File( measurement_config.data_file_electron ) + data_file_muon = File( measurement_config.data_file_muon ) + + SingleTop_file = File( measurement_config.SingleTop_file ) + muon_QCD_MC_file = File( measurement_config.muon_QCD_MC_file ) + electron_QCD_MC_file = File( measurement_config.electron_QCD_MC_file ) + TTJet_file = File( measurement_config.ttbar_category_templates['central'] ) + VJets_file = File( measurement_config.VJets_category_templates['central'] ) electron_control_region = measurement_config.electron_control_region muon_control_region = measurement_config.muon_control_region Higgs_file = None if measurement_config.include_higgs: - Higgs_file = File(measurement_config.higgs_category_templates['central']) + Higgs_file = File( measurement_config.higgs_category_templates['central'] ) # matching/scale up/down systematics for ttbar + jets for systematic, filename in measurement_config.generator_systematic_ttbar_templates.iteritems(): - TTJet_file = File(filename) + if run_just_central: + continue + TTJet_file = File( filename ) if verbose: print "\n" + systematic + "\n" - fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation('electron', - input_files={ + fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation_from_ROOT( 'electron', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_electron, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - - fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation('muon', - input_files={ + + fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation_from_ROOT( 'muon', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_muon, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - - write_fit_results_and_initial_values('electron', ttbar_theory_systematic_prefix + systematic, fit_results_electron, initial_values_electron, templates_electron) - write_fit_results_and_initial_values('muon', ttbar_theory_systematic_prefix + systematic, fit_results_muon, initial_values_muon, templates_muon) + + write_fit_results_and_initial_values( 'electron', ttbar_theory_systematic_prefix + systematic, fit_results_electron, initial_values_electron, templates_electron ) + write_fit_results_and_initial_values( 'muon', ttbar_theory_systematic_prefix + systematic, fit_results_muon, initial_values_muon, templates_muon ) write_fit_results( 'combined', ttbar_theory_systematic_prefix + systematic, 
combine_complex_results( fit_results_electron, fit_results_muon ) ) TTJet_file.Close() - - TTJet_file = File(measurement_config.ttbar_category_templates['central']) + + TTJet_file = File( measurement_config.ttbar_category_templates['central'] ) # matching/scale up/down systematics for V+Jets for systematic in generator_systematics: - VJets_file = File(measurement_config.generator_systematic_vjets_templates[systematic]) + if run_just_central: # no systematics for closure test + continue + VJets_file = File( measurement_config.generator_systematic_vjets_templates[systematic] ) if verbose: print "\n" + systematic + "\n" - fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation('electron', - input_files={ + fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation_from_ROOT( 'electron', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_electron, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - - fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation('muon', - input_files={ + + fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation_from_ROOT( 'muon', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_muon, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - - write_fit_results_and_initial_values('electron', vjets_theory_systematic_prefix + systematic, fit_results_electron, initial_values_electron, templates_electron) - write_fit_results_and_initial_values('muon', vjets_theory_systematic_prefix + systematic, fit_results_muon, initial_values_muon, templates_muon) + + write_fit_results_and_initial_values( 'electron', vjets_theory_systematic_prefix + systematic, fit_results_electron, initial_values_electron, templates_electron ) + write_fit_results_and_initial_values( 'muon', vjets_theory_systematic_prefix + systematic, fit_results_muon, initial_values_muon, templates_muon ) write_fit_results( 'combined', vjets_theory_systematic_prefix + systematic, combine_complex_results( fit_results_electron, fit_results_muon ) ) VJets_file.Close() - - VJets_file = File(measurement_config.VJets_category_templates['central']) - - # central measurement and the rest of the systematics + VJets_file = File( measurement_config.VJets_category_templates['central'] ) + + # central measurement and the rest of the systematics last_systematic = '' for category, prefix in categories_and_prefixes.iteritems(): - TTJet_file = File(measurement_config.ttbar_category_templates[category]) - SingleTop_file = File(measurement_config.SingleTop_category_templates[category]) - VJets_file = File(measurement_config.VJets_category_templates[category]) - muon_QCD_MC_file = File(measurement_config.muon_QCD_MC_category_templates[category]) - data_file_electron = File(measurement_config.data_electron_category_templates['central']) - data_file_muon = File(measurement_config.data_muon_category_templates['central']) + if run_just_central and not category == 'central': + continue + TTJet_file = File( measurement_config.ttbar_category_templates[category] ) + SingleTop_file = File( measurement_config.SingleTop_category_templates[category] ) + VJets_file = File( 
measurement_config.VJets_category_templates[category] ) + muon_QCD_MC_file = File( measurement_config.muon_QCD_MC_category_templates[category] ) + data_file_electron = File( measurement_config.data_electron_category_templates['central'] ) + data_file_muon = File( measurement_config.data_muon_category_templates['central'] ) if measurement_config.include_higgs: - Higgs_file = File(measurement_config.higgs_category_templates[category]) - + Higgs_file = File( measurement_config.higgs_category_templates[category] ) + # Setting up systematic MET for JES up/down samples met_type = translate_options[options.metType] if category in ['JES_up', 'JES_down']: # these systematics affect the data as well data_file_electron.Close() data_file_muon.Close() - data_file_electron = File(measurement_config.data_electron_category_templates[category]) - data_file_muon = File(measurement_config.data_muon_category_templates[category]) - + data_file_electron = File( measurement_config.data_electron_category_templates[category] ) + data_file_muon = File( measurement_config.data_muon_category_templates[category] ) + if category == 'JES_up': met_type += 'JetEnUp' if met_type == 'PFMETJetEnUp': @@ -465,36 +561,36 @@ def write_fit_results(channel, category, fit_results): if verbose: print "\n" + category + "\n" - fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation('electron', - input_files={ + fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation_from_ROOT( 'electron', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_electron, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - - fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation('muon', - input_files={ + + fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation_from_ROOT( 'muon', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_muon, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - write_fit_results_and_initial_values('electron', category, fit_results_electron, initial_values_electron, templates_electron) - write_fit_results_and_initial_values('muon', category, fit_results_muon, initial_values_muon, templates_muon) + write_fit_results_and_initial_values( 'electron', category, fit_results_electron, initial_values_electron, templates_electron ) + write_fit_results_and_initial_values( 'muon', category, fit_results_muon, initial_values_muon, templates_muon ) write_fit_results( 'combined', category, combine_complex_results( fit_results_electron, fit_results_muon ) ) last_systematic = category - + TTJet_file.Close() SingleTop_file.Close() VJets_file.Close() @@ -503,141 +599,147 @@ def write_fit_results(channel, category, fit_results): data_file_muon.Close() if Higgs_file: Higgs_file.Close() - + # now do PDF uncertainty - data_file_electron = File(measurement_config.data_electron_category_templates['central']) - data_file_muon = File(measurement_config.data_muon_category_templates['central']) - SingleTop_file = File(measurement_config.SingleTop_category_templates['central']) - VJets_file = File(measurement_config.VJets_category_templates['central']) - muon_QCD_MC_file = 
File(measurement_config.muon_QCD_MC_category_templates['central']) - data_file_electron = File(measurement_config.data_electron_category_templates['central']) - data_file_muon = File(measurement_config.data_muon_category_templates['central']) + data_file_electron = File( measurement_config.data_electron_category_templates['central'] ) + data_file_muon = File( measurement_config.data_muon_category_templates['central'] ) + SingleTop_file = File( measurement_config.SingleTop_category_templates['central'] ) + VJets_file = File( measurement_config.VJets_category_templates['central'] ) + muon_QCD_MC_file = File( measurement_config.muon_QCD_MC_category_templates['central'] ) + data_file_electron = File( measurement_config.data_electron_category_templates['central'] ) + data_file_muon = File( measurement_config.data_muon_category_templates['central'] ) if measurement_config.include_higgs: - Higgs_file = File(measurement_config.higgs_category_templates['central']) - - for index in range(1, 45): + Higgs_file = File( measurement_config.higgs_category_templates['central'] ) + + for index in range( 1, 45 ): + if run_just_central: # no systematics for closure test + continue category = 'PDFWeights_%d' % index - TTJet_file = File(measurement_config.pdf_uncertainty_template % index) + TTJet_file = File( measurement_config.pdf_uncertainty_template % index ) if verbose: - print "\nPDF_" + str(index) + "\n" + print "\nPDF_" + str( index ) + "\n" - fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation('electron', - input_files={ + fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation_from_ROOT( 'electron', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_electron, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - - fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation('muon', - input_files={ + + fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation_from_ROOT( 'muon', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_muon, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - write_fit_results_and_initial_values('electron', category, fit_results_electron, initial_values_electron, templates_electron) - write_fit_results_and_initial_values('muon', category, fit_results_muon, initial_values_muon, templates_muon) + write_fit_results_and_initial_values( 'electron', category, fit_results_electron, initial_values_electron, templates_electron ) + write_fit_results_and_initial_values( 'muon', category, fit_results_muon, initial_values_muon, templates_muon ) write_fit_results( 'combined', category, combine_complex_results( fit_results_electron, fit_results_muon ) ) TTJet_file.Close() - TTJet_file = File(measurement_config.ttbar_category_templates['central']) - + TTJet_file = File( measurement_config.ttbar_category_templates['central'] ) + for met_systematic in met_systematics_suffixes: - #all MET uncertainties except JES & JER - as this is already included - if 'JetEn' in met_systematic or 'JetRes' in met_systematic or variable == 'HT':#HT is not dependent on MET! 
+ if run_just_central: + continue + # all MET uncertainties except JES & JER - as this is already included + if 'JetEn' in met_systematic or 'JetRes' in met_systematic or variable == 'HT': # HT is not dependent on MET! continue category = met_type + met_systematic if 'PFMET' in met_type: - category = category.replace('PFMET', 'patPFMet') - + category = category.replace( 'PFMET', 'patPFMet' ) + if verbose: print "\n" + met_systematic + "\n" - fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation('electron', - input_files={ + fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation_from_ROOT( 'electron', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_electron, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=category, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = category, + b_tag_bin = b_tag_bin, ) - - fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation('muon', - input_files={ + + fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation_from_ROOT( 'muon', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_muon, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=category, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = category, + b_tag_bin = b_tag_bin, ) - write_fit_results_and_initial_values('electron', category, fit_results_electron, initial_values_electron, templates_electron) - write_fit_results_and_initial_values('muon', category, fit_results_muon, initial_values_muon, templates_muon) + write_fit_results_and_initial_values( 'electron', category, fit_results_electron, initial_values_electron, templates_electron ) + write_fit_results_and_initial_values( 'muon', category, fit_results_muon, initial_values_muon, templates_muon ) write_fit_results( 'combined', category, combine_complex_results( fit_results_electron, fit_results_muon ) ) + + # QCD systematic + if not run_just_central: + electron_control_region = measurement_config.electron_control_region_systematic - #QCD systematic + if verbose: + print "\nQCD shape systematic\n" - electron_control_region = measurement_config.electron_control_region_systematic - - if verbose: - print "\nQCD shape systematic\n" - - fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation('electron', - input_files={ + fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation_from_ROOT( 'electron', + input_files = { + 'TTJet': TTJet_file, + 'SingleTop': SingleTop_file, + 'V+Jets': VJets_file, + 'data': data_file_electron, + 'Higgs' : Higgs_file, + }, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, + ) + + muon_control_region = measurement_config.muon_control_region_systematic + fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation_from_ROOT( 'muon', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, - 'data': data_file_electron, + 'data': data_file_muon, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, ) - muon_control_region = measurement_config.muon_control_region_systematic - fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation('muon', - input_files={ - 'TTJet': TTJet_file, - 
'SingleTop': SingleTop_file, - 'V+Jets': VJets_file, - 'data': data_file_muon, - 'Higgs' : Higgs_file, - }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, - ) - - systematic = 'QCD_shape' - write_fit_results_and_initial_values('electron', systematic, fit_results_electron, initial_values_electron, templates_electron) - write_fit_results_and_initial_values('muon', systematic, fit_results_muon, initial_values_muon, templates_muon) - write_fit_results( 'combined', systematic, combine_complex_results( fit_results_electron, fit_results_muon ) ) + systematic = 'QCD_shape' + write_fit_results_and_initial_values( 'electron', systematic, fit_results_electron, initial_values_electron, templates_electron ) + write_fit_results_and_initial_values( 'muon', systematic, fit_results_muon, initial_values_muon, templates_muon ) + write_fit_results( 'combined', systematic, combine_complex_results( fit_results_electron, fit_results_muon ) ) electron_control_region = measurement_config.electron_control_region muon_control_region = measurement_config.muon_control_region - #rate-changing systematics + # rate-changing systematics for systematic, shift in measurement_config.rate_changing_systematics.iteritems(): + if run_just_central: # no systematics for closure test + continue factor = 1.0 for variation in ['+', '-']: if variation == '+': @@ -650,37 +752,34 @@ def write_fit_results(channel, category, fit_results): scale_factors = {} scale_factors[systematic + variation] = factor - fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation('electron', - input_files={ + fit_results_electron, initial_values_electron, templates_electron = get_fitted_normalisation_from_ROOT( 'electron', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_electron, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, scale_factors = scale_factors ) - - fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation('muon', - input_files={ + + fit_results_muon, initial_values_muon, templates_muon = get_fitted_normalisation_from_ROOT( 'muon', + input_files = { 'TTJet': TTJet_file, 'SingleTop': SingleTop_file, 'V+Jets': VJets_file, 'data': data_file_muon, 'Higgs' : Higgs_file, }, - variable=variable, - met_type=met_type, - b_tag_bin=b_tag_bin, + variable = variable, + met_type = met_type, + b_tag_bin = b_tag_bin, scale_factors = scale_factors ) - write_fit_results_and_initial_values('electron', systematic + variation, fit_results_electron, initial_values_electron, templates_electron) - write_fit_results_and_initial_values('muon', systematic + variation, fit_results_muon, initial_values_muon, templates_muon) + write_fit_results_and_initial_values( 'electron', systematic + variation, fit_results_electron, initial_values_electron, templates_electron ) + write_fit_results_and_initial_values( 'muon', systematic + variation, fit_results_muon, initial_values_muon, templates_muon ) write_fit_results( 'combined', systematic + variation, combine_complex_results( fit_results_electron, fit_results_muon ) ) - - - diff --git a/src/cross_section_measurement/04_make_plots_matplotlib.py b/src/cross_section_measurement/04_make_plots_matplotlib.py index eb1488af..9d61b277 100644 --- a/src/cross_section_measurement/04_make_plots_matplotlib.py +++ b/src/cross_section_measurement/04_make_plots_matplotlib.py @@ -4,8 +4,8 @@ from copy 
import deepcopy from config.latex_labels import variables_latex, measurements_latex, \ -met_systematics_latex, b_tag_bins_latex -from config.variable_binning import bin_edges, variable_bins_ROOT, eta_bin_edges +met_systematics_latex, b_tag_bins_latex, fit_variables_latex +from config.variable_binning import bin_edges, variable_bins_ROOT, fit_variable_bin_edges from config import XSectionConfig from tools.file_utilities import read_data_from_JSON, make_folder_if_not_exists from tools.hist_utilities import value_error_tuplelist_to_hist, \ @@ -117,133 +117,141 @@ def read_fit_templates_and_results_as_histograms( category, channel ): templates = read_data_from_JSON( path_to_JSON + '/fit_results/' + category + '/templates_' + channel + '_' + met_type + '.txt' ) data_values = read_data_from_JSON( path_to_JSON + '/fit_results/' + category + '/initial_values_' + channel + '_' + met_type + '.txt' )['data'] fit_results = read_data_from_JSON( path_to_JSON + '/fit_results/' + category + '/fit_results_' + channel + '_' + met_type + '.txt' ) - template_histograms = {} - fit_results_histograms = {} + fit_variables = templates.keys() + template_histograms = {fit_variable: {} for fit_variable in fit_variables} + fit_results_histograms = {fit_variable: {} for fit_variable in fit_variables} for bin_i, variable_bin in enumerate( variable_bins_ROOT[variable] ): - h_template_data = value_tuplelist_to_hist( templates['data'][bin_i], eta_bin_edges ) - h_template_signal = value_tuplelist_to_hist( templates['signal'][bin_i], eta_bin_edges ) - h_template_VJets = value_tuplelist_to_hist( templates['V+Jets'][bin_i], eta_bin_edges ) - h_template_QCD = value_tuplelist_to_hist( templates['QCD'][bin_i], eta_bin_edges ) - template_histograms[variable_bin] = { - 'signal':h_template_signal, - 'V+Jets':h_template_VJets, - 'QCD':h_template_QCD - } - h_data = h_template_data.Clone() - h_signal = h_template_signal.Clone() - h_VJets = h_template_VJets.Clone() - h_QCD = h_template_QCD.Clone() - - data_normalisation = data_values[bin_i][0] - signal_normalisation = fit_results['signal'][bin_i][0] - VJets_normalisation = fit_results['V+Jets'][bin_i][0] - QCD_normalisation = fit_results['QCD'][bin_i][0] - - h_data.Scale( data_normalisation ) - h_signal.Scale( signal_normalisation ) - h_VJets.Scale( VJets_normalisation ) - h_QCD.Scale( QCD_normalisation ) - h_background = h_VJets + h_QCD - - for bin_i in range( len( h_data ) ): - h_data.SetBinError( bin_i + 1, sqrt( h_data.GetBinContent( bin_i + 1 ) ) ) - - fit_results_histograms[variable_bin] = { - 'data':h_data, - 'signal':h_signal, - 'background':h_background - } + for fit_variable in fit_variables: + print len(templates[fit_variable]['data']), len(variable_bins_ROOT[variable]), bin_i + print fit_variable, len(fit_variable_bin_edges[fit_variable]), len(templates[fit_variable]['data'][bin_i]) + h_template_data = value_tuplelist_to_hist( templates[fit_variable]['data'][bin_i], fit_variable_bin_edges[fit_variable] ) + h_template_signal = value_tuplelist_to_hist( templates[fit_variable]['signal'][bin_i], fit_variable_bin_edges[fit_variable] ) + h_template_VJets = value_tuplelist_to_hist( templates[fit_variable]['V+Jets'][bin_i], fit_variable_bin_edges[fit_variable] ) + h_template_QCD = value_tuplelist_to_hist( templates[fit_variable]['QCD'][bin_i], fit_variable_bin_edges[fit_variable] ) + template_histograms[fit_variable][variable_bin] = { + 'signal':h_template_signal, + 'V+Jets':h_template_VJets, + 'QCD':h_template_QCD + } + h_data = h_template_data.Clone() + h_signal = 
h_template_signal.Clone() + h_VJets = h_template_VJets.Clone() + h_QCD = h_template_QCD.Clone() + + data_normalisation = data_values[bin_i][0] + signal_normalisation = fit_results['signal'][bin_i][0] + VJets_normalisation = fit_results['V+Jets'][bin_i][0] + QCD_normalisation = fit_results['QCD'][bin_i][0] + + h_data.Scale( data_normalisation ) + h_signal.Scale( signal_normalisation ) + h_VJets.Scale( VJets_normalisation ) + h_QCD.Scale( QCD_normalisation ) + h_background = h_VJets + h_QCD + + for bin_i_data in range( len( h_data ) ): + h_data.SetBinError( bin_i_data + 1, sqrt( h_data.GetBinContent( bin_i_data + 1 ) ) ) + + fit_results_histograms[fit_variable][variable_bin] = { + 'data':h_data, + 'signal':h_signal, + 'background':h_background + } return template_histograms, fit_results_histograms def make_template_plots( histograms, category, channel ): global variable, output_folder - + fit_variables = histograms.keys() for variable_bin in variable_bins_ROOT[variable]: path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_templates/' make_folder_if_not_exists( path ) - plotname = path + channel + '_templates_bin_' + variable_bin + for fit_variable in fit_variables: + plotname = path + channel + '_' + fit_variable + '_template_bin_' + variable_bin - # check if template plots exist already - for output_format in output_formats: - if os.path.isfile( plotname + '.' + output_format ): - continue - - # plot with matplotlib - h_signal = histograms[variable_bin]['signal'] - h_VJets = histograms[variable_bin]['V+Jets'] - h_QCD = histograms[variable_bin]['QCD'] - - h_signal.linecolor = 'red' - h_VJets.linecolor = 'green' - h_QCD.linecolor = 'gray' - h_VJets.linestyle = 'dashed' - h_QCD.linestyle = 'dotted' # currently not working - # bug report: http://trac.sagemath.org/sage_trac/ticket/13834 - - h_signal.linewidth = 5 - h_VJets.linewidth = 5 - h_QCD.linewidth = 5 - - plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' ) - axes = plt.axes() - axes.minorticks_on() - - plt.xlabel( r'lepton $|\eta|$', CMS.x_axis_title ) - plt.ylabel( 'normalised to unit area/(0.2)', CMS.y_axis_title ) - plt.tick_params( **CMS.axis_label_major ) - plt.tick_params( **CMS.axis_label_minor ) - - rplt.hist( h_signal, axes = axes, label = 'signal' ) - if ( h_VJets.Integral() != 0 ): - rplt.hist( h_VJets, axes = axes, label = 'V+Jets' ) - else: - print "WARNING: in %s bin %s, %s category, %s channel, V+Jets template is empty: not plotting." % ( variable, variable_bin, category, channel ) - if ( h_QCD.Integral() != 0 ): - rplt.hist( h_QCD, axes = axes, label = 'QCD' ) - else: - print "WARNING: in %s bin %s, %s category, %s channel, QCD template is empty: not plotting." % ( variable, variable_bin, category, channel ) - axes.set_ylim( [0, 0.2] ) + # check if template plots exist already + for output_format in output_formats: + if os.path.isfile( plotname + '.' 
+ output_format ): + continue + + # plot with matplotlib + h_signal = histograms[fit_variable][variable_bin]['signal'] + h_VJets = histograms[fit_variable][variable_bin]['V+Jets'] + h_QCD = histograms[fit_variable][variable_bin]['QCD'] + + h_signal.linecolor = 'red' + h_VJets.linecolor = 'green' + h_QCD.linecolor = 'gray' + h_VJets.linestyle = 'dashed' + h_QCD.linestyle = 'dotted' # currently not working + # bug report: http://trac.sagemath.org/sage_trac/ticket/13834 + + h_signal.linewidth = 5 + h_VJets.linewidth = 5 + h_QCD.linewidth = 5 - plt.legend( numpoints = 1, loc = 'upper right', prop = CMS.legend_properties ) - plt.title( get_cms_labels( channel ), CMS.title ) - plt.tight_layout() + plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' ) + axes = plt.axes() + axes.minorticks_on() + + plt.xlabel( fit_variables_latex[fit_variable], CMS.x_axis_title ) + plt.ylabel( 'normalised to unit area/(%s)' % get_unit_string(fit_variable), CMS.y_axis_title ) + plt.tick_params( **CMS.axis_label_major ) + plt.tick_params( **CMS.axis_label_minor ) - for output_format in output_formats: - plt.savefig( plotname + '.' + output_format ) + rplt.hist( h_signal, axes = axes, label = 'signal' ) + if ( h_VJets.Integral() != 0 ): + rplt.hist( h_VJets, axes = axes, label = 'V+Jets' ) + else: + print "WARNING: in %s bin %s, %s category, %s channel, V+Jets template is empty: not plotting." % ( variable, variable_bin, category, channel ) + if ( h_QCD.Integral() != 0 ): + rplt.hist( h_QCD, axes = axes, label = 'QCD' ) + else: + print "WARNING: in %s bin %s, %s category, %s channel, QCD template is empty: not plotting." % ( variable, variable_bin, category, channel ) + axes.set_ylim( [0, 0.2] ) + axes.set_xlim( measurement_config.fit_boundaries[fit_variable] ) + + plt.legend( numpoints = 1, loc = 'upper right', prop = CMS.legend_properties ) + plt.title( get_cms_labels( channel ), CMS.title ) + plt.tight_layout() - plt.close() - gc.collect() + for output_format in output_formats: + plt.savefig( plotname + '.' + output_format ) + + plt.close() + gc.collect() def plot_fit_results( histograms, category, channel ): global variable, b_tag_bin, output_folder from tools.plotting import Histogram_properties, make_data_mc_comparison_plot - + fit_variables = histograms.keys() for variable_bin in variable_bins_ROOT[variable]: path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_results/' make_folder_if_not_exists( path ) - plotname = channel + '_bin_' + variable_bin - # check if template plots exist already - for output_format in output_formats: - if os.path.isfile( plotname + '.' + output_format ): - continue + for fit_variable in fit_variables: + plotname = channel + '_' + fit_variable + '_bin_' + variable_bin + # check if template plots exist already + for output_format in output_formats: + if os.path.isfile( plotname + '.' 
+ output_format ): + continue + + # plot with matplotlib + h_data = histograms[fit_variable][variable_bin]['data'] + h_signal = histograms[fit_variable][variable_bin]['signal'] + h_background = histograms[fit_variable][variable_bin]['background'] - # plot with matplotlib - h_data = histograms[variable_bin]['data'] - h_signal = histograms[variable_bin]['signal'] - h_background = histograms[variable_bin]['background'] - - histogram_properties = Histogram_properties() - histogram_properties.name = plotname - histogram_properties.x_axis_title = channel + ' $\left|\eta\\right|$' - histogram_properties.y_axis_title = 'Events/(0.2)' - histogram_properties.title = get_cms_labels( channel ) - - make_data_mc_comparison_plot( [h_data, h_background, h_signal], - ['data', 'background', 'signal'], - ['black', 'green', 'red'], histogram_properties, - save_folder = path, save_as = output_formats ) + histogram_properties = Histogram_properties() + histogram_properties.name = plotname + histogram_properties.x_axis_title = channel + ' ' + fit_variables_latex[fit_variable] + histogram_properties.y_axis_title = 'Events/(%s)' % get_unit_string(fit_variable) + histogram_properties.title = get_cms_labels( channel ) + histogram_properties.x_limits = measurement_config.fit_boundaries[fit_variable] + + make_data_mc_comparison_plot( [h_data, h_background, h_signal], + ['data', 'background', 'signal'], + ['black', 'green', 'red'], histogram_properties, + save_folder = path, save_as = output_formats ) def get_cms_labels( channel ): global b_tag_bin @@ -472,6 +480,17 @@ def plot_central_and_systematics( channel, systematics, exclude = [], suffix = ' plt.close() gc.collect() +def get_unit_string(fit_variable): + unit = measurement_config.fit_variable_unit[fit_variable] + fit_variable_bin_width = measurement_config.fit_variable_bin_width[fit_variable] + unit_string = '' + if unit == '': + unit_string = fit_variable_bin_width + else: + unit_string = '%f %s' % (fit_variable_bin_width, unit) + + return unit_string + if __name__ == '__main__': set_root_defaults() parser = OptionParser() diff --git a/src/cross_section_measurement/98b_fit_cross_checks.py b/src/cross_section_measurement/98b_fit_cross_checks.py new file mode 100644 index 00000000..638d8e96 --- /dev/null +++ b/src/cross_section_measurement/98b_fit_cross_checks.py @@ -0,0 +1,171 @@ +from optparse import OptionParser +from config import XSectionConfig +from config.variable_binning import bin_edges +from lib import read_fit_results, closure_tests +# from tools.file_utilities import read_data_from_JSON +# from tools.plotting import Histogram_properties + +# from matplotlib import pyplot as plt +# import rootpy.plotting.root2matplotlib as rplt +from tools.hist_utilities import value_error_tuplelist_to_hist, spread_x, \ + limit_range_y +from tools.plotting import compare_measurements, Histogram_properties +from config.latex_labels import fit_variables_latex, samples_latex +from src.cross_section_measurement.lib import read_fit_input +# import linecache +# +# from config.variable_binning import variable_bins_ROOT +def plot_fit_results( fit_results, initial_values, channel ): + global variable, output_folder + + title = electron_histogram_title if channel == 'electron' else muon_histogram_title + + + histogram_properties = Histogram_properties() + histogram_properties.title = title + + histogram_properties.x_axis_title = variable + ' [GeV]' + histogram_properties.mc_error = 0.0 + histogram_properties.legend_location = 'upper right' + # we will need 4 histograms: TTJet, 
SingleTop, QCD, V+Jets + for sample in ['TTJet', 'SingleTop', 'QCD', 'V+Jets']: + histograms = {} + # absolute eta measurement as baseline + h_absolute_eta = None + h_before = None + histogram_properties.y_axis_title = 'Fitted number of events for ' + samples_latex[sample] + + for fit_var_input in fit_results.keys(): + latex_string = create_latex_string( fit_var_input ) + fit_data = fit_results[fit_var_input][sample] + h = value_error_tuplelist_to_hist( fit_data, + bin_edges[variable] ) + if fit_var_input == 'absolute_eta': + h_absolute_eta = h + elif fit_var_input == 'before': + h_before = h + else: + histograms[latex_string] = h + graphs = spread_x( histograms.values(), bin_edges[variable] ) + for key, graph in zip( histograms.keys(), graphs ): + histograms[key] = graph + filename = sample.replace( '+', '_' ) + '_fit_var_comparison_' + channel + histogram_properties.name = filename + histogram_properties.y_limits = 0, limit_range_y( h_absolute_eta )[1] * 1.3 + histogram_properties.x_limits = bin_edges[variable][0], bin_edges[variable][-1] + + h_initial_values = value_error_tuplelist_to_hist( initial_values[sample], + bin_edges[variable] ) + h_initial_values.Scale(closure_tests['simple'][sample]) + + compare_measurements( models = {fit_variables_latex['absolute_eta']:h_absolute_eta, + 'initial values' : h_initial_values, + 'before': h_before}, + measurements = histograms, + show_measurement_errors = True, + histogram_properties = histogram_properties, + save_folder = output_folder, + save_as = ['png'] ) + +def create_latex_string( fit_var_input ): + known_fit_vars = fit_variables_latex.keys() + if fit_var_input in known_fit_vars: + return fit_variables_latex[fit_var_input] + + if fit_var_input == 'before': + return 'before' + # now we are left with multi-var + latex_string = fit_var_input + # replace all known variables with their latex equivalent + for known_var in known_fit_vars: + latex_string = latex_string.replace( known_var, + fit_variables_latex[known_var] ) + # lastly replace the remaining '_' with commas + latex_string = latex_string.replace( '_', ',' ) + + return latex_string + +if __name__ == '__main__': + parser = OptionParser() + parser.add_option( "-p", "--path", dest = "path", default = 'data/', + help = "set path to JSON files" ) + parser.add_option( "-v", "--variable", dest = "variable", default = 'MET', + help = "set the variable to analyse (MET, HT, ST, MT)" ) + parser.add_option( "-o", "--output_folder", dest = "output_folder", default = 'plots/fitchecks/', + help = "set path to save plots" ) + parser.add_option( "-m", "--metType", dest = "metType", default = 'type1', + help = "set MET type used in the analysis of MET-dependent variables" ) + parser.add_option( "-c", "--category", dest = "category", default = 'central', + help = "set the category to take the fit results from (default: central)" ) + parser.add_option( "-e", "--centre-of-mass-energy", dest = "CoM", default = 8, type = int, + help = "set the centre of mass energy for analysis. 
Default = 8 [TeV]" ) + + ( options, args ) = parser.parse_args() + measurement_config = XSectionConfig( options.CoM ) + # caching of variables for shorter access + translate_options = measurement_config.translate_options + lumi = measurement_config.luminosity + come = measurement_config.centre_of_mass_energy + + path_to_JSON = options.path + output_folder = options.output_folder + if not output_folder.endswith( '/' ): + output_folder += '/' + category = options.category + met_type = translate_options[options.metType] + variable = options.variable + + electron_histogram_title = 'CMS Preliminary, $\mathcal{L}$ = %.1f fb$^{-1}$ at $\sqrt{s}$ = %d TeV \n e+jets, $\geq$4 jets' % ( lumi/1000, come ) + muon_histogram_title = 'CMS Preliminary, $\mathcal{L}$ = %.1f fb$^{-1}$ at $\sqrt{s}$ = %d TeV \n $\mu$+jets, $\geq$4 jets' % ( lumi/1000, come ) + + fit_var_inputs = ['absolute_eta', 'M3', 'M_bl', 'angle_bl', + 'absolute_eta_angle_bl', + 'absolute_eta_M3', + 'absolute_eta_M_bl', + 'absolute_eta_M_bl_angle_bl', + 'absolute_eta_M3_angle_bl', + 'absolute_eta_M_bl_M3', + 'absolute_eta_M_bl_M3_angle_bl' ] + + fit_results_electron = {} + fit_results_muon = {} + initial_values_electron = {} + initial_values_muon = {} + for fit_var_input in fit_var_inputs: + path = path_to_JSON + fit_var_input + '/' + str( come ) + 'TeV/' + fit_results_electron[fit_var_input] = read_fit_results( path, + variable, + category, + 'electron', + met_type ) + fit_results_muon[fit_var_input] = read_fit_results( path, + variable, + category, + 'muon', + met_type ) + # it doesn't matter which one to use, all of them are identical + # so lets use the 2011 and 2012 default, |eta| + initial_values_electron = read_fit_input( path_to_JSON + 'absolute_eta' + '/' + str( come ) + 'TeV/', + variable, + category, + 'electron', + met_type ) + initial_values_muon = read_fit_input( path_to_JSON + 'absolute_eta' + '/' + str( come ) + 'TeV/', + variable, + category, + 'muon', + met_type ) + + if not 'closure' in path_to_JSON: + fit_results_electron['before'] = read_fit_results( 'data_single_var_fit/8TeV/', + variable, + category, + 'electron', + met_type ) + fit_results_muon['before'] = read_fit_results( 'data_single_var_fit/8TeV/', + variable, + category, + 'muon', + met_type ) + plot_fit_results( fit_results_electron, initial_values_electron, 'electron' ) + plot_fit_results( fit_results_muon, initial_values_muon, 'muon' ) diff --git a/src/cross_section_measurement/lib.py b/src/cross_section_measurement/lib.py index 0bbdca35..1158f508 100644 --- a/src/cross_section_measurement/lib.py +++ b/src/cross_section_measurement/lib.py @@ -6,6 +6,48 @@ from tools.hist_utilities import value_error_tuplelist_to_hist, value_errors_tuplelist_to_graph from tools.file_utilities import read_data_from_JSON from tools.Timer import Timer + +closure_tests = { + 'simple' : {'V+Jets': 1.1, 'SingleTop': 1.2, 'TTJet': 1.3, 'QCD': 1.5}, + 'ttbar_only' : {'V+Jets': 0, 'SingleTop': 0, 'TTJet': 1, 'QCD': 0}, + 'singletop_only' : {'V+Jets': 0, 'SingleTop': 1, 'TTJet': 0, 'QCD': 0}, + 'vjets_only' : {'V+Jets': 1, 'SingleTop': 0, 'TTJet': 0, 'QCD': 0}, + 'qcd_only' : {'V+Jets': 0, 'SingleTop': 0, 'TTJet': 0, 'QCD': 1}, + } + +def read_fit_results(path_to_JSON = 'data', + variable = 'MET', + category = 'central', + channel = 'combined', + met_type = 'patType1CorrectedPFMet'): + return read_from_fit_results_folder(path_to_JSON, variable, category, channel, met_type, 'fit_results') + +def read_fit_input(path_to_JSON = 'data', + variable = 'MET', + category = 'central', + channel 
= 'combined', + met_type = 'patType1CorrectedPFMet'): + return read_from_fit_results_folder(path_to_JSON, variable, category, channel, met_type, 'initial_values') + +def read_fit_templates(path_to_JSON = 'data', + variable = 'MET', + category = 'central', + channel = 'combined', + met_type = 'patType1CorrectedPFMet'): + return read_from_fit_results_folder(path_to_JSON, variable, category, channel, met_type, 'templates') + +def read_from_fit_results_folder(path_to_JSON = 'data', + variable = 'MET', + category = 'central', + channel = 'combined', + met_type = 'patType1CorrectedPFMet', + data_type = 'fit_results'): + filename = path_to_JSON + '/' + variable + '/fit_results/' + category + '/' + filename += data_type + '_' + channel + '_' + met_type + '.txt' + results = read_data_from_JSON( filename ) + + return results + def read_xsection_measurement_results( path_to_JSON, variable, bin_edges, category, channel, diff --git a/src/cross_section_measurement/make_fit_variable_plots.py b/src/cross_section_measurement/make_fit_variable_plots.py index 6d08a0d8..d8c551a4 100644 --- a/src/cross_section_measurement/make_fit_variable_plots.py +++ b/src/cross_section_measurement/make_fit_variable_plots.py @@ -3,10 +3,14 @@ @author: kreczko ''' +from collections import OrderedDict +from copy import copy, deepcopy + from tools.ROOT_utililities import get_histograms_from_files -from copy import copy -from tools.hist_utilities import prepare_histograms -from tools.plotting import make_data_mc_comparison_plot, Histogram_properties, make_shape_comparison_plot +from tools.hist_utilities import prepare_histograms, clean_control_region +from tools.file_utilities import make_folder_if_not_exists +from tools.plotting import make_data_mc_comparison_plot, Histogram_properties, make_shape_comparison_plot,\ + compare_measurements from config.latex_labels import b_tag_bins_latex from config.latex_labels import samples_latex from config import XSectionConfig @@ -16,17 +20,18 @@ muon_fit_variables = copy( common_fit_variables ) muon_fit_variables.append( 'muon_absolute_eta' ) +measurement_config = XSectionConfig( 8 ) fit_variable_properties = { - 'M3': {'min':0, 'max':600, 'rebin':4, 'x-title': 'M3 [GeV]', 'y-title': 'Events/20 GeV'}, - 'M_bl': {'min':0, 'max':400, 'rebin':4, 'x-title': 'M(b,l) [GeV]', 'y-title': 'Events/20 GeV'}, - 'angle_bl': {'min':0, 'max':4, 'rebin':1, 'x-title': 'angle(b,l)', 'y-title': 'Events/(0.1)'}, - 'electron_absolute_eta': {'min':0, 'max':2.6, 'rebin':1, 'x-title': '$\left|\eta(e)\\right|$', 'y-title': 'Events/(0.1)'}, - 'muon_absolute_eta': {'min':0, 'max':2.6, 'rebin':1, 'x-title': '$\left|\eta(\mu)\\right|$', 'y-title': 'Events/(0.1)'}, + 'M3': {'min':0, 'max':1000, 'rebin':5, 'x-title': 'M3 [GeV]', 'y-title': 'Events/25 GeV'}, + 'M_bl': {'min':0, 'max':400, 'rebin':2, 'x-title': 'M(b,l) [GeV]', 'y-title': 'Events/10 GeV'}, + 'angle_bl': {'min':0, 'max':3.5, 'rebin':2, 'x-title': 'angle(b,l)', 'y-title': 'Events/(0.2)'}, + 'electron_absolute_eta': {'min':0, 'max':2.6, 'rebin':2, 'x-title': '$\left|\eta(e)\\right|$', 'y-title': 'Events/(0.2)'}, + 'muon_absolute_eta': {'min':0, 'max':2.6, 'rebin':2, 'x-title': '$\left|\eta(\mu)\\right|$', 'y-title': 'Events/(0.2)'}, } b_tag_bin = '2orMoreBtags' +b_tag_bin_ctl = '0orMoreBtag' category = 'central' -measurement_config = XSectionConfig(8) histogram_files = { 'data' : measurement_config.data_file_electron, 'TTJet': measurement_config.ttbar_category_templates[category], @@ -40,39 +45,104 @@ variable_bins_ROOT = { 'MET': ['0-31', '31-58', '58-96', 
'96-142', '142-191', '191-inf'], } + +save_as = ['pdf', 'png'] +save_as = ['pdf'] def main(): - global electron_fit_variables, muon_fit_variables, fit_variable_properties, b_tag_bin, category, histogram_files, variables, variable_bins_ROOT + global electron_fit_variables, muon_fit_variables, fit_variable_properties + global b_tag_bin, category, histogram_files, variables, variable_bins_ROOT + global b_tag_bin_ctl title_template = 'CMS Preliminary, $\mathcal{L} = %.1f$ fb$^{-1}$ at $\sqrt{s}$ = %d TeV \n %s' e_title = title_template % ( measurement_config.new_luminosity / 1000., measurement_config.centre_of_mass_energy, 'e+jets, $\geq$ 4 jets' ) met_type = 'patType1CorrectedPFMet' for variable in variables: variable_bins = variable_bins_ROOT[variable] - histogram_template = '' - if variable == 'HT': - histogram_template = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/Binned_HT_Analysis/HT_bin_%(bin_range)s/%(fit_variable)s_%(b_tag_bin)s' - elif variable == 'MET': - histogram_template = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/Binned_MET_Analysis/%(met_type)s_bin_%(bin_range)s/%(fit_variable)s_%(b_tag_bin)s' - else: - histogram_template = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/Binned_%s_Analysis/%(variable)s_with_%(met_type)s_bin_%(bin_range)s/%(fit_variable)s_%(b_tag_bin)s' + histogram_template = get_histogram_template( variable ) - for bin_range in variable_bins: - for fit_variable in electron_fit_variables: + for fit_variable in electron_fit_variables: + if '_bl' in fit_variable: + b_tag_bin_ctl = '1orMoreBtag' + else: + b_tag_bin_ctl = '0orMoreBtag' + save_path = 'plots/fit_variables/' + variable + '/' + fit_variable + '/' + make_folder_if_not_exists(save_path) + make_folder_if_not_exists(save_path + 'qcd/') + make_folder_if_not_exists(save_path + 'vjets/') + for bin_range in variable_bins: params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable} fit_variable_distribution = histogram_template % params qcd_fit_variable_distribution = fit_variable_distribution.replace( 'Ref selection', 'QCDConversions' ) + qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl ) histograms = get_histograms_from_files( [fit_variable_distribution, qcd_fit_variable_distribution], histogram_files ) - plot_fit_variable( histograms, fit_variable, variable, bin_range, fit_variable_distribution, qcd_fit_variable_distribution, e_title ) - + plot_fit_variable( histograms, fit_variable, variable, bin_range, fit_variable_distribution, qcd_fit_variable_distribution, e_title, save_path ) + compare_qcd_control_regions(variable, met_type, e_title) + compare_vjets_btag_regions(variable, met_type, e_title) + +def get_histogram_template( variable ): + histogram_template = '' + if variable == 'HT': + histogram_template = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/Binned_HT_Analysis/HT_bin_%(bin_range)s/%(fit_variable)s_%(b_tag_bin)s' + elif variable == 'MET': + histogram_template = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/Binned_MET_Analysis/%(met_type)s_bin_%(bin_range)s/%(fit_variable)s_%(b_tag_bin)s' + else: + histogram_template = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/Binned_%s_Analysis/%(variable)s_with_%(met_type)s_bin_%(bin_range)s/%(fit_variable)s_%(b_tag_bin)s' + return histogram_template + def plot_fit_variable( histograms, fit_variable, variable, bin_range, fit_variable_distribution, qcd_fit_variable_distribution, - title ): - global fit_variable_properties, b_tag_bin + 
title, save_path ): + global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl mc_uncertainty = 0.10 prepare_histograms( histograms, rebin = fit_variable_properties[fit_variable]['rebin'], scale_factor = measurement_config.luminosity_scale ) - qcd_from_data = histograms['data'][qcd_fit_variable_distribution].Clone() + histogram_properties = Histogram_properties() + histogram_properties.x_axis_title = fit_variable_properties[fit_variable]['x-title'] + histogram_properties.y_axis_title = fit_variable_properties[fit_variable]['y-title'] + histogram_properties.x_limits = [fit_variable_properties[fit_variable]['min'], fit_variable_properties[fit_variable]['max']] + + histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top', samples_latex['TTJet']] + histogram_colors = ['black', 'yellow', 'green', 'magenta', 'red'] +# qcd_from_data = histograms['data'][qcd_fit_variable_distribution].Clone() + # clean against other processes + histograms_for_cleaning = {'data':histograms['data'][qcd_fit_variable_distribution], + 'V+Jets':histograms['V+Jets'][qcd_fit_variable_distribution], + 'SingleTop':histograms['SingleTop'][qcd_fit_variable_distribution], + 'TTJet':histograms['TTJet'][qcd_fit_variable_distribution]} + qcd_from_data = clean_control_region( histograms_for_cleaning, subtract = ['TTJet', 'V+Jets', 'SingleTop'] ) + + + histograms_to_draw = [histograms['data'][qcd_fit_variable_distribution], + histograms['QCD'][qcd_fit_variable_distribution], + histograms['V+Jets'][qcd_fit_variable_distribution], + histograms['SingleTop'][qcd_fit_variable_distribution], + histograms['TTJet'][qcd_fit_variable_distribution]] + + histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin_ctl] + histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_%s_QCDConversions' % b_tag_bin_ctl + make_data_mc_comparison_plot( histograms_to_draw, histogram_lables, histogram_colors, + histogram_properties, + save_folder = save_path + '/qcd/', + show_ratio = False, + save_as = save_as, + ) + + histograms_to_draw = [qcd_from_data, + histograms['QCD'][qcd_fit_variable_distribution], + ] + + histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_%s_QCDConversions_subtracted' % b_tag_bin_ctl + make_data_mc_comparison_plot( histograms_to_draw, + histogram_lables = ['data', 'QCD'], + histogram_colors = ['black', 'yellow'], + histogram_properties = histogram_properties, + save_folder = save_path + '/qcd/', + show_ratio = False, + save_as = save_as, + ) + + # scale QCD to predicted n_qcd_predicted_mc = histograms['QCD'][fit_variable_distribution].Integral() n_qcd_fit_variable_distribution = qcd_from_data.Integral() if not n_qcd_fit_variable_distribution == 0: @@ -81,30 +151,141 @@ def plot_fit_variable( histograms, fit_variable, variable, bin_range, histograms_to_draw = [histograms['data'][fit_variable_distribution], qcd_from_data, histograms['V+Jets'][fit_variable_distribution], histograms['SingleTop'][fit_variable_distribution], histograms['TTJet'][fit_variable_distribution]] - histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top', samples_latex['TTJet']] - histogram_colors = ['black', 'yellow', 'green', 'magenta', 'red'] - histogram_properties = Histogram_properties() - histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin] - histogram_properties.x_axis_title = fit_variable_properties[fit_variable]['x-title'] - histogram_properties.y_axis_title = 
fit_variable_properties[fit_variable]['y-title'] - histogram_properties.x_limits = [fit_variable_properties[fit_variable]['min'], fit_variable_properties[fit_variable]['max']] + histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin + make_data_mc_comparison_plot( histograms_to_draw, + histogram_lables, + histogram_colors, + histogram_properties, + save_folder = save_path, + show_ratio = False, + save_as = save_as, + ) histogram_properties.mc_error = mc_uncertainty histogram_properties.mc_errors_label = '$\mathrm{t}\\bar{\mathrm{t}}$ uncertainty' - - make_data_mc_comparison_plot( histograms_to_draw, histogram_lables, histogram_colors, - histogram_properties, save_folder = 'plots/fit_variables/', show_ratio = False ) - histogram_properties.name += '_templates' + histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin + '_templates' + # change histogram order for better visibility + histograms_to_draw = [histograms['TTJet'][fit_variable_distribution] + histograms['SingleTop'][fit_variable_distribution], + histograms['TTJet'][fit_variable_distribution], + histograms['SingleTop'][fit_variable_distribution], + histograms['V+Jets'][fit_variable_distribution], + qcd_from_data] + histogram_lables = ['QCD', 'V+Jets', 'Single-Top', samples_latex['TTJet'], samples_latex['TTJet'] + ' + ' + 'Single-Top'] + histogram_lables.reverse() # change QCD color to orange for better visibility - histogram_colors = ['black', 'orange', 'green', 'magenta', 'red'] - make_shape_comparison_plot( shapes = histograms_to_draw[1:], - names = histogram_lables[1:], - colours = histogram_colors[1:], - histogram_properties = histogram_properties, - fill_area = False, - alpha = 1, - save_folder = 'plots/fit_variables/') + histogram_colors = ['orange', 'green', 'magenta', 'red', 'black'] + histogram_colors.reverse() + make_shape_comparison_plot( shapes = histograms_to_draw, + names = histogram_lables, + colours = histogram_colors, + histogram_properties = histogram_properties, + fill_area = False, + alpha = 1, + save_folder = save_path, + save_as = save_as, + ) + +def compare_qcd_control_regions( variable = 'MET', met_type = 'patType1CorrectedPFMet', title = 'Untitled'): + ''' Compares the templates from the control regions in different bins + of the current variable''' + global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl + variable_bins = variable_bins_ROOT[variable] + histogram_template = get_histogram_template( variable ) + + for fit_variable in electron_fit_variables: + all_hists = {} + inclusive_hist = None + if '_bl' in fit_variable: + b_tag_bin_ctl = '1orMoreBtag' + else: + b_tag_bin_ctl = '0orMoreBtag' + save_path = 'plots/fit_variables/' + variable + '/' + fit_variable + '/' + make_folder_if_not_exists(save_path + '/qcd/') + + max_bins = 3 + for bin_range in variable_bins[0:max_bins]: + + params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable} + fit_variable_distribution = histogram_template % params + qcd_fit_variable_distribution = fit_variable_distribution.replace( 'Ref selection', 'QCDConversions' ) + qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl ) + # format: histograms['data'][qcd_fit_variable_distribution] + histograms = get_histograms_from_files( [qcd_fit_variable_distribution], histogram_files ) + prepare_histograms( histograms, rebin = fit_variable_properties[fit_variable]['rebin'], scale_factor = 
measurement_config.luminosity_scale ) + + histograms_for_cleaning = {'data':histograms['data'][qcd_fit_variable_distribution], + 'V+Jets':histograms['V+Jets'][qcd_fit_variable_distribution], + 'SingleTop':histograms['SingleTop'][qcd_fit_variable_distribution], + 'TTJet':histograms['TTJet'][qcd_fit_variable_distribution]} + qcd_from_data = clean_control_region( histograms_for_cleaning, subtract = ['TTJet', 'V+Jets', 'SingleTop'] ) + # clean + all_hists[bin_range] = qcd_from_data + + # create the inclusive distributions + inclusive_hist = deepcopy(all_hists[variable_bins[0]]) + for bin_range in variable_bins[1:max_bins]: + inclusive_hist += all_hists[bin_range] + for bin_range in variable_bins[0:max_bins]: + if not all_hists[bin_range].Integral() == 0: + all_hists[bin_range].Scale(1/all_hists[bin_range].Integral()) + # normalise all histograms + inclusive_hist.Scale(1/inclusive_hist.Integral()) + # now compare inclusive to all bins + histogram_properties = Histogram_properties() + histogram_properties.x_axis_title = fit_variable_properties[fit_variable]['x-title'] + histogram_properties.y_axis_title = fit_variable_properties[fit_variable]['y-title'] + histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace('Events', 'a.u.') + histogram_properties.x_limits = [fit_variable_properties[fit_variable]['min'], fit_variable_properties[fit_variable]['max']] +# histogram_properties.y_limits = [0, 0.5] + histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin_ctl] + histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin_ctl + '_QCD_template_comparison' + measurements = {bin_range + ' GeV': histogram for bin_range, histogram in all_hists.iteritems()} + measurements = OrderedDict(sorted(measurements.items())) + compare_measurements(models = {'inclusive' : inclusive_hist}, + measurements = measurements, + show_measurement_errors = True, + histogram_properties = histogram_properties, + save_folder = save_path + '/qcd/', + save_as = save_as) + +def compare_vjets_btag_regions( variable = 'MET', met_type = 'patType1CorrectedPFMet', title = 'Untitled'): + ''' Compares the V+Jets template in different b-tag bins''' + global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl + b_tag_bin_ctl = '0orMoreBtag' + variable_bins = variable_bins_ROOT[variable] + histogram_template = get_histogram_template( variable ) + + for fit_variable in electron_fit_variables: + if '_bl' in fit_variable: + b_tag_bin_ctl = '1orMoreBtag' + else: + b_tag_bin_ctl = '0orMoreBtag' + save_path = 'plots/fit_variables/' + variable + '/' + fit_variable + '/' + make_folder_if_not_exists(save_path + '/vjets/') + histogram_properties = Histogram_properties() + histogram_properties.x_axis_title = fit_variable_properties[fit_variable]['x-title'] + histogram_properties.y_axis_title = fit_variable_properties[fit_variable]['y-title'] + histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace('Events', 'a.u.') + histogram_properties.x_limits = [fit_variable_properties[fit_variable]['min'], fit_variable_properties[fit_variable]['max']] + histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin_ctl] + for bin_range in variable_bins: + params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable} + fit_variable_distribution = histogram_template % params + fit_variable_distribution_ctl = fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl ) + # format: 
histograms['data'][qcd_fit_variable_distribution] + histograms = get_histograms_from_files( [fit_variable_distribution, fit_variable_distribution_ctl], {'V+Jets' : histogram_files['V+Jets']} ) + prepare_histograms( histograms, rebin = fit_variable_properties[fit_variable]['rebin'], scale_factor = measurement_config.luminosity_scale ) + histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin_ctl + '_VJets_template_comparison' + histograms['V+Jets'][fit_variable_distribution].Scale(1/histograms['V+Jets'][fit_variable_distribution].Integral()) + histograms['V+Jets'][fit_variable_distribution_ctl].Scale(1/histograms['V+Jets'][fit_variable_distribution_ctl].Integral()) + compare_measurements(models = {'no b-tag' : histograms['V+Jets'][fit_variable_distribution_ctl]}, + measurements = {'$>=$ 2 b-tags': histograms['V+Jets'][fit_variable_distribution]}, + show_measurement_errors = True, + histogram_properties = histogram_properties, + save_folder = save_path + '/vjets/', + save_as = save_as) + if __name__ == '__main__': main() diff --git a/test/RooFitFit.py b/test/RooFitFit.py deleted file mode 100644 index 3bb77c44..00000000 --- a/test/RooFitFit.py +++ /dev/null @@ -1,77 +0,0 @@ -''' -Created on 31 Oct 2012 - -@author: kreczko -''' -import unittest -from tools.Fitting import RooFitFit -from rootpy.plotting import Hist -from math import sqrt - -import numpy as np -N_bkg1 = 9000 -N_signal = 1000 -N_bkg1_obs = 10000 -N_signal_obs = 2000 -N_data = N_bkg1_obs + N_signal_obs -mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5 -x1 = mu1 + sigma1 * np.random.randn(N_bkg1) -x2 = mu2 + sigma2 * np.random.randn(N_signal) -x1_obs = mu1 + sigma1 * np.random.randn(N_bkg1_obs) -x2_obs = mu2 + sigma2 * np.random.randn(N_signal_obs) - -class Test(unittest.TestCase): - - - def setUp(self): - - - # create histograms - h1 = Hist(100, 40, 200, title='Background') - h2 = h1.Clone(title='Signal') - h3 = h1.Clone(title='Data') - h3.markersize=1.2 - - # fill the histograms with our distributions - map(h1.Fill, x1) - map(h2.Fill, x2) - map(h3.Fill, x1_obs) - map(h3.Fill, x2_obs) - - histograms = {'signal': h2, - 'bkg1': h1, - 'data': h3} - self.roofitFitter = RooFitFit(histograms, dataLabel='data', fit_boundries = (40, 200)) - self.roofitFitter.fit() - - - def tearDown(self): - pass - - def testTemplateKeys(self): - templateKeys = sorted(self.roofitFitter.templates.keys()) - self.assertEqual(templateKeys, sorted(['signal', 'bkg1', 'data'])) - - def testNormalisation(self): - normalisation = self.roofitFitter.normalisation - self.assertAlmostEqual(normalisation["data"], N_data, delta = sqrt(N_data)) - self.assertAlmostEqual(normalisation["bkg1"], N_bkg1, delta = sqrt(N_bkg1)) - self.assertAlmostEqual(normalisation["signal"], N_signal, delta = sqrt(N_signal)) - - def testSignalResult(self): - results = self.roofitFitter.readResults() - self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1]) - self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1]) - - def testConstraints(self): - self.roofitFitter.set_fit_constraints({'signal': 0.8, 'bkg1': 0.5}) - self.roofitFitter.fit() - results = self.roofitFitter.readResults() - self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1]) - self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1]) - - - -if __name__ == "__main__": - #import sys;sys.argv = ['', 'Test.testTemplates'] - unittest.main() diff --git a/test/TMinuitFit.py 
b/test/TMinuitFit.py deleted file mode 100644 index 564c77cd..00000000 --- a/test/TMinuitFit.py +++ /dev/null @@ -1,71 +0,0 @@ -''' -Created on 31 Oct 2012 - -@author: kreczko -''' -import unittest -from tools.Fitting import TMinuitFit -from rootpy.plotting import Hist -from math import sqrt -from ROOT import RooFit, RooRealVar, RooDataHist, RooArgList, RooHistPdf, RooArgSet, RooAddPdf - -import numpy as np -N_bkg1 = 9000 -N_signal = 1000 -N_bkg1_obs = 10000 -N_signal_obs = 2000 -N_data = N_bkg1_obs + N_signal_obs -mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5 -x1 = mu1 + sigma1 * np.random.randn(N_bkg1) -x2 = mu2 + sigma2 * np.random.randn(N_signal) -x1_obs = mu1 + sigma1 * np.random.randn(N_bkg1_obs) -x2_obs = mu2 + sigma2 * np.random.randn(N_signal_obs) - -class Test(unittest.TestCase): - - - def setUp(self): - - - # create histograms - h1 = Hist(100, 40, 200, title='Background') - h2 = h1.Clone(title='Signal') - h3 = h1.Clone(title='Data') - h3.markersize=1.2 - - # fill the histograms with our distributions - map(h1.Fill, x1) - map(h2.Fill, x2) - map(h3.Fill, x1_obs) - map(h3.Fill, x2_obs) - - histograms = {'signal': h2, - 'bkg1': h1, - 'data': h3} - self.minuitFitter = TMinuitFit(histograms, dataLabel='data') - self.minuitFitter.fit() - - - def tearDown(self): - pass - - def testTemplateKeys(self): - templateKeys = sorted(self.minuitFitter.templates.keys()) - self.assertEqual(templateKeys, sorted(['signal', 'bkg1', 'data'])) - - def testNormalisation(self): - normalisation = self.minuitFitter.normalisation - self.assertAlmostEqual(normalisation["data"], N_data, delta = sqrt(N_data)) - self.assertAlmostEqual(normalisation["bkg1"], N_bkg1, delta = sqrt(N_bkg1)) - self.assertAlmostEqual(normalisation["signal"], N_signal, delta = sqrt(N_signal)) - - def testSignalResult(self): - results = self.minuitFitter.readResults() - self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1]) - self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1]) - - - -if __name__ == "__main__": - #import sys;sys.argv = ['', 'Test.testTemplates'] - unittest.main() diff --git a/test/tools_Fitting_FitData.py b/test/tools_Fitting_FitData.py new file mode 100644 index 00000000..d5bda19c --- /dev/null +++ b/test/tools_Fitting_FitData.py @@ -0,0 +1,169 @@ +''' +Created on 31 Oct 2012 + +@author: kreczko +''' +import unittest +from tools.Fitting import FitData, FitDataCollection +from rootpy.plotting import Hist + +import numpy as np +from tools.hist_utilities import adjust_overflow_to_limit +N_bkg1 = 9000 +N_signal = 1000 +N_bkg1_obs = 10000 +N_signal_obs = 2000 +N_data = N_bkg1_obs + N_signal_obs +mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5 +x1 = mu1 + sigma1 * np.random.randn( N_bkg1 ) +x2 = mu2 + sigma2 * np.random.randn( N_signal ) +x1_obs = mu1 + sigma1 * np.random.randn( N_bkg1_obs ) +x2_obs = mu2 + sigma2 * np.random.randn( N_signal_obs ) + +x3 = mu2 + sigma1 * np.random.randn( N_bkg1 ) +x4 = mu1 + sigma2 * np.random.randn( N_signal ) +x3_obs = mu2 + sigma1 * np.random.randn( N_bkg1_obs ) +x4_obs = mu1 + sigma2 * np.random.randn( N_signal_obs ) + +x_min = 40 +x_max = 200 + +data_scale = 1.2 +N_data = N_data * data_scale + +class Test( unittest.TestCase ): + + + def setUp( self ): + + # create histograms + h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' ) + h_signal_1 = h_bkg1_1.Clone( title = 'Signal' ) + h_data_1 = h_bkg1_1.Clone( title = 'Data' ) + h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' ) + h_signal_2 = h_bkg1_1.Clone( title = 'Signal' ) + 
h_data_2 = h_bkg1_1.Clone( title = 'Data' ) + + # fill the histograms with our distributions + map( h_bkg1_1.Fill, x1 ) + map( h_signal_1.Fill, x2 ) + map( h_data_1.Fill, x1_obs ) + map( h_data_1.Fill, x2_obs ) + + map( h_bkg1_2.Fill, x3 ) + map( h_signal_2.Fill, x4 ) + map( h_data_2.Fill, x3_obs ) + map( h_data_2.Fill, x4_obs ) + + h_data_1.Scale(data_scale) + h_data_2.Scale(data_scale) + + self.histograms_1 = {'signal': h_signal_1, + 'bkg1': h_bkg1_1} + + self.histograms_2 = {'signal': h_signal_2, + 'bkg1': h_bkg1_2} + + self.histograms_3 = {'var1': h_signal_1, + 'bkg1': h_bkg1_1} + + self.fit_data_1 = FitData( h_data_1, self.histograms_1, fit_boundaries = ( x_min, x_max )) + self.fit_data_2 = FitData( h_data_2, self.histograms_2, fit_boundaries = ( x_min, x_max )) + self.fit_data_3 = FitData( h_data_1, self.histograms_3, fit_boundaries = ( x_min, x_max )) + + self.collection_1 = FitDataCollection() + self.collection_1.add( self.fit_data_1, 'signal region' ) + self.collection_1.add( self.fit_data_2, 'control region' ) + self.collection_1.set_normalisation_constraints({'bkg1': 0.5}) + + self.collection_2 = FitDataCollection() + self.collection_2.add( self.fit_data_1 ) + self.collection_2.add( self.fit_data_2 ) + self.collection_2.set_normalisation_constraints({'bkg1': 0.5}) + + self.single_collection = FitDataCollection() + self.single_collection.add( self.fit_data_1 ) + self.single_collection.set_normalisation_constraints({'bkg1': 0.5}) + + self.non_simultaneous_fit_collection = FitDataCollection() + self.non_simultaneous_fit_collection.add( self.fit_data_1 ) + self.non_simultaneous_fit_collection.add( self.fit_data_3 ) + + self.h_data = h_data_1 + self.h_bkg1 = h_bkg1_1 + self.h_signal = h_signal_1 + + def tearDown( self ): + pass + + def test_is_valid_for_simultaneous_fit( self ): + self.assertTrue( self.collection_1.is_valid_for_simultaneous_fit(), msg = 'has_same_n_samples: ' + str(self.collection_1.has_same_n_samples) + ', has_same_n_data: ' + str(self.collection_1.has_same_n_data) ) + self.assertTrue( self.collection_2.is_valid_for_simultaneous_fit(), msg = 'has_same_n_samples: ' + str(self.collection_1.has_same_n_samples) + ', has_same_n_data: ' + str(self.collection_1.has_same_n_data) ) + self.assertFalse( self.non_simultaneous_fit_collection.is_valid_for_simultaneous_fit() ) + + def test_samples( self ): + samples = sorted( self.histograms_1.keys() ) + samples_from_fit_data = sorted( self.fit_data_1.samples ) + samples_from_fit_data_collection = self.collection_1.mc_samples() + self.assertEqual( samples, samples_from_fit_data ) + self.assertEqual( samples, samples_from_fit_data_collection ) + + def test_normalisation( self ): + normalisation = {name:adjust_overflow_to_limit(histogram, x_min, x_max).Integral() for name, histogram in self.histograms_1.iteritems()} + normalisation_from_fit_data = self.fit_data_1.normalisation + normalisation_from_single_collection = self.single_collection.mc_normalisation() + normalisation_from_collection = self.collection_1.mc_normalisation( 'signal region' ) + normalisation_from_collection_1 = self.collection_1.mc_normalisation()['signal region'] + for sample in normalisation.keys(): + self.assertEqual( normalisation[sample], normalisation_from_fit_data[sample] ) + self.assertEqual( normalisation[sample], normalisation_from_single_collection[sample] ) + self.assertEqual( normalisation[sample], normalisation_from_collection[sample] ) + self.assertEqual( normalisation[sample], normalisation_from_collection_1[sample] ) + + # data normalisation + 
normalisation = self.h_data.integral( overflow = True ) + normalisation_from_fit_data = self.fit_data_1.n_data() + normalisation_from_single_collection = self.single_collection.n_data() + normalisation_from_collection = self.collection_1.n_data( 'signal region' ) + normalisation_from_collection_1 = self.collection_1.n_data()['signal region'] + self.assertEqual( normalisation, normalisation_from_fit_data ) + self.assertEqual( normalisation, normalisation_from_single_collection ) + self.assertEqual( normalisation, normalisation_from_collection ) + self.assertEqual( normalisation, normalisation_from_collection_1 ) + + self.assertAlmostEqual(normalisation, self.collection_1.max_n_data(), delta = 1 ) + + def test_real_data( self ): + real_data = self.fit_data_1.real_data_histogram() + self.assertEqual( self.h_data.integral( overflow = True ), real_data.Integral() ) + + def test_overwrite_warning( self ): + c = FitDataCollection() + c.add( self.fit_data_1, 'var1' ) + self.assertRaises( UserWarning, c.add, ( self.fit_data_1, 'var1' ) ) + + def test_vectors( self ): + h_signal = adjust_overflow_to_limit( self.h_signal, x_min, x_max ) + h_signal.Scale(1/h_signal.Integral()) + h_bkg1 = adjust_overflow_to_limit( self.h_bkg1, x_min, x_max ) + h_bkg1.Scale(1/h_bkg1.Integral()) + signal = list( h_signal.y() ) + bkg1 = list( h_bkg1.y() ) + + v_from_fit_data = self.fit_data_1.vectors + v_from_single_collection = self.single_collection.vectors() +# v_from_collection = self.collection_1.vectors( 'signal region' ) +# v_from_collection_1 = self.collection_1.vectors()['signal region'] + self.assertEqual(signal, v_from_fit_data['signal']) + self.assertEqual(bkg1, v_from_fit_data['bkg1']) + + self.assertEqual(signal, v_from_single_collection['signal']) + self.assertEqual(bkg1, v_from_single_collection['bkg1']) + + def test_constraints(self): + constraint_from_single_collection = self.single_collection.constraints()['bkg1'] + self.assertEqual(0.5, constraint_from_single_collection) + +if __name__ == "__main__": + # import sys;sys.argv = ['', 'Test.testTemplates'] + unittest.main() diff --git a/test/tools_Fitting_Minuit.py b/test/tools_Fitting_Minuit.py new file mode 100644 index 00000000..99a936a2 --- /dev/null +++ b/test/tools_Fitting_Minuit.py @@ -0,0 +1,136 @@ +''' +Created on 31 Oct 2012 + +@author: kreczko +''' +import unittest +from tools.Fitting import Minuit, FitData, FitDataCollection +from rootpy.plotting import Hist +from math import sqrt + +import numpy as np +N_bkg1 = 9000 +N_signal = 1000 +N_bkg1_obs = 10000 +N_signal_obs = 2000 +N_data = N_bkg1_obs + N_signal_obs +mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5 +x1 = mu1 + sigma1 * np.random.randn( N_bkg1 ) +x2 = mu2 + sigma2 * np.random.randn( N_signal ) +x1_obs = mu1 + sigma1 * np.random.randn( N_bkg1_obs ) +x2_obs = mu2 + sigma2 * np.random.randn( N_signal_obs ) + +x3 = mu2 + sigma1 * np.random.randn( N_bkg1 ) +x4 = mu1 + sigma2 * np.random.randn( N_signal ) +x3_obs = mu2 + sigma1 * np.random.randn( N_bkg1_obs ) +x4_obs = mu1 + sigma2 * np.random.randn( N_signal_obs ) + +data_scale = 1.2 +N_data = N_data * data_scale + +class Test( unittest.TestCase ): + + + def setUp( self ): + + + # create histograms + h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' ) + h_signal_1 = h_bkg1_1.Clone( title = 'Signal' ) + h_data_1 = h_bkg1_1.Clone( title = 'Data' ) + h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' ) + h_signal_2 = h_bkg1_1.Clone( title = 'Signal' ) + h_data_2 = h_bkg1_1.Clone( title = 'Data' ) + + # fill the histograms with our distributions + 
map( h_bkg1_1.Fill, x1 ) + map( h_signal_1.Fill, x2 ) + map( h_data_1.Fill, x1_obs ) + map( h_data_1.Fill, x2_obs ) + + map( h_bkg1_2.Fill, x3 ) + map( h_signal_2.Fill, x4 ) + map( h_data_2.Fill, x3_obs ) + map( h_data_2.Fill, x4_obs ) + + h_data_1.Scale( data_scale ) + h_data_2.Scale( data_scale ) + + histograms_1 = {'signal': h_signal_1, + 'bkg1': h_bkg1_1} + + histograms_2 = {'signal': h_signal_2, + 'bkg1': h_bkg1_2} + + fit_data_1 = FitData( h_data_1, histograms_1, fit_boundaries = ( 40, 200 ) ) + fit_data_2 = FitData( h_data_2, histograms_2, fit_boundaries = ( 40, 200 ) ) + + single_fit_collection = FitDataCollection() + single_fit_collection.add( fit_data_1 ) + + collection_1 = FitDataCollection() + collection_1.add( fit_data_1, 'var1' ) + collection_1.add( fit_data_2, 'var2' ) + + collection_2 = FitDataCollection() + collection_2.add( fit_data_1, 'var1' ) + collection_2.add( fit_data_2, 'var2' ) + collection_2.set_normalisation_constraints( {'bkg1':0.5} ) + + collection_3 = FitDataCollection() + collection_3.add( fit_data_1, 'var1' ) + collection_3.add( fit_data_2, 'var2' ) + collection_3.set_normalisation_constraints( {'bkg1':0.001} ) + + self.minuit_fitter = Minuit( single_fit_collection ) + self.minuit_fitter.fit() + + self.simultaneous_fit = Minuit( collection_1 ) + self.simultaneous_fit.fit() + + + self.simultaneous_fit_with_constraints = Minuit( collection_2 ) + self.simultaneous_fit_with_constraints.fit() + + self.simultaneous_fit_with_bad_constraints = Minuit( collection_3 ) + self.simultaneous_fit_with_bad_constraints.fit() + + + def tearDown( self ): + pass + + def test_normalisation( self ): + normalisation = self.minuit_fitter.normalisation + self.assertAlmostEqual( normalisation["data"], N_data, delta = sqrt( N_data ) ) + self.assertAlmostEqual( normalisation["bkg1"], N_bkg1, delta = sqrt( N_bkg1 ) ) + self.assertAlmostEqual( normalisation["signal"], N_signal, delta = sqrt( N_signal ) ) + + def test_result( self ): + results = self.minuit_fitter.readResults() + self.assertAlmostEqual( N_signal_obs * data_scale, results['signal'][0], delta = 2 * results['signal'][1] ) + self.assertAlmostEqual( N_bkg1_obs * data_scale, results['bkg1'][0], delta = 2 * results['bkg1'][1] ) + + def test_result_simultaneous( self ): + results = self.simultaneous_fit.readResults() + self.assertAlmostEqual( N_signal_obs * data_scale, results['signal'][0], delta = 2 * results['signal'][1] ) + self.assertAlmostEqual( N_bkg1_obs * data_scale, results['bkg1'][0], delta = 2 * results['bkg1'][1] ) + + def test_result_simultaneous_with_constraints( self ): + results = self.simultaneous_fit_with_constraints.readResults() + self.assertAlmostEqual( N_signal_obs * data_scale, results['signal'][0], delta = 2 * results['signal'][1] ) + self.assertAlmostEqual( N_bkg1_obs * data_scale, results['bkg1'][0], delta = 2 * results['bkg1'][1] ) + + def test_result_simultaneous_with_bad_constraints( self ): + results = self.simultaneous_fit_with_bad_constraints.readResults() + self.assertNotAlmostEqual( N_signal_obs * data_scale, results['signal'][0], delta = results['signal'][1] ) + self.assertNotAlmostEqual( N_bkg1_obs * data_scale, results['bkg1'][0], delta = results['bkg1'][1] ) + + def test_relative_error( self ): + results = self.minuit_fitter.readResults() + self.assertLess( results['signal'][1] / results['signal'][0], 0.1 ) + self.assertLess( results['bkg1'][1] / results['bkg1'][0], 0.1 ) + + +if __name__ == "__main__": + # import sys;sys.argv = ['', 'Test.testTemplates'] + unittest.main() diff --git 
a/test/tools_Fitting_RooFitFit.py b/test/tools_Fitting_RooFitFit.py new file mode 100644 index 00000000..676c112f --- /dev/null +++ b/test/tools_Fitting_RooFitFit.py @@ -0,0 +1,80 @@ +''' +Created on 31 Oct 2012 + +@author: kreczko +''' +import unittest +from tools.Fitting import RooFitFit, FitData, FitDataCollection +from rootpy.plotting import Hist +from math import sqrt + +import numpy as np +N_bkg1 = 9000 +N_signal = 1000 +N_bkg1_obs = 10000 +N_signal_obs = 2000 +N_data = N_bkg1_obs + N_signal_obs +mu1, mu2, sigma1, sigma2 = 100, 140, 15, 5 +x1 = mu1 + sigma1 * np.random.randn(N_bkg1) +x2 = mu2 + sigma2 * np.random.randn(N_signal) +x1_obs = mu1 + sigma1 * np.random.randn(N_bkg1_obs) +x2_obs = mu2 + sigma2 * np.random.randn(N_signal_obs) + +class Test(unittest.TestCase): + + def setUp(self): + # create histograms + h_bkg1_1 = Hist(100, 40, 200, title='Background') + h_signal_1 = h_bkg1_1.Clone(title='Signal') + h_data_1 = h_bkg1_1.Clone(title='Data') + + # fill the histograms with our distributions + map(h_bkg1_1.Fill, x1) + map(h_signal_1.Fill, x2) + map(h_data_1.Fill, x1_obs) + map(h_data_1.Fill, x2_obs) + + histograms_1 = {'signal': h_signal_1, + 'bkg1': h_bkg1_1, +# 'data': h_data_1 + } + fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200)) + self.single_fit_collection = FitDataCollection() + self.single_fit_collection.add( fit_data_1 ) + +# self.roofitFitter = RooFitFit(histograms_1, dataLabel='data', fit_boundries=(40, 200)) + self.roofitFitter = RooFitFit(self.single_fit_collection) + + def tearDown(self): + pass + + def test_normalisation(self): + normalisation = self.roofitFitter.normalisation + self.assertAlmostEqual(normalisation["data"], N_data, delta=sqrt(N_data)) + self.assertAlmostEqual(normalisation["bkg1"], N_bkg1, delta=sqrt(N_bkg1)) + self.assertAlmostEqual(normalisation["signal"], N_signal, delta=sqrt(N_signal)) + + def test_signal_result(self): + self.roofitFitter.fit() + results = self.roofitFitter.readResults() + self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1]) + self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1]) + + def test_constraints(self): + self.single_fit_collection.set_normalisation_constraints({'signal': 0.8, 'bkg1': 0.5}) + self.roofitFitter = RooFitFit(self.single_fit_collection) +# self.roofitFitter.set_fit_constraints({'signal': 0.8, 'bkg1': 0.5}) + self.roofitFitter.fit() + results = self.roofitFitter.readResults() + self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1]) + self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1]) + +# def test_relative_error(self): +# results = self.roofitFitter.readResults() +# self.roofitFitter.saved_result.Print("v"); +# self.assertLess(results['signal'][1]/results['signal'][0], 0.1) +# self.assertLess(results['bkg1'][1]/results['bkg1'][0], 0.1) + +if __name__ == "__main__": + # import sys;sys.argv = ['', 'Test.testTemplates'] + unittest.main() diff --git a/test/tools_hist_utilities.py b/test/tools_hist_utilities.py index 122ac245..69f8c2ec 100644 --- a/test/tools_hist_utilities.py +++ b/test/tools_hist_utilities.py @@ -5,18 +5,24 @@ ''' from __future__ import division import unittest -from rootpy.plotting import Hist2D -from tools.hist_utilities import rebin_2d +from rootpy.plotting import Hist, Hist2D +from tools.hist_utilities import rebin_2d, adjust_overflow_to_limit, hist_to_value_error_tuplelist import numpy as np +N_bkg1 = 9000 +mu1, sigma1 = 50, 20 +x1 
= mu1 + sigma1 * np.random.randn( N_bkg1 ) class Test( unittest.TestCase ): def setUp( self ): # create histograms + self.h1 = Hist( 100, 0, 100, title = '1D' ) self.h2 = Hist2D( 60, 40, 100, 60, 40, 100 ) + self.simple = Hist( 100, 0, 100, title = '1D' ) # fill the histograms with our distributions + map( self.h1.Fill, x1 ) self.h2.fill_array( np.random.multivariate_normal( mean = ( 50, 50 ), cov = np.arange( 4 ).reshape( 2, 2 ), @@ -62,6 +68,9 @@ def setUp( self ): # [2, 0, 0] self.bin_edges_not_on_boundaries = [0, 3.5, 6, 20] self.result_not_on_boundaries = [2, 1, 2] + + for i in range(100): + self.simple.Fill(i, 1) def tearDown( self ): pass @@ -105,5 +114,48 @@ def test_rebin_2d_not_on_boundaries( self ): self.result_not_on_boundaries[i - 1], 'histogram contents do not match' ) + + def test_adjust_overflow_to_limit_simple( self ): + x_min = 0 + x_max = 95 + adjusted = adjust_overflow_to_limit(self.simple, x_min, x_max) +# for entry_1, entry_2 in zip(hist_to_value_error_tuplelist(self.simple), hist_to_value_error_tuplelist(adjusted)): +# print entry_1, entry_2 +# print self.simple.integral( overflow = True ), adjusted.integral() +# print self.simple.GetBinContent(1), self.simple.GetBinContent(self.simple.nbins()) + # number of events should be unchanged + # the adjusted histogram should have no overflow for this example + self.assertEqual( self.simple.integral( overflow = True ), adjusted.integral() ) + # first bin (x_min) should contain all events + # with x <= x_min + x_min_bin = self.simple.FindBin(x_min) + x_max_bin = self.simple.FindBin(x_max) + self.assertEqual(self.simple.integral(0, x_min_bin), + adjusted.GetBinContent(x_min_bin)) + # last bin (x_max) should contain all events + # with x >= x_max + self.assertEqual( self.simple.integral( x_max_bin, self.simple.nbins() + 1), + adjusted.GetBinContent( x_max_bin ) ) + + + def test_adjust_overflow_to_limit( self ): + x_min = 40 + x_max = 80 + adjusted = adjust_overflow_to_limit(self.h1, x_min, x_max) + # number of events should be unchanged + # the adjusted histogram should have no overflow for this example + self.assertEqual(self.h1.integral( overflow = True ), adjusted.Integral()) + # first bin (x_min) should contain all events + # with x <= x_min + x_min_bin = self.h1.FindBin(x_min) + x_max_bin = self.h1.FindBin(x_max) + self.assertEqual(self.h1.integral(0, x_min_bin), + adjusted.GetBinContent(x_min_bin)) + # last bin (x_max) should contain all events + # with x >= x_max + self.assertEqual( self.h1.integral( x_max_bin, self.h1.nbins() + 1 ), + adjusted.GetBinContent( x_max_bin ) ) + + if __name__ == "__main__": unittest.main() diff --git a/tools/Fitting.py b/tools/Fitting.py index 15417f41..7cb58736 100644 --- a/tools/Fitting.py +++ b/tools/Fitting.py @@ -5,127 +5,225 @@ ''' from ROOT import TMinuit, TMath, Long, Double +from ROOT import RooFit, RooRealVar, RooDataHist, RooArgList, RooHistPdf, \ +RooArgSet, RooAddPdf, RooMsgService, RooProdPdf, RooGaussian, RooLinkedList, \ +RooCategory, RooSimultaneous, RooDataSet, RooRealSumPdf, RooHistFunc from array import array import math import logging -from ROOT import RooFit, RooRealVar, RooDataHist, RooArgList, RooHistPdf, RooArgSet, RooAddPdf, RooMsgService -from ROOT import RooProdPdf, RooGaussian, RooLinkedList +from hist_utilities import adjust_overflow_to_limit +import rootpy.stl as stl +from copy import deepcopy # RooFit is really verbose. 
Make it stop RooMsgService.instance().setGlobalKillBelow( RooFit.FATAL ) # from scipy.optimize import curve_fit +# Create python mappings to map and pair -class TemplateFit(): - def __init__( self, histograms, data_label = 'data' ): - self.performedFit = False - self.results = {} - self.normalisation = {} - self.normalisation_errors = {} - self.data_label = data_label - self.histograms = histograms - self.templates = {} - self.module = None - # samples = all inputs except data - keys = sorted( histograms.keys() ) - keys.remove( data_label ) +def generate_templates_and_normalisation( histograms ): + normalisation = {} + normalisation_errors = {} + templates = {} + for sample, histogram in histograms.iteritems(): + normalisation[sample] = histogram.Integral() + normalisation_errors[sample] = sum( histogram.yerravg() ) + temp = histogram.Clone( sample + '_' + 'template' ) + nEntries = temp.Integral() + if not nEntries == 0: + temp.Scale( 1 / nEntries ) + templates[sample] = temp + return templates, normalisation, normalisation_errors + +def vectorise( histograms ): + values = {} + errors = {} + for sample in histograms.keys(): + hist = histograms[sample] + nBins = hist.GetNbinsX() + for bin_i in range( 1, nBins + 1 ): + if not values.has_key( sample ): + values[sample] = [] + errors[sample] = [] + values[sample].append( hist.GetBinContent( bin_i ) ) + errors[sample].append( hist.GetBinError( bin_i ) ) + return values, errors + +class FitData(): + data_label = 'data' + + def __init__( self, real_data, mc_histograms, fit_boundaries ): + self.histograms_ = deepcopy( mc_histograms ) + self.histograms_[FitData.data_label] = deepcopy( real_data ) + # put all events outside the range into the + # first or last bin + for sample, histogram in self.histograms_.iteritems(): + self.histograms_[sample] = adjust_overflow_to_limit( histogram, fit_boundaries[0], fit_boundaries[1] ) + self.fit_boundaries = fit_boundaries + + keys = sorted( mc_histograms.keys() ) self.samples = keys - - self.templates, self.normalisation, self.normalisation_errors = TemplateFit.generateTemplatesAndNormalisation( histograms ) - self.vectors, self.errors = TemplateFit.vectorise( self.templates ) - self.param_indices = {} - # check for consistency - # vectors and templates all same size!! - data_length = len( self.vectors[data_label] ) - error = False - for sample in self.vectors.keys(): - current_length = len( self.vectors[sample] ) - if not data_length == current_length: - # TODO: This should be a while loop - if current_length < data_length: - for entry in range( current_length, data_length ): - self.vectors[sample].append( 0. ) - self.errors[sample].append( 0. 
) - else: - print 'Error:' - print 'Sample "', sample, '" has different length' - print 'Expected:', data_length, ', found:', current_length - error = True - if error: - import sys - sys.exit( -1 ) - - def readResults( self ): - return self.results - - @staticmethod - def generateTemplatesAndNormalisation( histograms ): - normalisation = {} - normalisation_errors = {} - templates = {} - for sample, histogram in histograms.iteritems(): - normalisation[sample] = histogram.Integral() - normalisation_errors[sample] = sum( histogram.yerravg() ) - temp = histogram.Clone( sample + '_' + 'template' ) - nEntries = temp.Integral() - if not nEntries == 0: - temp.Scale( 1 / nEntries ) - templates[sample] = temp - return templates, normalisation, normalisation_errors - - @staticmethod - def vectorise( histograms ): - values = {} - errors = {} - for sample in histograms.keys(): - hist = histograms[sample] - nBins = hist.GetNbinsX() - for bin_i in range( 1, nBins + 1 ): - if not values.has_key( sample ): - values[sample] = [] - errors[sample] = [] - values[sample].append( hist.GetBinContent( bin_i ) ) - errors[sample].append( hist.GetBinError( bin_i ) ) - return values, errors - -class TMinuitFit( TemplateFit ): - + self.templates, self.normalisation, self.normalisation_errors = generate_templates_and_normalisation( self.histograms_ ) + + self.vectors, self.errors = vectorise( self.templates ) + + def n_data( self ): + return self.normalisation[FitData.data_label] + + def real_data_histogram( self ): + return self.histograms_[FitData.data_label] + + def real_data_roofit_histogram( self ): + return self.roofit_histograms[self.data_label] + +class FitDataCollection(): + default_name_prefix = 'var' + def __init__( self ): + self.fit_data = {} + self.n_fit_data = 0 + self.normalisation = {} + self.constraint_type = '' + self.constraints_ = {} + + def add( self, fit_data, name = '' ): + if name == '': + name = self.default_name_prefix + str( self.n_fit_data ) + + if self.fit_data.has_key( name ): + raise UserWarning( name + 'is already registered with this collection. It will be overwritten!' 
) + self.normalisation[name] = fit_data.normalisation + self.fit_data[name] = fit_data + self.n_fit_data += 1 + + def is_valid_for_simultaneous_fit( self ): + if self.n_fit_data == 1: + return True + + data_normalisation = [] + n_samples = [] + for _, data in self.fit_data.iteritems(): + data_normalisation.append( data.n_data() ) + n_samples.append( len( data.samples ) ) + # check if all are the same + has_same_n_data = reduce( lambda x, y: x if round( abs( x - y ) ) <= ((x + y)/2 * 0.01) else False, data_normalisation ) != False + mc_samples = self.mc_samples() + n_samples.append( len( mc_samples ) ) + has_same_n_samples = reduce( lambda x, y: x if x == y else False, n_samples ) != False + self.has_same_n_data = has_same_n_data + self.has_same_n_samples = has_same_n_samples + + return has_same_n_data and has_same_n_samples + + def mc_samples( self ): + samples = [] + for _, data in self.fit_data.iteritems(): + samples.extend( data.samples ) + return sorted( set( samples ) ) + + def mc_normalisation( self, name = '' ): + if self.n_fit_data == 1: + return self.fit_data.items()[0][1].normalisation + if not name == '': + return self.fit_data[name].normalisation + else: + normalisation = {} + for dist, data in self.fit_data.iteritems(): + normalisation[dist] = data.normalisation + return normalisation + + def mc_normalisation_errors( self, name = '' ): + if self.n_fit_data == 1: + return self.fit_data.items()[0][1].normalisation_errors + if not name == '': + return self.fit_data[name].normalisation_errors + else: + normalisation_errors = {} + for dist, data in self.fit_data.iteritems(): + normalisation_errors[dist] = data.normalisation_errors + return normalisation_errors + + def n_data( self, name = '' ): + if self.n_fit_data == 1: + return self.fit_data.items()[0][1].n_data() + if not name == '': + return self.fit_data[name].n_data() + else: + normalisation = {} + for dist, data in self.fit_data.iteritems(): + normalisation[dist] = data.n_data() + return normalisation + + def max_n_data( self ): + n_data = self.n_data() + if type( n_data ) == dict: + return max( [n for _, n in n_data.iteritems()] ) + else: + return n_data + + def vectors( self, name = '' ): + if self.n_fit_data == 1: + return self.fit_data.items()[0][1].vectors + if not name == '': + return self.fit_data[name].vectors + else: + vectors = {} + for dist, data in self.fit_data.iteritems(): + vectors[dist] = data.vectors + return vectors + + def set_normalisation_constraints( self, constraints ): + self.constraint_type = 'normalisation' + self.constraints_ = constraints + + def constraints( self ): + return self.constraints_ + +class Minuit(): + fitfunction = None - - def __init__( self, histograms = {}, dataLabel = 'data', method = 'logLikelihood', verbose = False ): - TemplateFit.__init__( self, histograms, dataLabel ) + + def __init__( self, fit_data_collection, method = 'logLikelihood', verbose = False ): + # only simultaneous fit data is supported! 
+ assert( fit_data_collection.is_valid_for_simultaneous_fit() ) self.method = method self.logger = logging.getLogger( 'TMinuitFit' ) self.constraints = {} self.constraint_type = '' self.verbose = verbose # prints the correlation matrix, fit info - + self.fit_data_collection = fit_data_collection + self.samples = fit_data_collection.mc_samples() + self.normalisation = fit_data_collection.mc_normalisation() + self.n_distributions = fit_data_collection.n_fit_data + self.distributions = fit_data_collection.fit_data.keys() + self.vectors = fit_data_collection.vectors() + self.param_indices = {} + def fit( self ): numberOfParameters = len( self.samples ) gMinuit = TMinuit( numberOfParameters ) if self.method == 'logLikelihood': # set function for minimisation gMinuit.SetFCN( self.logLikelihood ) - + # set Minuit print level - # printlevel = -1 quiet (also suppresse all warnings) + # printlevel = -1 quiet (also suppress all warnings) # = 0 normal # = 1 verbose - # = 2 additional output giving intermediate results. - # = 3 maximum output, showing progress of minimizations. + # = 2 additional output giving intermediate results. + # = 3 maximum output, showing progress of minimizations. gMinuit.SetPrintLevel( -1 ) - + # Error definition: 1 for chi-squared, 0.5 for negative log likelihood # SETERRDEF: Sets the value of UP (default value= 1.), defining parameter errors. # Minuit defines parameter errors as the change in parameter value required to change the function value by UP. # Normally, for chisquared fits UP=1, and for negative log likelihood, UP=0.5. gMinuit.SetErrorDef( 1 ) - + # error flag for functions passed as reference.set to as 0 is no error errorFlag = Long( 2 ) - - N_total = self.normalisation[self.data_label] * 2 + + N_total = self.fit_data_collection.max_n_data() * 2 N_min = 0 - + param_index = 0 - + # MNPARM # Implements one parameter definition: # mnparm(k, cnamj, uk, wk, a, b, ierflg) @@ -138,25 +236,23 @@ def fit( self ): # Output: IERFLG =0 if no problems # >0 if MNPARM unable to implement definition for sample in self.samples: # all samples but data - gMinuit.mnparm( param_index, sample, self.normalisation[sample], 10.0, N_min, N_total, errorFlag ) + if self.n_distributions > 1: + gMinuit.mnparm( param_index, sample, self.normalisation[self.distributions[0]][sample], 10.0, N_min, N_total, errorFlag ) + else: + gMinuit.mnparm( param_index, sample, self.normalisation[sample], 10.0, N_min, N_total, errorFlag ) param_index += 1 - - -# N_signal = self.normalisation['signal'] -# gMinuit.mnparm(0, "N_signal(ttbar+single_top)", N_signal, 10.0, N_min, N_total, errorFlag) -# gMinuit.mnparm(1, "bkg1", 10, 10.0, N_min, N_total, errorFlag) - + arglist = array( 'd', 10 * [0.] ) - + # minimisation strategy: 1 standard, 2 try to improve minimum (a bit slower) arglist[0] = 2 - + # minimisation itself # SET STRategy: Sets the strategy to be used in calculating first and second derivatives and in certain minimization methods. # In general, low values of mean fewer function calls and high values mean more reliable minimization. - # Currently allowed values are 0, 1 (default), and 2. + # Currently allowed values are 0, 1 (default), and 2. gMinuit.mnexcm( "SET STR", arglist, 1, errorFlag ) - + gMinuit.Migrad() if self.verbose: @@ -164,10 +260,10 @@ def fit( self ): self.module = gMinuit self.performedFit = True - + if not self.module: raise Exception( 'No fit results available. 
Please run fit method first' ) - + results = {} param_index = 0 for sample in self.samples: @@ -177,90 +273,107 @@ def fit( self ): if ( math.isnan( temp_err ) ): self.logger.warning( 'Template fit error is NAN, setting to sqrt(N).' ) temp_err = math.sqrt( temp_par ) - + results[sample] = ( temp_par, temp_err ) param_index += 1 - + self.results = results - + def logLikelihood( self, nParameters, gin, f, par, iflag ): # nParameters= number of free parameters involved in the minimisation # gin = computed gradient values (optional) # f = the function value itself # par = vector of constant and variable parameters # flag = to switch between several actions of FCN - + + lnL = 0.0 + if self.n_distributions > 1: + lnL = sum( [self.build_single_LL( self.vectors[distribution][FitData.data_label], + self.vectors[distribution], + self.normalisation[distribution], + par ) for distribution in self.distributions] ) + else: + lnL = self.build_single_LL( self.vectors[FitData.data_label], + self.vectors, + self.normalisation, + par ) + + f[0] = -2.0 * lnL + + # Adding the QCD and V+jets constraints + if self.fit_data_collection.constraint_type == 'normalisation': + f[0] += self.get_fit_normalisation_constraints( par ) + + def build_single_LL( self, data_vector, mc_vectors, normalisation, par ): lnL = 0.0 - data_vector = self.vectors[self.data_label] - - vector_entry = 0 - for data in data_vector: + for vector_entry, data_i in enumerate( data_vector ): x_i = 0 -# sumxerrorssquared_i = 0 param_index = 0 for sample in self.samples: - x_i += par[param_index] * self.vectors[sample][vector_entry] -# sumxerrorssquared_i += par[param_index] * self.errors[sample][vector_entry]**2 + x_i += par[param_index] * mc_vectors[sample][vector_entry] self.param_indices[sample] = param_index param_index += 1 - data_i = self.normalisation[self.data_label] * data - if not data == 0 and not x_i == 0: - L = TMath.Poisson( data_i, x_i ) -# L = TMath.Gaus(data_i, x_i, ((data_i + sumxerrorssquared_i)**0.5)) -# L = TMath.Gaus(data_i, x_i, (x_i**0.5)) - lnL += math.log( L ) - - - - vector_entry += 1 - - f[0] = -2.0 * lnL - - # Adding the QCD and V+jets constraints - if self.constraint_type == 'normalisation': - f[0] += self.get_fit_normalisation_constraints( par ) + data_term = normalisation[FitData.data_label] * data_i + if not data_term == 0 and not x_i == 0: + L = TMath.Poisson( data_term, x_i ) + if not L == 0: + lnL += math.log( L ) + return lnL - # constraints = {sample: constraint} - def set_fit_constraints( self, constraints, constraint_type = 'normalisation' ): - self.constraints = constraints - self.constraint_type = constraint_type - def get_fit_normalisation_constraints( self, params ): + ''' Only valid for simultanous fit for now''' result = 0 - for sample, constraint in self.constraints.iteritems(): - if self.normalisation[sample] != 0: - result += ( params[self.param_indices[sample]] - self.normalisation[sample] ) ** 2 / ( constraint * self.normalisation[sample] ) ** 2 + for sample, constraint in self.fit_data_collection.constraints().iteritems(): + normalisation = 0 + if self.n_distributions > 1: + normalisation = self.normalisation.items()[0][1][sample] + else: + normalisation = self.normalisation[sample] + + if normalisation != 0: + result += ( params[self.param_indices[sample]] - normalisation ) ** 2 / ( constraint * normalisation ) ** 2 return result -# This class name won't cause any confusion, right? 
-class RooFitFit( TemplateFit ): - - def __init__( self, histograms = {}, dataLabel = 'data', method = 'TMinuit', fit_boundries = ( 0., 2.4 ) ): + def readResults( self ): + return self.results + +# This class name won't cause any confusion, right? +class RooFitFit(): + + def __init__( self, fit_data_collection, method = 'TMinuit' ): RooMsgService.instance().setGlobalKillBelow( RooFit.FATAL ) - TemplateFit.__init__( self, histograms, dataLabel ) self.method = method self.logger = logging.getLogger( 'RooFit' ) - self.fit_boundries = fit_boundries - self.constraints = {} + self.constraints = fit_data_collection.constraints() self.constraint_type = '' self.saved_result = None - + + self.fit_data_collection = fit_data_collection + self.samples = fit_data_collection.mc_samples() + self.normalisation = fit_data_collection.mc_normalisation() + + self.fit_data_1 = self.fit_data_collection.fit_data.items()[0][1] + self.fit_boundaries = self.fit_data_1.fit_boundaries + self.data_label = FitData.data_label + self.histograms = self.fit_data_1.histograms_ + def fit( self ): - fit_variable = RooRealVar( "fit_variable", "fit_variable", self.fit_boundries[0], self.fit_boundries[1] ) + fit_variable = RooRealVar( "fit_variable", "fit_variable", self.fit_boundaries[0], self.fit_boundaries[1] ) + fit_variable.setBins( self.histograms[self.data_label].nbins() ) variables = RooArgList() variables.add( fit_variable ) variable_set = RooArgSet() variable_set.add( fit_variable ) - n_event_obs = self.histograms[self.data_label].Integral() + roofit_histograms = {} roofit_pdfs = {} roofit_variables = {} - - N_total = self.normalisation[self.data_label] * 2 - N_min = 0 + + N_total = self.normalisation[self.data_label] * 2. + N_min = 0. pdf_arglist = RooArgList() variable_arglist = RooArgList() - + roofit_histograms[self.data_label] = RooDataHist( self.data_label, self.data_label, variables, @@ -268,18 +381,20 @@ def fit( self ): for sample in self.samples: roofit_histogram = RooDataHist( sample, sample, variables, self.histograms[sample] ) roofit_histograms[sample] = roofit_histogram - roofit_pdf = RooHistPdf ( 'pdf' + sample, 'pdf' + sample, variable_set, roofit_histogram, 0 ) + roofit_pdf = RooHistPdf ( 'pdf' + sample, 'pdf' + sample, variable_set, roofit_histogram) roofit_pdfs[sample] = roofit_pdf - roofit_variable = RooRealVar( sample, "number of " + sample + " events", self.normalisation[sample], N_min, N_total, "event" ) + roofit_variable = RooRealVar( sample, sample + " events", + self.normalisation[sample], + N_min, + N_total ) roofit_variables[sample] = roofit_variable pdf_arglist.add( roofit_pdf ) variable_arglist.add( roofit_variable ) - - model = RooAddPdf( "model", - "sum of all known", - pdf_arglist, - variable_arglist - ) + + model = RooAddPdf( 'model', + 'sum of all known', + pdf_arglist, + variable_arglist) use_model = model if self.constraints: arg_set = RooArgSet( model ) @@ -287,31 +402,27 @@ def fit( self ): for constraint in constraints: arg_set.add( constraint ) model_with_constraints = RooProdPdf( "model_with_constraints", - "model with gaussian constraints", + "model with gaussian constraints", arg_set, RooLinkedList() ) use_model = model_with_constraints - + if self.method == 'TMinuit': #WARNING: number of cores changes the results!!! 
- self.saved_result = use_model.fitTo( + self.saved_result = use_model.fitTo( roofit_histograms[self.data_label], RooFit.Minimizer( "Minuit2", "Migrad" ), RooFit.NumCPU( 1 ), RooFit.Extended(), RooFit.Save(), - ) + ) + results = {} for sample in self.samples: - results[sample] = ( roofit_variables[sample].getVal(), roofit_variables[sample].getError() ) + results[sample] = ( roofit_variables[sample].getVal(), roofit_variables[sample].getError() ) self.results = results - - # constraints = {sample: constraint} - def set_fit_constraints( self, constraints, constraint_type = 'normalisation' ): - self.constraints = constraints - self.constraint_type = constraint_type - + def get_fit_normalisation_constraints( self, model, roofit_variables ): result = [] for sample, constraint in self.constraints.iteritems(): @@ -324,26 +435,104 @@ def get_fit_normalisation_constraints( self, model, roofit_variables ): ) result.append( roo_constraint ) return result - -# class CurveFit(): -# defined_functions = ['gaus', 'gauss'] -# -# @staticmethod -# def gauss(x, *p): -# A, mu, sigma = p -# return A*numpy.exp(-(x-mu)**2/(2.*sigma**2)) -# -# @staticmethod -# def fit(hist, function, params): -# if type(function) == type('s'):#string input -# if not function in CurveFit.defined_functions: -# raise Exception('Unknown function') -# if function == 'gaus' or function == 'gauss': -# function = CurveFit.gauss -# x,y = list(hist.x()), list(hist.y()) -# -# coeff, var_matrix = curve_fit(function, x, y, p0=params) -# print coeff, var_matrix -# #this should be transferred into rootpy.TF1 -# hist_fit = function(x, *coeff) -# return hist_fit, x + + def readResults( self ): + return self.results + +# experimental +class Observable(): + def __init__( self, name, title, value, min_value, max_value, unit ): + assert( value >= min_value ) + assert( value <= max_value ) + self.value = value + self.min_value = min_value + self.max_value = max_value + self.name = name + self.title = title + self.roofit = RooRealVar( name, title, value, min_value, max_value ) # , unit ) + + def roofit_object( self ): + return self.roofit + +# experimental +class SimultaneousFit(): + ''' A fit that performs a simultaneous fit in more than one variable. 
+ It expects the input of fit_data which is a dictionary of the form + {variable_name: FitData()}''' + def __init__( self, fit_data ): + MapStrRootPtr = stl.map( stl.string, "TH1*" ) + StrHist = stl.pair( stl.string, "TH1*" ) + self.fit_data = fit_data + self.models = {} + self.sample = RooCategory( 'sample', 'sample' ) + self.roofit_variables = [] + input_hists = MapStrRootPtr() + + # first create observables + # Since we are looking for normalisation in equivalent regions + # the number of events in each sample has to be identical + # Hence, pick one fit_data to create the set of observables + fit_data_1 = fit_data.itervalues().next() + samples = fit_data_1.samples + self.observables = {} + N_min = 0 + N_total = fit_data_1.n_data() * 2 + for sample in samples: + self.observables[sample] = Observable( 'n_' + sample, + 'number of ' + sample + " events", + fit_data_1.normalisation[sample], + N_min, + N_total, + "events" ) + + # next create the models + for variable, fit_input in fit_data.iteritems(): + self.models[variable] = fit_input.get_roofit_model( variable, self.observables ) + self.sample.defineType( variable ) + self.sample.setLabel ( variable ) + data = deepcopy( fit_input.real_data_histogram() ) + input_hists.insert( StrHist( variable, data ) ) + self.roofit_variables.append( fit_input.fit_variable ) + self.comb_data = RooDataHist( "combData", + "combined data", + RooArgList( self.roofit_variables[0] ), + self.sample, + input_hists, + ) + + def fit( self ): + sim_pdf = RooSimultaneous( "simPdf", "simultaneous pdf", self.sample ) + self.individual_results = {} + for name, model in self.models.iteritems(): + fit_input = self.fit_data[name] + model.fitTo( fit_input.real_data_roofit_histogram() ) + self.individual_results[name] = fit_input.get_results() + sim_pdf.addPdf( model, name ) + + argument_list = RooLinkedList() + argument_list.Add( RooFit.Minimizer( "Minuit2", "Migrad" ) ) + argument_list.Add( RooFit.NumCPU( 1 ) ) + argument_list.Add( RooFit.Extended() ) + argument_list.Add( RooFit.Save() ) + + sim_pdf.fitTo( self.comb_data, +# argument_list + ) + +# sim_pdf.fitTo( self.combined_data, +# RooFit.Minimizer( "Minuit2", "Migrad" ) ) + +# sim_pdf.fitTo( data = self.combined_data, +# arg1 = RooFit.Minimizer( "Minuit2", "Migrad" ), +# arg2 = RooFit.NumCPU( 1 ), +# arg3 = RooFit.Extended(), +# arg4 = RooFit.Save() ) +# sim_pdf.fitTo( self.combined_data, +# argument_list ) + + # get fit results + results = {} + for variable, fit_input in self.fit_data.iteritems(): + results[variable] = fit_input.get_results() + self.results = results + return results diff --git a/tools/hist_utilities.py b/tools/hist_utilities.py index 5dccee94..cdcc7cc5 100644 --- a/tools/hist_utilities.py +++ b/tools/hist_utilities.py @@ -12,7 +12,6 @@ from rootpy.plotting.hist import Hist2D import random import string -from cmath import sqrt from copy import deepcopy def hist_to_value_error_tuplelist( hist ): @@ -276,6 +275,57 @@ def conditional_rebin( histogram, bin_edges ): histogram_ = histogram_.rebinned( bin_edges, axis = 1 ) return histogram_ +def clean_control_region(histograms = {}, + data_label = 'data', + subtract = [], + fix_to_zero = True): + '''This function takes a dictionary of histograms (sample_name:histogram) + and will subtract all samples given in the parameter "subtract" from the + data distribution. 
+ ''' + data_hist = deepcopy(histograms[data_label]) + # first subtract all necessary samples + for sample, histogram in histograms.iteritems(): + if sample in subtract: + data_hist -= histogram + # next make sure there are no negative events + if fix_to_zero: + for bin_i, y in enumerate(data_hist.y()): + if y < 0: + data_hist.SetBinContent(bin_i + 1, 0) + # add the difference to 0 to the existing error + data_hist.SetBinError(bin_i, data_hist.GetBinError(bin_i + 1) + abs(y)) + return data_hist + +def adjust_overflow_to_limit(histogram, x_min, x_max): + ''' Adjust the first and last bin of the histogram such that it becomes + the new under- and overflow bin''' + # get the bin before x_min + histogram_ = deepcopy(histogram) + underflow_bin = histogram_.FindBin(x_min) + overflow_bin = histogram_.FindBin(x_max) + n_bins = histogram_.nbins() + underflow, underflow_error = 0, 0 + overflow, overflow_error = 0, 0 + if not underflow_bin < 1: + underflow, underflow_error = histogram_.integral(0, underflow_bin, error=True) + for i in range(underflow_bin + 1): + histogram_.SetBinContent(i, 0) + histogram_.SetBinError(i, 0) + + if not overflow_bin > n_bins: + overflow, overflow_error = histogram_.integral(overflow_bin, n_bins + 1, error=True) + for i in range(overflow_bin, n_bins + 2): + histogram_.SetBinContent(i, 0) + histogram_.SetBinError(i, 0) + + histogram_.SetBinContent(underflow_bin, underflow) + histogram_.SetBinError(underflow_bin, underflow_error) + histogram_.SetBinContent(overflow_bin, overflow) + histogram_.SetBinError(overflow_bin, overflow_error) + + return histogram_ + if __name__ == '__main__': value_error_tuplelist = [( 0.006480446927374301, 0.0004647547547401945 ), ( 0.012830288388947605, 0.0010071677178938234 ), diff --git a/tools/plotting.py b/tools/plotting.py index 89ef0589..c4e43b1b 100644 --- a/tools/plotting.py +++ b/tools/plotting.py @@ -4,6 +4,7 @@ @author: kreczko ''' import matplotlib as mpl +from tools.file_utilities import make_folder_if_not_exists mpl.use('agg') import matplotlib.pyplot as plt import rootpy.plotting.root2matplotlib as rplt @@ -287,7 +288,8 @@ def make_shape_comparison_plot( shapes = [], shape.legendstyle = 'F' shape.linewidth = 5 - + if not histogram_properties.y_limits: + histogram_properties.y_limits = [0, get_best_max_y(shapes_, False)] # plot with matplotlib plt.figure( figsize = CMS.figsize, dpi = CMS.dpi, facecolor = CMS.facecolor ) gs = gridspec.GridSpec( 2, 1, height_ratios = [5, 1] ) @@ -407,6 +409,7 @@ def compare_measurements( models = {}, measurements = {}, prescription as the models parameter. 
@param histogram_properties: a Histogram_properties object to describe the look of the histogram """ + make_folder_if_not_exists(save_folder) # plot with matplotlib plt.figure( figsize = CMS.figsize, dpi = CMS.dpi, facecolor = CMS.facecolor ) axes = plt.axes() @@ -427,6 +430,8 @@ def compare_measurements( models = {}, measurements = {}, linecycler = cycle( lines ) for label, histogram in models.iteritems(): + if not histogram: # skip empty ones + continue histogram.linewidth = 2 histogram.color = next( colorcycler ) histogram.linestyle = next( linecycler ) @@ -487,3 +492,9 @@ def adjust_axis_limits( axes, histogram_properties ): axes.set_ylim( ymin = y_limits[0], ymax = y_limits[1] ) else: axes.set_ylim( ymin = 0 ) + +def get_best_max_y(histograms, include_error = True): + return max([histogram.max(include_error = include_error) for histogram in histograms]) + +def get_best_min_y(histograms, include_error = True): + return min([histogram.min(include_error = include_error) for histogram in histograms])
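
The snippet below is not part of the patch; it is a minimal usage sketch of the new fitting interface introduced in tools/Fitting.py, following the pattern of test/tools_Fitting_Minuit.py. The histogram contents, yields and the 'signal region' label are illustrative only.

import numpy as np
from rootpy.plotting import Hist
from tools.Fitting import FitData, FitDataCollection, Minuit

# build simple signal, background and pseudo-data templates
h_bkg = Hist( 100, 40, 200 )
h_sig = h_bkg.Clone()
h_data = h_bkg.Clone()
map( h_bkg.Fill, 100 + 15 * np.random.randn( 9000 ) )
map( h_sig.Fill, 140 + 5 * np.random.randn( 1000 ) )
map( h_data.Fill, 100 + 15 * np.random.randn( 10000 ) )
map( h_data.Fill, 140 + 5 * np.random.randn( 2000 ) )

# one FitData per fit variable; events outside the fit boundaries are
# folded into the first/last bin via adjust_overflow_to_limit
fit_data = FitData( h_data, {'signal': h_sig, 'bkg1': h_bkg},
                    fit_boundaries = ( 40, 200 ) )

# a FitDataCollection holds one or more FitData objects (for a
# simultaneous fit) together with the normalisation constraints
fit_collection = FitDataCollection()
fit_collection.add( fit_data, 'signal region' )
fit_collection.set_normalisation_constraints( {'bkg1': 0.5} )

# Minuit asserts is_valid_for_simultaneous_fit() before fitting
fitter = Minuit( fit_collection )
fitter.fit()
results = fitter.readResults()  # {sample: (fitted yield, error)}

With more than one FitData added, Minuit sums the log-likelihood built per distribution (build_single_LL), which is why is_valid_for_simultaneous_fit() requires the same number of samples in every distribution and data yields that agree to within roughly 1%.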
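Also illustrative rather than part of the patch: the two new helpers in tools/hist_utilities.py are meant to be chained when a QCD template is derived from a data control region, as in the fit cross-check plotting code earlier in this patch; here h_data and h_bkg from the sketch above stand in for the control-region data and the MC sample to subtract.

from tools.hist_utilities import clean_control_region, adjust_overflow_to_limit

# subtract the listed samples from data; with fix_to_zero = True (the default)
# negative bins are set to zero and the clipped amount is added to the bin error
qcd_template = clean_control_region( {'data': h_data, 'bkg1': h_bkg},
                                     subtract = ['bkg1'] )

# fold everything outside the fit range into the first/last bin,
# the same adjustment FitData applies internally before building templates
qcd_template = adjust_overflow_to_limit( qcd_template, 40, 200 )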