mirror of
https://github.com/redoules/redoules.github.io.git
synced 2025-12-12 15:59:34 +00:00
234 lines
15 KiB
HTML
234 lines
15 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="fr">
|
|
|
|
<head>
|
|
<meta charset="utf-8">
|
|
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
|
|
<meta name="description" content="Data Science for Political and Social Phenomena">
|
|
<meta name="author" content="Guillaume Redoulès">
|
|
<link rel="icon" href="../favicon.ico">
|
|
|
|
<title>Day 0 - Median, mean, mode and weighted mean - Blog</title>
|
|
|
|
<!-- JQuery -->
|
|
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
|
|
<script>
|
|
window.jQuery || document.write('<script src="../theme/js/jquery.min.js"><\/script>')
|
|
</script>
|
|
|
|
<!-- Bootstrap core CSS -->
|
|
<link rel="stylesheet" href="../theme/css/bootstrap.css" />
|
|
<!-- IE10 viewport hack for Surface/desktop Windows 8 bug -->
|
|
<link rel="stylesheet" type="text/css" href="../theme/css/ie10-viewport-bug-workaround.css" />
|
|
<!-- Custom styles for this template -->
|
|
<link rel="stylesheet" type="text/css" href="../theme/css/style.css" />
|
|
<link rel="stylesheet" type="text/css" href="../theme/css/notebooks.css" />
|
|
<link href='https://fonts.googleapis.com/css?family=PT+Serif:400,700|Roboto:400,500,700' rel='stylesheet' type='text/css'>
|
|
|
|
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
|
<!--[if lt IE 9]>
|
|
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
|
|
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
|
<![endif]-->
|
|
|
|
|
|
<meta name="tags" content="Basics" />
|
|
|
|
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<div class="navbar navbar-fixed-top">
|
|
<div class="container">
|
|
<div class="navbar-header">
|
|
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
|
|
<span class="icon-bar"></span>
|
|
<span class="icon-bar"></span>
|
|
<span class="icon-bar"></span>
|
|
</button>
|
|
<a class="navbar-brand" href="..">Guillaume Redoulès</a>
|
|
</div>
|
|
<div class="navbar-collapse collapse" id="searchbar">
|
|
|
|
<ul class="nav navbar-nav navbar-right">
|
|
<li class="dropdown">
|
|
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">About<span class="caret"></span></a>
|
|
<ul class="dropdown-menu">
|
|
<li><a href="../pages/about.html">About Guillaume</a></li>
|
|
<li><a href="https://github.com/redoules">GitHub</a></li>
|
|
<li><a href="https://www.linkedin.com/in/guillaume-redoul%C3%A8s-33923860/">LinkedIn</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="dropdown">
|
|
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Data Science<span class="caret"></span></a>
|
|
<ul class="dropdown-menu">
|
|
<li><a href="..#Blog">Blog</a></li>
|
|
<li><a href="..#Python">Python</a></li>
|
|
<li><a href="..#Bash">Bash</a></li>
|
|
<li><a href="..#SQL">SQL</a></li>
|
|
<li><a href="..#Mathematics">Mathematics</a></li>
|
|
<li><a href="..#Machine_Learning">Machine Learning</a></li>
|
|
<li><a href="..#Projects">Projects</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="dropdown">
|
|
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Projects<span class="caret"></span></a>
|
|
<ul class="dropdown-menu">
|
|
<li><a href="https://github.com/redoules/redoules.github.io">Notes (GitHub)</a></li>
|
|
</ul>
|
|
</li>
|
|
|
|
<!--<li class="dropdown">
|
|
<a href="../feeds/blog.rss.xml">Blog RSS</a>
|
|
</li>-->
|
|
|
|
|
|
</ul>
|
|
|
|
<form class="navbar-form" action="../search.html" onsubmit="return validateForm(this.elements['q'].value);">
|
|
<div class="form-group" style="display:inline;">
|
|
<div class="input-group" style="display:table;">
|
|
<span class="input-group-addon" style="width:1%;"><span class="glyphicon glyphicon-search"></span></span>
|
|
<input class="form-control search-query" name="q" id="tipue_search_input" placeholder="e.g. scikit KNN, pandas merge" required autocomplete="off" type="text">
|
|
</div>
|
|
</div>
|
|
</form>
|
|
|
|
</div>
|
|
<!--/.nav-collapse -->
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
<!-- end of header section -->
|
|
<div class="container">
|
|
<!-- <div class="alert alert-warning" role="alert">
|
|
Did you find this page useful? Please do me a quick favor and <a href="#" class="alert-link">endorse me for data science on LinkedIn</a>.
|
|
</div> -->
|
|
<section id="content" class="body">
|
|
<header>
|
|
<h1>
|
|
Day 0 - Median, mean, mode and weighted mean
|
|
</h1>
|
|
<ol class="breadcrumb">
|
|
<li>
|
|
<time class="published" datetime="2018-11-07T22:12:00+01:00">
|
|
07 novembre 2018
|
|
</time>
|
|
</li>
|
|
<li>Blog</li>
|
|
<li>Basics</li>
|
|
</ol>
|
|
</header>
|
|
<div class='article_content'>
|
|
<h2>A reminder</h2>
|
|
<h3>The median</h3>
|
|
<p>The median is the value separating the higher half from the lower half of a data sample. For a data set, it may be thought of as the middle value.
|
|
For a continuous probability distribution, the median is the value such that a number is equally likely to fall above or below it. </p>
|
|
<h3>The mean</h3>
|
|
<p>The arithmetic mean (or simply mean) of a sample is the sum of the sampled values divided by the number of items.</p>
|
|
<h3>The mode</h3>
|
|
<p>The mode of a set of data values is the value that appears most often. For a discrete random variable, it is the value x at which the probability mass function takes its maximum value. In other words, it is the value that is most likely to be sampled. </p>
|
|
<h2>Implementation in Python without using the scientific libraries</h2>
|
|
<div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">median</span><span class="p">(</span><span class="n">l</span><span class="p">):</span>
|
|
<span class="n">l</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
<span class="k">return</span> <span class="p">(</span><span class="n">l</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">l</span><span class="p">)</span> <span class="o">//</span> <span class="mi">2</span><span class="p">]</span> <span class="o">+</span> <span class="n">l</span><span class="p">[(</span><span class="nb">len</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)])</span> <span class="o">/</span> <span class="mi">2</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="k">return</span> <span class="n">l</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span><span class="p">]</span>
|
|
|
|
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="n">l</span><span class="p">):</span>
|
|
<span class="k">return</span> <span class="nb">sum</span><span class="p">(</span><span class="n">l</span><span class="p">)</span><span class="o">/</span><span class="nb">len</span><span class="p">(</span><span class="n">l</span><span class="p">)</span>
|
|
|
|
<span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="n">data</span><span class="p">):</span>
|
|
<span class="n">dico</span> <span class="o">=</span> <span class="p">{</span><span class="n">x</span><span class="p">:</span><span class="n">data</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">data</span><span class="p">))}</span>
|
|
<span class="k">return</span> <span class="nb">sorted</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">dico</span><span class="o">.</span><span class="n">items</span><span class="p">()),</span> <span class="n">key</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">reverse</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
|
|
</pre></div>
|
|
|
|
|
|
<div class="highlight"><pre><span></span><span class="n">L</span> <span class="o">=</span> <span class="p">[</span><span class="mi">64630</span><span class="p">,</span><span class="mi">11735</span><span class="p">,</span><span class="mi">14216</span><span class="p">,</span><span class="mi">99233</span><span class="p">,</span><span class="mi">14470</span><span class="p">,</span><span class="mi">4978</span><span class="p">,</span><span class="mi">73429</span><span class="p">,</span><span class="mi">38120</span><span class="p">,</span><span class="mi">51135</span><span class="p">,</span><span class="mi">67060</span><span class="p">,</span> <span class="mi">4978</span><span class="p">,</span> <span class="mi">73429</span><span class="p">]</span>
|
|
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Sample : </span><span class="si">{L}</span><span class="se">\n</span><span class="s2">Mean : {mean(L)}, Median : {median(L)}, Mode : {mode(L)}"</span><span class="p">)</span>
|
|
</pre></div>
|
|
|
|
|
|
<div class="highlight"><pre><span></span><span class="err">Sample : [64630, 11735, 14216, 99233, 14470, 4978, 73429, 38120, 51135, 67060, 4978, 73429]</span>
|
|
<span class="err">Mean : 43117.75, Median : 44627.5, Mode : 4978</span>
|
|
</pre></div>
|
|
|
|
|
|
<h2>The weighted average</h2>
|
|
<p>The weighted arithmetic mean is similar to an ordinary arithmetic mean (the most common type of average), except that instead of each of the data points contributing equally to the final average, some data points contribute more than others.</p>
|
|
<div class="highlight"><pre><span></span><span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span><span class="mi">40</span><span class="p">,</span><span class="mi">30</span><span class="p">,</span><span class="mi">50</span><span class="p">,</span><span class="mi">20</span><span class="p">]</span>
|
|
<span class="n">weights</span> <span class="o">=</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">]</span>
|
|
<span class="n">sum_X</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">([</span><span class="n">x</span><span class="o">*</span><span class="n">w</span> <span class="k">for</span> <span class="n">x</span><span class="p">,</span><span class="n">w</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">data</span><span class="p">,</span><span class="n">weights</span><span class="p">)])</span>
|
|
<span class="nb">print</span><span class="p">(</span><span class="nb">round</span><span class="p">((</span><span class="n">sum_X</span><span class="o">/</span><span class="nb">sum</span><span class="p">(</span><span class="n">weights</span><span class="p">)),</span><span class="mi">1</span><span class="p">))</span>
|
|
</pre></div>
|
|
|
|
|
|
<div class="highlight"><pre><span></span><span class="err">32.0</span>
|
|
</pre></div>
|
|
</div>
|
|
<aside>
|
|
<div class="bug-reporting__panel">
|
|
<h3>Find an error or bug? Have a suggestion?</h3>
|
|
<p>Everything on this site is available on GitHub. Head on over and <a href='https://github.com/redoules/redoules.github.io/issues/new'>submit an issue.</a> You can also message me directly by <a href='mailto:guillaume.redoules@gadz.org'>email</a>.</p>
|
|
</div>
|
|
</aside>
|
|
</section>
|
|
|
|
</div>
|
|
<!-- start of footer section -->
|
|
<footer class="footer">
|
|
<div class="container">
|
|
<p class="text-muted">
|
|
<center>This project contains 115 pages and is available on <a href="https://github.com/redoules/redoules.github.io">GitHub</a>.
|
|
<br/>
|
|
Copyright © Guillaume Redoulès,
|
|
<time datetime="2018">2018</time>.
|
|
</center>
|
|
</p>
|
|
</div>
|
|
</footer>
|
|
|
|
<!-- This jQuery line finds any span that contains code highlighting classes and then selects the parent <pre> tag and adds a border. This is done as a workaround to visually distinguish the code inputs and outputs -->
|
|
<script>
|
|
$( ".hll, .n, .c, .err, .k, .o, .cm, .cp, .c1, .cs, .gd, .ge, .gr, .gh, .gi, .go, .gp, .gs, .gu, .gt, .kc, .kd, .kn, .kp, .kr, .kt, .m, .s, .na, .nb, .nc, .no, .nd, .ni, .ne, .nf, .nl, .nn, .nt, .nv, .ow, .w, .mf, .mh, .mi, .mo, .sb, .sc, .sd, .s2, .se, .sh, .si, .sx, .sr, .s1, .ss, .bp, .vc, .vg, .vi, .il" ).parent( "pre" ).css( "border", "1px solid #DEDEDE" );
|
|
</script>
|
|
|
|
|
|
<!-- Load Google Analytics -->
|
|
<script>
|
|
/*
|
|
(function(i, s, o, g, r, a, m) {
|
|
i['GoogleAnalyticsObject'] = r;
|
|
i[r] = i[r] || function() {
|
|
(i[r].q = i[r].q || []).push(arguments)
|
|
}, i[r].l = 1 * new Date();
|
|
a = s.createElement(o),
|
|
m = s.getElementsByTagName(o)[0];
|
|
a.async = 1;
|
|
a.src = g;
|
|
m.parentNode.insertBefore(a, m)
|
|
})(window, document, 'script', '//www.google-analytics.com/analytics.js', 'ga');
|
|
|
|
ga('create', 'UA-66582-32', 'auto');
|
|
ga('send', 'pageview');
|
|
*/
|
|
</script>
|
|
<!-- End of Google Analytics -->
|
|
|
|
<!-- Bootstrap core JavaScript
|
|
================================================== -->
|
|
<!-- Placed at the end of the document so the pages load faster -->
|
|
<script src="../theme/js/bootstrap.min.js"></script>
|
|
<!-- IE10 viewport hack for Surface/desktop Windows 8 bug -->
|
|
<script src="../theme/js/ie10-viewport-bug-workaround.js"></script>
|
|
|
|
|
|
</body>
|
|
|
|
</html> |