Showing 2 changed files with 487 additions and 154 deletions
+477 -154
analyse-votes-AN 1000644 → 1000755
... ...
@@ -1,59 +1,143 @@
1 1
 #!/bin/bash
2 2
 
3
-groupe=${1:-LREM}
4
-nom=$2
5
-groupe_ref=${3:-GDR}
6
-archive_scrutins=$HOME/archive_scrutins
7
-
8
-declare -A groupes cible_votes ref_votes
9
-groupes[LREM]='Groupe La République en Marche'
10
-groupes[LR]='Groupe Les Républicains'
11
-groupes[MODEM]='Groupe du Mouvement Démocrate et apparentés'
12
-groupes[PS]='Groupe Socialistes et apparentés'
13
-groupes[UDI]='Groupe UDI, Agir et Indépendants'
14
-groupes[NG]='Groupe Nouvelle Gauche'
15
-groupes[FI]='Groupe La France insoumise'
16
-groupes[GDR]='Groupe de la Gauche démocrate et républicaine'
17
-groupes[LT]='Groupe Libertés et Territoires'
18
-groupes[NI]='Non inscrits'
19
-
20
-for g in ${!groupes[@]}; do
21
-    if test "$nom" = $g; then
22
-        groupe_ref=$nom
23
-        unset nom
24
-        break
25
-    fi
3
# Abort early when one of the external programs this script relies on is not
# reachable through PATH.  `command -v` is a shell builtin, so the check works
# even on systems where `which` itself is not installed.
for tool in sqlite3 getopt md5sum mktemp; do
    if ! command -v "$tool" > /dev/null 2>&1; then
        echo "missing tool $tool" >&2
        exit 1
    fi
done
27 9
 
28
-if test -n "${groupes[$groupe]}" -a -n "${groupes[$groupe_ref]}"; then
29
-    groupes[$groupe]='/<p class="nomgroupe">'${groupes[$groupe]}' <span class="block topmargin">/,/<div class="TTgroupe topmargin-lg">/'
30
-    if test $groupe != $groupe_ref; then
31
-        groupes[$groupe_ref]='/<p class="nomgroupe">'${groupes[$groupe_ref]}' <span class="block topmargin">/,/<div class="TTgroupe topmargin-lg">/'
10
# Create the schema of the working SQLite database and seed its reference
# tables (votes, groupes, and the placeholder row 0 of url).
#
# Globals read:
#   in_ram_database  path of the SQLite file to populate
#   groupes          associative array, short name -> full group name
#                    (presumably filled by the sourced configuration file)
#   config_file      only used in the error message
# Exits with status 1 when `groupes` holds no entry.
function create_database () {
    local v v_id g nom nom_court
    local sq="'"

    # `groupes` must NOT be re-declared here: a `declare -A` inside a function
    # creates an empty local array that hides the configured one.
    if test "${#groupes[@]}" -eq 0; then
        echo "unable to find groupes in $config_file" >&2
        exit 1
    fi

    sqlite3 "$in_ram_database" <<< "create table if not exists votes (id integer primary key, nom text)"
    sqlite3 "$in_ram_database" <<< "create table if not exists url (id integer primary key autoincrement, url text)"
    sqlite3 "$in_ram_database" <<< "create table if not exists députés (id integer primary key autoincrement, nom text, groupe integer)"
    sqlite3 "$in_ram_database" <<< "create table if not exists groupes (id integer primary key autoincrement, nom text, nom_court text)"
    sqlite3 "$in_ram_database" <<< "create table if not exists scrutins (num integer primary key, date text not null, intitulé text not null, adoption boolean, url integer)"
    sqlite3 "$in_ram_database" <<< "create table if not exists dépouillement (député integer not null, scrutin integer not null, groupe integer not null, vote integer not null)"

    # Vote kinds get fixed ids 0..3; insert missing rows, repair renamed ones.
    v_id=0
    for v in Pour Contre Abstention Non-votant; do
        if test -z "$(sqlite3 "$in_ram_database" <<< "select nom from votes where id is $v_id")"; then
            sqlite3 "$in_ram_database" <<< "insert into votes values ($v_id, '$v')"
        elif test -z "$(sqlite3 "$in_ram_database" <<< "select nom from votes where id is $v_id and nom is '$v'")"; then
            sqlite3 "$in_ram_database" <<< "update votes set nom = '$v' where id is $v_id"
        fi
        v_id=$((v_id + 1))
    done

    # One row per configured group, added only when not already present.
    for g in "${!groupes[@]}"; do
        # Double any single quote so the value stays a valid SQL literal.
        nom=${groupes[$g]//$sq/$sq$sq}
        nom_court=${g//$sq/$sq$sq}
        if test -z "$(sqlite3 "$in_ram_database" <<< "select id from groupes where nom is '$nom' and nom_court is '$nom_court'")"; then
            sqlite3 "$in_ram_database" <<< "insert into groupes (nom, nom_court) values ('$nom', '$nom_court')"
        fi
    done

    # Row 0 of `url` is the empty URL used as default for scrutins.url.
    if test -z "$(sqlite3 "$in_ram_database" <<< "select id from url where id = 0")"; then
        sqlite3 "$in_ram_database" <<< "insert into url values (0, '')"
    fi
}
42 45
 
43
-id_cols="Scrutin Date Titre Adoption"
44
-typevotes="Pour Contre Abstention Non-votants"
45
-nb_cols=$(wc -w <<< "$id_cols $typevotes $typevotes")
46
# Download every scrutin numbered first+1 .. last from the Assemblée nationale
# web site and record it in the working SQLite database.
#
# Globals read:
#   no_db_update, true_flag  the function returns at once when they are equal
#   first (defaults to 0), last
#   database                 only used in the informational message
#   in_ram_database          SQLite file that receives the data
#   update_progress          progress is printed every N percent (default 1)
# Tables written: url, scrutins, députés, dépouillement
# Outputs: progress lines on stdout, diagnostics on stderr.
# Exits with status 1 when the title, date or result of a scrutin cannot be
# extracted from the downloaded page.
function update_database () {
    test "$no_db_update" = "$true_flag" && return

    local tempfile url_database progress begin now delta last_offset offset
    local scrutin title date adoption url id_url v g d d_id

    tempfile="/dev/shm/scrutin.$$"
    progress=0
    if test "${first:-0}" -lt "$last"; then
        echo "récupération des scrutins n°$((${first:-0}+1)) à n°$last dans $database (à conserver autant que possible)" >&2

        # Per-process cache "scrutin|dossier url" built from the listing pages;
        # the pid suffix keeps concurrent runs from overwriting each other.
        url_database="/dev/shm/url_database.$$"
        : > "$url_database"
        test $((last % 100)) -ne 0 && last_offset=0
        for offset in $(seq $((last - 100)) -100 ${first:-0} ) $last_offset; do
            wget -qO- "http://www2.assemblee-nationale.fr/scrutins/liste/(offset)/$offset/(legislature)/15/(type)/TOUS/(idDossier)/TOUS" \
                | awk '
                    /<td class="denom">/ {
                        scrutin = gensub(/^.+denom.>([[:digit:]]+).*<.td./,"\\1","1",$0)
                    }
                    /<td class="desc">.+dossier<.a/ {
                        a[scrutin] = gensub(/^.+.<a href="(.+)">dossier<.a>.*$/,"\\1","1",$0)
                    }
                    END {
                        for (i in a)
                            print gensub("*","","1",i) "|" a[i]
                    }' >> "$url_database"
        done
        sort -u "$url_database" > "${url_database}.sorted"
        mv -f "${url_database}.sorted" "$url_database"

        # From here on, split command output on newlines only, because group
        # and deputy names contain spaces.  `local` restores the caller's IFS
        # when the function returns.
        local IFS=$'\n'
        begin=$(date +%s)
        for scrutin in $(seq $((${first:-0}+1)) $last); do
            # Keep only the part of the page between the title banner and the
            # first javascript block.
            wget -qO- "http://www2.assemblee-nationale.fr/scrutins/detail/(legislature)/15/(num)/$scrutin" \
                |  sed -r '0,/< *div class="titre-bandeau-bleu +to-print" *>/d; /< *script +type="text\/javascript" *>/,$d' > "$tempfile"

            title='' date='' adoption='' url='' id_url=''

            title=$(sed -rn '/<h1 class="">Analyse du scrutin n° '$scrutin'/n; s,^.*<h3 class="president-title">(.+).</h3>,\1,p' "$tempfile" \
                    | sed "s/;//g; s/[ \t][ \t]+/ /g; s/^Scrutin public sur *//; s/^l[ae']s* *//")
            date=$(sed -rn 's,^.*<h1 class="">Analyse du scrutin n° '$scrutin'<br/>(.+) </h1>,\1,p' "$tempfile")
            adoption=$(sed -rn 's,^.*<p class="annonce"><span class="annoncevote">(.+).</span></p>.*$,\1,p' "$tempfile")
            if test -z "$title" || test -z "$date" || test -z "$adoption"; then
                echo "erreur dans la récupération du scrutin $scrutin" >&2
                rm -f "$url_database" "$tempfile"
                exit 1
            fi
            # NOTE(review): 'e a a' presumably matches "...nationale a adopté"
            # and not the rejection wording -- confirm against the site.
            if grep -q 'e a a' <<< "$adoption"; then
                adoption=1
            else
                adoption=0
            fi

            url=$(awk -F'|' "/^$scrutin\|/{print \$2}" "$url_database")
            id_url=$(sqlite3 "$in_ram_database" <<< "select id from url where url is '$url'")
            if test -z "$id_url"; then
                sqlite3 "$in_ram_database" <<< "insert into url (url) values ('$url')"
                id_url=$(sqlite3 "$in_ram_database" <<< "select id from url where url is '$url'")
            fi

            sqlite3 "$in_ram_database" <<< "insert into scrutins values ($scrutin, '$date', \"${title//\"}\", $adoption, ${id_url:-0})"

            # v is "id|nom" from votes, g is "id|nom" from groupes; d is one
            # deputy name extracted from that group's list for that vote kind.
            for v in $(sqlite3 "$in_ram_database" <<< "select * from votes"); do
                for g in $(sqlite3 "$in_ram_database" <<< "select id,nom from groupes"); do
                    for d in $(sed -rn '/<p class="nomgroupe">'${g#*|}' <span class="block topmargin">/,/<div class="TTgroupe topmargin-lg">/p' "$tempfile" \
                        | sed -rn '/<p class="typevote">'${v#*|}':/,/<.div>/p' \
                        | sed 's,</li>,\n,g' \
                        | sed -rn '/<p class="typevote">/d; s,^\s*<li>\s*,,; s,&nbsp;, ,g; s/^\s*//; s/M(me|\.) //; s/ \(.*$//; s,<b>,,; s,</b>,,p'); do
                        d_id=$(sqlite3 "$in_ram_database" <<< "select id from députés where nom is \"$d\" and groupe is ${g%|*}")
                        if test -z "$d_id"; then
                            sqlite3 "$in_ram_database" <<< "insert into députés (nom, groupe) values (\"$d\", ${g%|*})"
                            d_id=$(sqlite3 "$in_ram_database" <<< "select id from députés where nom is \"$d\" and groupe is ${g%|*}")
                        fi
                        sqlite3 "$in_ram_database" <<< "insert into dépouillement values ($d_id, $scrutin, ${g%|*}, ${v%|*})"
                    done
                done
            done

            if test $((scrutin * 100 / last)) -ne "$progress"; then
                progress=$((scrutin * 100 / last))
                if test $((progress % ${update_progress:-1})) -eq 0; then
                    now=$(date +%s)
                    delta=$((now - begin))
                    # Remaining time = elapsed time per scrutin handled in this
                    # run, times the number of scrutins still to fetch.
                    echo "$progress%, ETA: $(date +%H:%M:%S -d "$((delta * (last - scrutin) / (scrutin - ${first:-0}))) seconds")"
                fi
            fi
        done
        rm -f "$url_database" "$tempfile"
    fi
}
128
+
129
+function write_comparaison () {
130
+    result="comparaisons $cible avec $groupe_ref${dossier:+ - ${dossier}}"
131
+    content="/dev/shm/$result/content.xml"
132
+    id_cols="Scrutin Date Titre Adoption"
133
+    typevotes=$(sqlite3 "$in_ram_database" <<< "select nom from votes")
134
+    nb_cols=$(wc -w <<< "$id_cols $typevotes $typevotes")
135
+
136
+    echo "génération du fichier $result"
137
+
138
+    mkdir -p "/dev/shm/$result/META-INF"
139
+
140
+    cat > "/dev/shm/$result/META-INF/manifest.xml" << EOmetainf
57 141
 <?xml version="1.0" encoding="UTF-8"?>
58 142
 <manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" manifest:version="1.2">
59 143
  <manifest:file-entry manifest:full-path="/" manifest:version="1.2" manifest:media-type="application/vnd.oasis.opendocument.spreadsheet"/>
... ...
@@ -61,141 +145,380 @@ cat > "/dev/shm/$result/META-INF/manifest.xml" << EOmetainf
61 145
 </manifest:manifest>
62 146
 EOmetainf
63 147
 
64
-printf 'application/vnd.oasis.opendocument.spreadsheet' > "/dev/shm/$result/mimetype"
148
+    printf 'application/vnd.oasis.opendocument.spreadsheet' > "/dev/shm/$result/mimetype"
65 149
 
66
-content="/dev/shm/$result/content.xml"
150
+    echo '<?xml version="1.0" encoding="UTF-8"?>' > "$content"
67 151
 
68
-cat > "$content" << EOcontent
69
-<?xml version="1.0" encoding="UTF-8"?>
70
-<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rpt="http://openoffice.org/2005/report" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" 
xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:css3t="http://www.w3.org/TR/css3-text/" office:version="1.2">
71
-<office:scripts/>
72
-<office:font-face-decls>
73
-<style:font-face style:name="Liberation Sans" svg:font-family="&apos;Liberation Sans&apos;" style:font-family-generic="swiss" style:font-pitch="variable"/>
74
-<style:font-face style:name="DejaVu Sans" svg:font-family="&apos;DejaVu Sans&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
75
-<style:font-face style:name="FreeSans" svg:font-family="FreeSans" style:font-family-generic="system" style:font-pitch="variable"/>
76
-</office:font-face-decls>
77
-<office:automatic-styles>
152
+    cat >> "$content" << EOcontent
153
+    <office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rpt="http://openoffice.org/2005/report" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" 
xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:css3t="http://www.w3.org/TR/css3-text/" office:version="1.2">
154
+    <office:scripts/>
155
+    <office:font-face-decls>
156
+    <style:font-face style:name="Liberation Sans" svg:font-family="&apos;Liberation Sans&apos;" style:font-family-generic="swiss" style:font-pitch="variable"/>
157
+    <style:font-face style:name="DejaVu Sans" svg:font-family="&apos;DejaVu Sans&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
158
+    <style:font-face style:name="FreeSans" svg:font-family="FreeSans" style:font-family-generic="system" style:font-pitch="variable"/>
159
+    </office:font-face-decls>
160
+    <office:automatic-styles>
161
+EOcontent
162
+
163
+    for i in $(seq $nb_cols); do
164
+        cat >> "$content" << EOcontent
165
+            <style:style style:name="co$i" style:family="table-column">
166
+            <style:table-column-properties fo:break-before="auto" style:column-width="30.00mm"/>
167
+            </style:style>
78 168
 EOcontent
169
+    done
79 170
 
80
-for i in $(seq $nb_cols); do
81 171
     cat >> "$content" << EOcontent
82
-<style:style style:name="co$i" style:family="table-column">
83
-<style:table-column-properties fo:break-before="auto" style:column-width="30.00mm"/>
84
-</style:style>
172
+    <style:style style:name="ro1" style:family="table-row">
173
+    <style:table-row-properties style:row-height="4.52mm" fo:break-before="auto" style:use-optimal-row-height="true"/>
174
+    </style:style>
175
+    <style:style style:name="ta1" style:family="table" style:master-page-name="Default">
176
+    <style:table-properties table:display="true" style:writing-mode="lr-tb"/>
177
+    </style:style>
178
+    <style:style style:name="ce1" style:family="table-cell" style:parent-style-name="Default">
179
+    <style:table-cell-properties fo:background-color="#cccccc"/>
180
+    </style:style>
181
+    </office:automatic-styles>
182
+    <office:body>
183
+    <office:spreadsheet>
184
+    <table:calculation-settings table:automatic-find-labels="false"/>
185
+    <table:table table:name="$result" table:style-name="ta1">
186
+    <office:forms form:automatic-focus="false" form:apply-design-mode="false"/>
187
+    <table:table-column table:style-name="co1" table:number-columns-repeated="$(wc -w <<< $id_cols)" table:default-cell-style-name="Default"/>
85 188
 EOcontent
86
-done
87 189
 
88
-cat >> "$content" << EOcontent
89
-<style:style style:name="ro1" style:family="table-row">
90
-<style:table-row-properties style:row-height="4.52mm" fo:break-before="auto" style:use-optimal-row-height="true"/>
91
-</style:style>
92
-<style:style style:name="ta1" style:family="table" style:master-page-name="Default">
93
-<style:table-properties table:display="true" style:writing-mode="lr-tb"/>
94
-</style:style>
95
-</office:automatic-styles>
96
-<office:body>
97
-<office:spreadsheet>
98
-<table:calculation-settings table:automatic-find-labels="false"/>
99
-<table:table table:name="$result" table:style-name="ta1">
100
-<office:forms form:automatic-focus="false" form:apply-design-mode="false"/>
190
+    for i in $(seq $(wc -w <<< $typevotes)); do
191
+        cat >> "$content" << EOcontent
192
+        <table:table-column table:style-name="co1" table:default-cell-style-name="ce1"/>
193
+        <table:table-column table:style-name="co1" table:default-cell-style-name="Default"/>
101 194
 EOcontent
195
+    done
196
+    echo '<table:table-row table:style-name="ro1">' >> "$content"
102 197
 
103
-for i in $(seq $nb_cols); do
104
-    echo "<table:table-column table:style-name=\"co$i\" table:default-cell-style-name=\"Default\"/>" >> "$content"
105
-done
106
-echo '<table:table-row table:style-name="ro1">' >> "$content"
198
+    for colonne in $id_cols; do
199
+        cat >> "$content" << EOcontent
200
+            <table:table-cell office:value-type="string" calcext:value-type="string">
201
+            <text:p>$colonne</text:p>
202
+            </table:table-cell>
203
+EOcontent
204
+    done
107 205
 
108
-for colonne in $id_cols; do
109
-    cat >> "$content" << EOcontent
110
-<table:table-cell office:value-type="string" calcext:value-type="string">
111
-<text:p>$colonne</text:p>
112
-</table:table-cell>
206
+    for typevote in $typevotes; do
207
+        for g in "$cible" $groupe_ref; do
208
+            cat >> "$content" << EOcontent
209
+                <table:table-cell office:value-type="string" calcext:value-type="string">
210
+                <text:p>$typevote - $g</text:p>
211
+                </table:table-cell>
113 212
 EOcontent
114
-done
213
+        done
214
+    done
215
+
216
+    echo '</table:table-row>' >> "$content"
217
+
218
+    progress=0
219
+    begin=$(date +%s)
220
+    line=1
221
+    for scrutin in $(eval ${seq:-seq $first $last}); do
222
+
223
+        data=$(sqlite3 "$in_ram_database" <<< "select date,intitulé,adoption,url.url from scrutins inner join url on scrutins.url = url.id where num is $scrutin")
224
+        date=$(cut -d'|' -sf 1 <<< $data)
225
+        title=$(cut -d'|' -sf 2 <<< $data)
226
+        adoption=$(cut -d'|' -sf 3 <<< $data)
227
+        url=$(cut -d'|' -sf 4 <<< $data)
228
+        test $adoption -eq 1 && adoption='oui' || adoption='non'
115 229
 
116
-for typevote in $typevotes; do
117
-    for g in "$cible" $groupe_ref; do
118 230
         cat >> "$content" << EOcontent
119
-<table:table-cell office:value-type="string" calcext:value-type="string">
120
-<text:p>$typevote - $g</text:p>
121
-</table:table-cell>
231
+            <table:table-row table:style-name="ro1">
232
+EOcontent
233
+        if test -n "$url"; then
234
+            cat >> "$content" << EOcontent
235
+                <table:table-cell office:value-type="string" calcext:value-type="string">
236
+                <text:p><text:a xlink:href="$url" xlink:type="simple">$scrutin</text:a></text:p>
237
+                </table:table-cell>
122 238
 EOcontent
239
+        else
240
+            cat >> "$content" << EOcontent
241
+                <table:table-cell office:value-type="float" office:value="$scrutin" calcext:value-type="float">
242
+                <text:p>$scrutin</text:p>
243
+                </table:table-cell>
244
+EOcontent
245
+        fi
246
+        cat >> "$content" << EOcontent
247
+            <table:table-cell office:value-type="string" calcext:value-type="string">
248
+            <text:p>$date</text:p>
249
+            </table:table-cell>
250
+            <table:table-cell office:value-type="string" calcext:value-type="string">
251
+            <text:p>${title//\'/&apos;}</text:p>
252
+            </table:table-cell>
253
+            <table:table-cell office:value-type="string" calcext:value-type="string">
254
+            <text:p>${adoption}</text:p>
255
+            </table:table-cell>
256
+EOcontent
257
+        for typevote in 0 1 2 3; do
258
+            cible_votes=$(sqlite3 "$in_ram_database" <<< "select
259
+                                        count(député)
260
+                                     from
261
+                                        dépouillement
262
+                                     where
263
+                                        scrutin is $scrutin
264
+                                     and
265
+                                        vote is $typevote
266
+                                     and
267
+                                        groupe is $groupe_id ${nom:+ and député is ${nom%|*}}")
268
+            ref_votes=$(sqlite3 "$in_ram_database" <<< "select
269
+                                        count(député)
270
+                                     from
271
+                                        dépouillement
272
+                                     where
273
+                                        scrutin is $scrutin
274
+                                     and
275
+                                        vote is $typevote
276
+                                     and
277
+                                        groupe is $groupe_ref_id")
278
+            cat >> "$content" << EOcontent
279
+                <table:table-cell office:value-type="float" office:value="$cible_votes" calcext:value-type="float">
280
+                <text:p>$cible_votes</text:p>
281
+                </table:table-cell>
282
+                <table:table-cell office:value-type="float" office:value="$ref_votes" calcext:value-type="float">
283
+                <text:p>$ref_votes</text:p>
284
+                </table:table-cell>
285
+EOcontent
286
+            done
287
+        echo '</table:table-row>' >> "$content"
288
+
289
+        let line++
290
+
291
+        if test $((($scrutin*100)/$last)) -ne $progress; then
292
+            progress=$((($scrutin*100)/$last))
293
+            if test $(($progress % ${generation_progress:-5})) -eq 0; then
294
+                now=$(date +%s)
295
+                delta=$(( $now - $begin ))
296
+                echo $progress%, ETA: $(date +%H:%M:%S -d "$(($delta * ($last - $scrutin) / $scrutin)) seconds")
297
+            fi
298
+        fi
123 299
     done
300
+    echo
301
+
302
+    cat >> "$content" << EOcontent
303
+    </table:table>
304
+    <table:named-expressions/>
305
+    <table:database-ranges>
306
+    <table:database-range table:name="__Anonymous_Sheet_DB__0" table:target-range-address="&apos;$result&apos;.D1:&apos;$result&apos;.$(printf "\\$(printf '%03o' $((64+$nb_cols)))")$line" table:display-filter-buttons="true"/>
307
+    </table:database-ranges>
308
+    </office:spreadsheet>
309
+    </office:body>
310
+    </office:document-content>
311
+EOcontent
312
+
313
+    ( cd "/dev/shm/$result" && zip -r ../"$result" * > /dev/null 2>&1 && cd .. && rm -fr "$result" )
314
+
315
+    mv -f "/dev/shm/$result.zip" "$result.ods"
316
+
317
+    echo "$result.ods"
318
+}
319
+
320
# Persist the working copy of the database (installed as the EXIT trap).
#
# Globals read:
#   in_ram_database  working copy; always removed or moved away on return
#   database         persistent location
# Behaviour:
#   - working copy identical to $database     -> working copy deleted
#   - $database writable                      -> replaced by the working copy
#   - $database does not exist                -> created from the working copy
#   - otherwise ($database exists, read-only) -> working copy discarded
function save_database () {
    local ram_sum='' db_sum=''

    # The trap may fire before the working copy was created, or after an
    # error path already removed it: nothing to save then.
    if test -z "$in_ram_database" || ! test -e "$in_ram_database"; then
        return 0
    fi

    # Compare checksums read from stdin so that no file name has to be
    # rewritten (a path is not a safe sed pattern).
    if test -r "$database"; then
        ram_sum=$(md5sum < "$in_ram_database")
        db_sum=$(md5sum < "$database")
    fi

    if test -n "$ram_sum" && test "$ram_sum" = "$db_sum"; then
        rm -f -- "$in_ram_database"
    elif test -w "$database"; then
        mv -f -- "$in_ram_database" "$database"
    elif ! test -e "$database"; then
        mv -- "$in_ram_database" "$database"
    else
        rm -f -- "$in_ram_database"
    fi
}
331
+
332
# Save (or discard) the working database whatever the exit path.
trap save_database EXIT

# Random token used as the "true" value of every boolean flag below.
true_flag=$(mktemp --dry-run XXXXX)

# Long options understood by the script; a trailing ':' means the option
# takes a value.  Kept in an array and joined with commas so getopt receives
# the whole list as ONE argument.
long_options=(
    no-db-update
    db-update-only
    cible:
    ref:
    député:
    premier-scrutin:
    dernier-scrutin:
    période:
    liste-dossiers
    liste-députés
    liste-députés-du-groupe:
    dossiers
    dossier:
    conf:
    database:
    progrès-génération:
    progrès-update:
)

# -o '' declares "no short options"; without it getopt would take the first
# command-line word as its short-option string.
OPTS=$( IFS=,; getopt -o '' -l "${long_options[*]}" -- "$@" ) || exit 1

eval set -- "$OPTS"
355
+
356
# Walk the positional parameters (the option list produced by getopt earlier
# in the script) and record each option in a global.  Boolean flags are set
# to $true_flag.  Options taking a value consume "$2" with an extra shift.
# Problems with --conf / --database are reported on stderr and remembered in
# options_error instead of aborting immediately.
while [[ $# -gt 0 ]]; do
    case "$1" in
        "--no-db-update")
            no_db_update=$true_flag;;
        "--db-update-only")
            db_update_only=$true_flag;;
        "--cible")
            groupe="${2^^}"
            shift;;
        "--ref")
            # The rest of the script reads groupe_ref (singular).
            groupe_ref="${2^^}"
            shift;;
        "--député")
            depute=$true_flag
            nom="$2"
            shift;;
        # Restricting the range of scrutins also disables the database update.
        "--premier-scrutin")
            no_db_update=$true_flag
            first="$2"
            shift;;
        "--dernier-scrutin")
            no_db_update=$true_flag
            last="$2"
            shift;;
        "--période")
            periode=$true_flag
            no_db_update=$true_flag
            periode_value="$2"
            shift;;
        "--liste-députés-du-groupe")
            liste_deputes=$true_flag
            liste_deputes_value="${2^^}"
            shift;;
        "--liste-députés")
            liste_deputes=$true_flag;;
        "--liste-dossiers")
            liste_dossiers=$true_flag;;
        "--dossier")
            dossier=$true_flag
            dossier_value="$2"
            shift;;
        "--dossiers")
            dossier=$true_flag;;
        "--conf")
            test -r "$2" || {
                echo "config introuvable $2" >&2
                options_error=$true_flag
            }
            config_file="$2"
            shift;;
        "--database")
            test -r "$2" && file -b "$2" | grep -q '^SQLite 3.x database' || {
                echo "erreur sur option database: fichier '$2' introuvable ou pas une base SQLite 3" >&2
                options_error=$true_flag
            }
            database="$2"
            shift;;
        "--progrès-génération")
            generation_progress="$2"
            shift;;
        "--progrès-update")
            update_progress="$2"
            shift;;
        "--")
            # End of options: whatever follows is an operand, not an option.
            shift
            break;;
    esac
    shift
done
125 422
 
126
-echo '</table:table-row>' >> "$content"
423
# Stop here when option parsing recorded an error.
test "$options_error" = $true_flag && exit 1

# Default database: next to the script, named after it.
test -z "$database" && database="${0}.db"

# Load the configuration: the file given with --conf, else "<script>.conf"
# when it is readable.
if test -n "$config_file"; then
    source "$config_file"
else
    config_file="${0}.conf"
    if test -r "$config_file"; then
        source "$config_file"
    fi
fi

groupe=${groupe:-LREM}
groupe_ref=${groupe_ref:-GDR}
cible=$groupe

# Work on a copy of the database in /dev/shm; save_database (EXIT trap)
# decides whether it replaces the persistent file.
in_ram_database=$(mktemp --dry-run /dev/shm/XXXXXXXXXXXX)
if test -r "$database"; then
    cp "$database" "$in_ram_database" || exit 1
else
    create_database
fi

# Resolve the short names of the target and reference groups to their ids,
# stored in groupe_id and groupe_ref_id.
_sq="'"
for g in groupe groupe_ref; do
    # ${!g} is the value of the variable whose name is in $g; single quotes
    # are doubled so the value stays a valid SQL literal.
    _gid=$(sqlite3 "$in_ram_database" <<< "select id from groupes where nom_court is '${!g//$_sq/$_sq$_sq}'")
    if test -z "$_gid"; then
        echo "groupe ${!g} inconnu" >&2
        exit 1
    fi
    # Indirect assignment without eval: the query result is never executed.
    printf -v "${g}_id" '%s' "$_gid"
done
unset _sq _gid
166 454
 
167
-    unset cible_votes[*] ref_votes[*]
168
-    for typevote in $typevotes; do
169
-        cible_votes[$typevote]=$(sed -rn "${groupes[$groupe]}p" $tempfile | sed -rn '/<p class="typevote">'${typevote}':/,/<.div>/p' | sed 's,</li>,\n,g' | grep -ic "&nbsp;<b>${nom:+${nom}</br>}")
170
-        ref_votes[$typevote]=$(sed -rn  "${groupes[$groupe_ref]}p"  $tempfile | sed -rn '/<p class="typevote">'${typevote}':/,/<.div>/p' | sed 's,</li>,\n,g' | grep -c '&nbsp;<b>')
455
+if test "$periode" = $true_flag; then
456
+    first=$(sqlite3 "$in_ram_database" <<< "select num from scrutins where date like '% du ${periode_value%:*}' order by num asc" | head -1)
457
+    last=$(sqlite3 "$in_ram_database" <<< "select num from scrutins where date like '% du ${periode_value#*:}' order by num asc" | tail -1)
458
+    test -z "$first" && echo "date de début inconnue: ${periode_value%:*}" >&2 && rm -f $in_ram_database && exit 1
459
+    test -z "$last" && echo "date de fin inconnue: ${periode_value#*:}" >&2 && rm -f $in_ram_database && exit 1
460
+else
461
+    test -z "$last" && last=$(wget -qO- 'http://www2.assemblee-nationale.fr/scrutins/liste/(legislature)/15/(type)/TOUS/(idDossier)/TOUS' \
462
+            | sed -rn 's,^.*<td class="denom">(.+)</td>.*$,\1,p' \
463
+            | head -1)
171 464
 
172
-        cat >> "$content" << EOcontent
173
-<table:table-cell office:value-type="float" office:value="${cible_votes[$typevote]}" calcext:value-type="float">
174
-<text:p>${cible_votes[$typevote]}</text:p>
175
-</table:table-cell>
176
-<table:table-cell office:value-type="float" office:value="${ref_votes[$typevote]}" calcext:value-type="float">
177
-<text:p>${ref_votes[$typevote]}</text:p>
178
-</table:table-cell>
179
-EOcontent
180
-    done
181
-    echo '</table:table-row>' >> "$content"
182
-done
183
-echo
184
-rm $tempfile
185
-
186
-cat >> "$content" << EOcontent
187
-</table:table>
188
-<table:named-expressions/>
189
-<table:database-ranges>
190
-<table:database-range table:name="__Anonymous_Sheet_DB__0" table:target-range-address="&apos;$result&apos;.A1:&apos;$result&apos;.$(printf "\\$(printf '%03o' $((64+$nb_cols)))")$(($last+1))" table:display-filter-buttons="true"/>
191
-</table:database-ranges>
192
-</office:spreadsheet>
193
-</office:body>
194
-</office:document-content>
195
-EOcontent
465
+    test -z "$first" && first=$(sqlite3 "$in_ram_database" <<< "select count(num) from scrutins")
466
+fi
196 467
 
197
-( cd "/dev/shm/$result" && zip -r ../"$result" * > /dev/null 2>&1 && cd .. && rm -fr "$result" )
468
+if test "$liste_dossiers" = $true_flag; then
469
+    sqlite3 "$in_ram_database" <<< "select printf('%s - %s', id, url) from url" | sed 's,https*://.*/dossiers/,,; s/_/ /g; s/.asp$//'
470
+    exit
471
+fi
472
+
473
+if test "$db_update_only" = $true_flag; then
474
+    unset first last
475
+    update_database
476
+    exit
477
+fi
478
+
479
+if test "$liste_deputes" = $true_flag; then
480
+    if test -n "$liste_deputes_value"; then
481
+        sqlite3 "$in_ram_database" <<< "select printf('%s - %s', nom, groupes.nom_court) from députés inner join groupes on groupes.id = députés.groupe where groupes.nom_court is '$liste_deputes_value'"
482
+    else
483
+        sqlite3 "$in_ram_database" <<< "select printf('%s - %s', nom, groupes.nom_court) from députés inner join groupes on groupes.id = députés.groupe order by groupes.nom_court asc"
484
+    fi
485
+    exit
486
+fi
198 487
 
199
-mv -f "/dev/shm/$result.zip" "$result.ods"
488
+if test "$depute" = $true_flag; then
489
+    if test -n "$nom"; then
490
+        match=$(sqlite3 "$in_ram_database" <<< "select count(députés.id) from députés inner join groupes on groupes.id = députés.groupe where députés.nom like '%$nom%' and groupes.nom_court is '$groupe' collate nocase")
491
+        if test $match -ne 1; then
492
+            if test $match -eq 0; then
493
+                echo "pas de député correspondant dans le groupe $groupe"
494
+            else 
495
+                echo "plusieurs députés correspondent:"
496
+                sqlite3 "$in_ram_database" <<< "select députés.nom from députés inner join groupes on groupes.id = députés.groupe where députés.nom like '%$nom%' and groupes.nom_court is '$groupe' collate nocase"
497
+            fi
498
+            exit 1
499
+        else
500
+            nom=$(sqlite3 "$in_ram_database" <<< "select députés.id,députés.nom from députés inner join groupes on groupes.id = députés.groupe where députés.nom like '%$nom%' and groupes.nom_court is '$groupe' collate nocase")
501
+            cible="${nom#*|} ($groupe)"                        
502
+        fi
503
+    fi
504
+fi
505
+
506
+if test "$dossier" = $true_flag; then
507
+    if test -z "$dossier_value"; then
508
+        IFS_=$IFS
509
+        IFS=$'\n'
510
+        select dossier in $(sqlite3 "$in_ram_database" <<< "select url from url" | sed 's,^.*/dossiers/,,; s/_/ /g; s/.asp$//'); do
511
+            if test -n "$dossier"; then
512
+                seq="sqlite3 \"$in_ram_database\" <<< \"select num from scrutins inner join url on url.id = scrutins.url where url.url like '%/dossiers/${dossier// /_}%' order by num asc\""
513
+                break
514
+            fi
515
+        done
516
+        IFS=$IFS_
517
+    else
518
+        seq="sqlite3 \"$in_ram_database\" <<< \"select num from scrutins inner join url on url.id = scrutins.url where url.id is $dossier_value order by num asc\""
519
+        dossier=$(sqlite3 "$in_ram_database" <<< "select url from url where id is $dossier_value" | sed 's,^.*/dossiers/,,; s/_/ /g; s/.asp$//')
520
+    fi
521
+fi
200 522
 
201
-echo "$result.ods"
523
+update_database
524
+write_comparaison
+10
analyse-votes-AN.conf
... ...
@@ -0,0 +1,10 @@
1
+groupes[LREM]='Groupe La République en Marche'
2
+groupes[LR]='Groupe Les Républicains'
3
+groupes[MODEM]='Groupe du Mouvement Démocrate et apparentés'
4
+groupes[PS]='Groupe Socialistes et apparentés'
5
+groupes[UDI]='Groupe UDI, Agir et Indépendants'
6
+groupes[NG]='Groupe Nouvelle Gauche'
7
+groupes[FI]='Groupe La France insoumise'
8
+groupes[GDR]='Groupe de la Gauche démocrate et républicaine'
9
+groupes[LT]='Groupe Libertés et Territoires'
10
+groupes[NI]='Non inscrits'