...
|
...
|
@@ -2,7 +2,7 @@
|
2
|
2
|
|
3
|
3
|
set -e
|
4
|
4
|
|
5
|
|
-for tool in sqlite3 getopt md5sum mktemp; do
|
|
5
|
+for tool in sqlite3 getopt mktemp w3m jq; do
|
6
|
6
|
which $tool > /dev/null 2>&1 || {
|
7
|
7
|
echo missing tool $tool
|
8
|
8
|
exit 1
|
...
|
...
|
@@ -12,129 +12,145 @@ done
|
12
|
12
|
# Snapshot of the field separator in effect at this point of the script.
# update_database assigns a new value to IFS further down; this copy is
# presumably what gets used to put the original back (restore site not
# visible in this part of the file -- TODO confirm).
IFS_=$IFS
|
13
|
13
|
|
14
|
14
|
# Run one SQL text against the working database through the sqlite3 CLI.
# Globals:   in_ram_database (read) - path of the database file handed to sqlite3
# Arguments: $1 - SQL text, fed to sqlite3 on standard input
#            $2 - optional output mode; when non-empty, sqlite3 is started
#                 with `-cmd ".mode $2"` so the mode is set before $1 runs
# Outputs:   whatever sqlite3 prints
# Returns:   exit status of sqlite3
function sqlite_request () {
  local -a mode_args=()

  # Only pass -cmd when a mode was requested; the ".mode <name>" directive
  # must reach sqlite3 as a single argument.
  if [[ -n "${2:-}" ]]; then
    mode_args=(-cmd ".mode $2")
  fi

  sqlite3 "${mode_args[@]}" "$in_ram_database" <<< "$1"
}
|
17
|
17
|
|
18
|
18
|
# Create the tables and unique indexes of the database when they do not exist
# yet, then make sure the four vote positions are present in `votes`.
# Every statement is sent through sqlite_request, one call per statement.
# Arguments: none
# Returns:   status of the last sqlite_request call (under `set -e` a failing
#            statement aborts the script)
function create_database () {
  local statement v
  local -a schema=(
    "create table if not exists dossiers (id integer primary key, titre text, url text)"
    "create table if not exists votes (id integer primary key, nom text)"
    "create table if not exists députés (id integer primary key, nom text, groupe integer, date text)"
    "create table if not exists groupes (id integer primary key, nom text unique, nom_court text)"
    # "not null" on intitulé: the former spelling "non null" was accepted by
    # SQLite but did not declare a NOT NULL constraint.
    "create table if not exists scrutins (num integer primary key, séance text, date text not null, intitulé text not null, adoption boolean, dossier integer, mise_au_point text)"
    "create table if not exists dépouillements (scrutin integer not null, député integer not null, vote integer not null)"
    "create unique index if not exists 'index_députés' on députés (nom, groupe)"
    "create unique index if not exists 'index_dossiers' on dossiers (titre, url)"
    "create unique index if not exists 'index_dépouillements' on dépouillements (député, scrutin)"
  )

  for statement in "${schema[@]}"; do
    sqlite_request "$statement"
  done

  # votes.nom carries no uniqueness constraint, so "insert or ignore" would
  # append a fresh copy of each position on every run. Guard the insert with
  # an explicit existence check instead; this also works on databases that
  # were created before this change.
  for v in Pour Contre Abstention Non-votant; do
    sqlite_request "insert into votes (nom) select '$v' where not exists (select 1 from votes where nom = '$v')"
  done
}
|
53
|
33
|
|
54
|
34
|
function update_database () {
|
55
|
35
|
test "$no_db_update" = $true_flag && return
|
56
|
36
|
tempfile="/dev/shm/scrutin.$$"
|
57
|
37
|
progress=0
|
|
38
|
+ for r in "${!acronymes[@]}"; do
|
|
39
|
+ sqlite_request "update groupes set nom_court = \"${acronymes[$r]}\" where nom = \"$r\""
|
|
40
|
+ done
|
|
41
|
+ sqlite_request "create table if not exists dossier_par_scrutin (scrutin integer, url text)"
|
|
42
|
+ echo "récupération des dossiers"
|
|
43
|
+ wget -qO- "https://www.assemblee-nationale.fr/dyn/$mandature/dossiers" \
|
|
44
|
+ | sed -rn 's/<p class="m-0"><a title="Accéder au dossier législatif" href="([^"]+)">([^<]+)<.+$/\1 \2/p' \
|
|
45
|
+ | sed -r "s/^[[:space:]]*//; s/'/'/g" \
|
|
46
|
+ | awk -v dq='"' '{
|
|
47
|
+ printf("insert or ignore into dossiers (titre, url) values (%s, %s);\n", dq gensub($1 " ", "", "1", $0) dq, dq "https://www.assemblee-nationale.fr" $1 dq)
|
|
48
|
+ }' > $tempfile
|
|
49
|
+ sqlite3 "$in_ram_database" < $tempfile
|
58
|
50
|
first_=$first
|
59
|
|
- first=$(sqlite_request "select count(num) from scrutins")
|
|
51
|
+ first=$(sqlite_request "select max(num) from scrutins")
|
60
|
52
|
if test ${first:-0} -lt $last; then
|
61
|
53
|
echo "récupération des scrutins n°$((${first:-0}+1)) à n°$last dans "$database" (à conserver autant que possible)" >&2
|
62
|
54
|
|
63
|
|
- local url_database=/dev/shm/url_database
|
64
|
|
- : > "$url_database"
|
65
|
55
|
test $((last % 100)) -ne 0 && last_offset=0
|
66
|
56
|
IFS=$' \t\n'
|
67
|
57
|
for offset in $(seq $((last - 100)) -100 ${first:-0} ) $last_offset; do
|
68
|
58
|
wget -qO- "http://www2.assemblee-nationale.fr/scrutins/liste/(offset)/$offset/(legislature)/$mandature/(type)/TOUS/(idDossier)/TOUS" \
|
69
|
|
- | awk '
|
|
59
|
+ | awk -v dq='"' '
|
|
60
|
+ BEGIN {
|
|
61
|
+ }
|
70
|
62
|
/<td class="denom">/ {
|
71
|
|
- scrutin = gensub(/^.+denom.>([[:digit:]]+).*<.td./,"\\1","1",$0)
|
|
63
|
+ scrutin = gensub(/^.+denom.>([[:digit:]]+)\\*?<.td./,"\\1","1",$0)
|
72
|
64
|
}
|
73
|
|
- /<td class="desc">.+dossier<.a/ {
|
74
|
|
- a[scrutin] = gensub(/^.+.<a href="(.+)">dossier<.a>.*$/,"\\1","1",$0)
|
|
65
|
+ /<td class="desc">/ {
|
|
66
|
+ if (match($0, ">dossier<") > 0)
|
|
67
|
+ dossier[scrutin] = gensub(/^.+.<a href="([^"]+)">dossier<.a>.*$/,"\\1","1",$0)
|
75
|
68
|
}
|
76
|
69
|
END {
|
77
|
|
- for (i in a)
|
78
|
|
- print gensub("*","","1",i) "|" a[i]
|
79
|
|
- }' >> "$url_database"
|
|
70
|
+ for (i in dossier) {
|
|
71
|
+ printf("insert into dossier_par_scrutin (scrutin, url) values (%i, %s);\n", i, dq dossier[i] dq)
|
|
72
|
+ }
|
|
73
|
+ }' > $tempfile
|
|
74
|
+ sqlite3 "$in_ram_database" < $tempfile
|
80
|
75
|
done
|
81
|
|
- sort -u "$url_database" > "${url_database}.sorted"
|
82
|
|
- mv -f "${url_database}.sorted" "$url_database"
|
83
|
76
|
|
84
|
|
- IFS=$'\n'
|
|
77
|
+
|
|
78
|
+# IFS=$'\n'
|
85
|
79
|
begin=$(date +%s)
|
86
|
80
|
for scrutin in $(seq $((${first:-0}+1)) $last); do
|
87
|
|
- wget -qO- "http://www2.assemblee-nationale.fr/scrutins/detail/(legislature)/$mandature/(num)/$scrutin" \
|
88
|
|
- | sed -r '0,/< *div class="titre-bandeau-bleu +to-print" *>/d; /< *script +type="text\/javascript" *>/,$d' > $tempfile
|
89
|
|
-
|
90
|
|
- unset title date adoption url id_url
|
91
|
|
-
|
92
|
|
- title=$(sed -rn '/<h1 class="">Analyse du scrutin n° '$scrutin'/n; s,^.*<h3 class="president-title">(.+).</h3>,\1,p' $tempfile \
|
93
|
|
- | sed "s/;//g; s/[ \t][ \t]+/ /g; s/^Scrutin public sur *//; s/^l[ae']s* *//")
|
94
|
|
- eval $(sed -rn '
|
95
|
|
- s,^.*<h1 class="">Analyse du scrutin n° '$scrutin'<br/>(.+) </h1>,date="\1",p
|
96
|
|
- s,^.*<p class="annonce"><span class="annoncevote">(.+).</span></p>.*$,adoption="\1",p
|
97
|
|
- s,^.*<p .*class="repartitionvotes.*Pour l.adoption :.+>([0-9]+)<.*$,pour=\1,p
|
98
|
|
- s,^.*<p .*class="repartitionvotes.*Contre :.+>([0-9]+)<.*$,contre=\1,p' $tempfile)
|
99
|
|
- test -n "$title" -a -n "$date" || {
|
100
|
|
- echo "erreur dans la récupération du scrutin $scrutin"
|
101
|
|
- exit 1
|
102
|
|
- }
|
103
|
|
- if test -n "$adoption"; then
|
104
|
|
- grep -q 'e a a' <<< "$adoption" && adoption=1 || adoption=0
|
105
|
|
- else
|
106
|
|
- if test $pour -gt $contre; then
|
107
|
|
- adoption=1
|
108
|
|
- elif test $contre -gt $pour; then
|
109
|
|
- adoption=0
|
110
|
|
- fi
|
111
|
|
- fi
|
|
81
|
+ w3m -cols 512 -dump "http://www2.assemblee-nationale.fr/scrutins/detail/(legislature)/$mandature/(num)/$scrutin" \
|
|
82
|
+ | sed -n '/^Analyse du scrutin n° /,/^Votes des groupes/{/^Navigation/,/^ • Non inscrits/d;/^[[:space:]]*$/d;p}' \
|
|
83
|
+ | awk -v sq="'" -v dq='"' '
|
|
84
|
+ BEGIN { adoption = -1; map = 0 }
|
|
85
|
+ /^Analyse du scrutin/ { scrutin = $NF }
|
|
86
|
+ /séance du [0-3][0-9]\/[01][0-9]\/(19|20)[0-9]+/ { date = $NF; seance = $1 }
|
|
87
|
+ /^Scrutin public sur / { titre = gensub("^Scrutin public sur l[ae" sq "]s? ?", "", "1") }
|
|
88
|
+ /^L.Assemblée .+ adopté/ { adoption = NF == 3 }
|
|
89
|
+ /^Nombre de votants :/ { votants = $NF }
|
|
90
|
+ /^Nombre de suffrages exprimés :/ { exprimes = $NF }
|
|
91
|
+ /^Majorité absolue :/ { majo_absolue = $NF }
|
|
92
|
+ /^Pour l.adoption :/ { pour = $NF }
|
|
93
|
+ /^Contre :/ { contre = $NF }
|
|
94
|
+ /^Groupe / { groupe = gensub("^Groupe (.+) \\([1-9].+$", "\\1", "1")
|
|
95
|
+ groupe = gensub("^(la|les|le|l" sq "|du|des|de|de la|d" sq ") ", "", "1", groupe)
|
|
96
|
+ }
|
|
97
|
+ /^Non inscrits/ { groupe = "Non inscrits" }
|
|
98
|
+ /^(Pour|Abstention|Contre):/ { position = gensub(":", "", "1", $1) }
|
|
99
|
+ /^Non-votants?:/ {
|
|
100
|
+ position = gensub("s?:", "", "1", $1)
|
|
101
|
+ nvl = ""
|
|
102
|
+ while ($1 != "Groupe") {
|
|
103
|
+ getline
|
|
104
|
+ if ($1 == "Groupe")
|
|
105
|
+ break
|
|
106
|
+ nvl = nvl $0
|
|
107
|
+ }
|
|
108
|
+ f = split(nvl, nv, "(, | et )")
|
|
109
|
+ for (i=1; i<=f; i++) {
|
|
110
|
+ votes[groupe][position][gensub("(^ +|M\\. |Mme |Mlle | \\(.+)", "", "g", nv[i])]++
|
|
111
|
+ }
|
|
112
|
+ groupe = gensub("^Groupe (.+) \\([1-9].+$", "\\1", "1")
|
|
113
|
+ }
|
|
114
|
+ /^ • / { votes[groupe][position][gensub("^[[:punct:] ]*", "", "1")]++ }
|
|
115
|
+ /^Mises au point/,/^Votes des groupes/ { if ($1 != "(Sous") mises_au_point[map++] = $0 }
|
|
116
|
+ END {
|
|
117
|
+ if (adoption < 0)
|
|
118
|
+ adoption = pour >= majo_absolue
|
|
119
|
+
|
|
120
|
+ for (i=1; i<map-1; i++)
|
|
121
|
+ mise_au_point = sprintf("%s[%s]", mise_au_point, mises_au_point[i])
|
|
122
|
+
|
|
123
|
+ printf("insert into scrutins (num, séance, date, intitulé, adoption, mise_au_point) values (%i, %s, %s, %s, %i, %s);\n",
|
|
124
|
+ scrutin,
|
|
125
|
+ sq seance sq,
|
|
126
|
+ sq date sq,
|
|
127
|
+ dq gensub(dq, dq dq, "g", titre) dq,
|
|
128
|
+ adoption,
|
|
129
|
+ dq gensub(dq, dq dq, "g", mise_au_point) dq,
|
|
130
|
+ scrutin)
|
|
131
|
+ printf("update scrutins set dossier = ( select id from dossiers inner join dossier_par_scrutin where dossiers.url = dossier_par_scrutin.url and dossier_par_scrutin.scrutin = %i) where num = %i;\n",
|
|
132
|
+ scrutin,
|
|
133
|
+ scrutin)
|
|
134
|
+ for (groupe in votes) {
|
|
135
|
+ printf("insert or ignore into groupes (nom) values (%s);\n", dq groupe dq)
|
|
136
|
+ for (position in votes[groupe]) {
|
|
137
|
+ for (nom in votes[groupe][position]) {
|
|
138
|
+ if (nom !~ " \\(.+\\) *$")
|
|
139
|
+ printf("insert or ignore into députés (nom, groupe, date) select %s, id, %s from groupes where nom = %s;\n",
|
|
140
|
+ dq nom dq,
|
|
141
|
+ dq date dq,
|
|
142
|
+ dq groupe dq)
|
|
143
|
+ printf("insert or ignore into dépouillements (scrutin, député, vote) select %i, députés.id, votes.id from députés inner join votes where députés.nom = %s and votes.nom = %s;\n",
|
|
144
|
+ scrutin,
|
|
145
|
+ dq nom dq,
|
|
146
|
+ dq position dq)
|
|
147
|
+ }
|
|
148
|
+ }
|
|
149
|
+ }
|
|
150
|
+ }
|
|
151
|
+ ' > $tempfile
|
|
152
|
+ sqlite3 "$in_ram_database" < $tempfile
|
112
|
153
|
|
113
|
|
- url=$(awk -F'|' "/^$scrutin\|/{print \$2}" "$url_database")
|
114
|
|
- id_url=$(sqlite_request "select id from url where url is '$url'")
|
115
|
|
- if test -z "$id_url"; then
|
116
|
|
- sqlite_request "insert into url (url) values ('$url')"
|
117
|
|
- id_url=$(sqlite_request "select id from url where url is '$url'")
|
118
|
|
- fi
|
119
|
|
-
|
120
|
|
- sqlite_request "insert into scrutins values ($scrutin, '$date', \"${title//\"}\", $adoption, ${id_url:-0})"
|
121
|
|
- unset adoption pour contre
|
122
|
|
-
|
123
|
|
- for v in $(sqlite_request "select * from votes"); do
|
124
|
|
- for g in $(sqlite_request "select id,nom from groupes"); do
|
125
|
|
- for d in $(sed -rn '/<p class="nomgroupe">'${g#*|}' <span class="block topmargin">/,/<div class="TTgroupe topmargin-lg">/p' $tempfile \
|
126
|
|
- | sed -rn '/<p class="typevote">'${v#*|}':/,/<.div>/p' \
|
127
|
|
- | sed 's,</li>,\n,g' \
|
128
|
|
- | sed -rn '/<p class="typevote">/d; s,^\s*<li>\s*,,; s, , ,g; s/^\s*//; s/M(me|\.) //; s/ \(.*$//; s,<b>,,; s,</b>,,p'); do
|
129
|
|
- d_id=$(sqlite_request "select id from députés where nom is \"$d\" and groupe is ${g%|*}")
|
130
|
|
- if test -z "$d_id"; then
|
131
|
|
- sqlite_request "insert into députés (nom, groupe) values (\"$d\", ${g%|*})"
|
132
|
|
- d_id=$(sqlite_request "select id from députés where nom is \"$d\" and groupe is ${g%|*}")
|
133
|
|
- fi
|
134
|
|
- sqlite_request "insert into dépouillement values ($d_id, $scrutin, ${g%|*}, ${v%|*})"
|
135
|
|
- done
|
136
|
|
- done
|
137
|
|
- done
|
138
|
154
|
|
139
|
155
|
if test $(( ($scrutin - ${first:-0}) * 100 / ( $last - ${first:-0} ) )) -ne ${progress:-0}; then
|
140
|
156
|
progress=$(( ($scrutin - ${first:-0}) * 100 / ( $last - ${first:-0} ) ))
|
...
|
...
|
@@ -146,8 +162,10 @@ function update_database () {
|
146
|
162
|
fi
|
147
|
163
|
fi
|
148
|
164
|
done
|
|
165
|
+ sqlite_request 'drop table dossier_par_scrutin'
|
|
166
|
+
|
149
|
167
|
echo -e "\r\033[KTerminé: $(($scrutin - ${first:-0} - 1)) scrutins ajoutés"
|
150
|
|
- rm -f "$url_database" "$tempfile"
|
|
168
|
+ rm -f "$tempfile"
|
151
|
169
|
fi
|
152
|
170
|
first=$first_
|
153
|
171
|
}
|