#!/bin/bash
# Abort early when one of the external programs this script relies on is not
# available. `command -v` is a shell builtin, so the check does not itself
# depend on the optional `which` utility.
for tool in sqlite3 getopt md5sum mktemp; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    echo "missing tool $tool" >&2
    exit 1
  fi
done
# Create the schema in "$in_ram_database" (if missing) and seed the reference
# tables: the four vote types, the parliamentary groups and the placeholder
# url row with id 0.
# Globals read:
#   in_ram_database  path of the working SQLite file
#   groupes          associative array, short group name -> full group name
#                    (presumably filled by the sourced configuration file)
#   config_file      only used in the error message
# Exits the script with status 1 when no group is defined.
function create_database () {
  local v_id v g nom nom_court
  local q="'"

  # The array must NOT be re-declared here: a `declare -A` inside a function
  # creates an empty local that hides the caller's definition.
  if (( ${#groupes[@]} == 0 )); then
    echo "unable to find groupes in $config_file" >&2
    exit 1
  fi

  sqlite3 "$in_ram_database" <<< "create table if not exists votes (id integer primary key, nom text)"
  sqlite3 "$in_ram_database" <<< "create table if not exists url (id integer primary key autoincrement, url text)"
  sqlite3 "$in_ram_database" <<< "create table if not exists députés (id integer primary key autoincrement, nom text, groupe integer)"
  sqlite3 "$in_ram_database" <<< "create table if not exists groupes (id integer primary key autoincrement, nom text, nom_court text)"
  sqlite3 "$in_ram_database" <<< "create table if not exists scrutins (num integer primary key, date text not null, intitulé text not null, adoption boolean, url integer)"
  sqlite3 "$in_ram_database" <<< "create table if not exists dépouillement (député integer not null, scrutin integer not null, groupe integer not null, vote integer not null)"

  # Vote types get fixed ids 0..3, in this order; an existing row with a
  # different label is corrected.
  v_id=0
  for v in Pour Contre Abstention Non-votant; do
    if test -z "$(sqlite3 "$in_ram_database" <<< "select nom from votes where id is $v_id")"; then
      sqlite3 "$in_ram_database" <<< "insert into votes values ($v_id, '$v')"
    elif test -z "$(sqlite3 "$in_ram_database" <<< "select nom from votes where id is $v_id and nom is '$v'")"; then
      sqlite3 "$in_ram_database" <<< "update votes set nom = '$v' where id is $v_id"
    fi
    v_id=$((v_id + 1))
  done

  # Insert every configured group that is not already present.
  for g in "${!groupes[@]}"; do
    # Double embedded apostrophes so the names are valid SQL string literals.
    nom=${groupes[$g]//$q/$q$q}
    nom_court=${g//$q/$q$q}
    if test -z "$(sqlite3 "$in_ram_database" <<< "select id from groupes where nom is '$nom' and nom_court is '$nom_court'")"; then
      sqlite3 "$in_ram_database" <<< "insert into groupes (nom, nom_court) values ('$nom', '$nom_court')"
    fi
  done

  # Row 0 of url is the "no dossier" placeholder referenced by scrutins.url.
  if test -z "$(sqlite3 "$in_ram_database" <<< "select id from url where id = 0")"; then
    sqlite3 "$in_ram_database" <<< "insert into url values (0, '')"
  fi
}
# Download every scrutin numbered from $first+1 to $last from the Assemblée
# nationale web site and store it (header, dossier url, per-deputy votes) in
# "$in_ram_database".
# Globals read:
#   true_flag, no_db_update  the update is skipped when no_db_update is set
#   first, last              range to fetch (first defaults to 0)
#   database                 only shown in the progress message
#   in_ram_database          working SQLite file
#   update_progress          print progress every N percent (default 1)
# Does nothing when the range is empty. Exits the script with status 1 when
# a scrutin page cannot be parsed. The caller's IFS is left untouched.
function update_database () {
  [[ -n $true_flag && $no_db_update == "$true_flag" ]] && return

  local from=${first:-0}
  if [[ -z $last ]] || (( from >= last )); then
    return 0
  fi

  local tempfile url_database
  local progress=0 pct begin now delta
  local offset last_offset= scrutin
  local title date adoption url id_url
  local v g d d_id
  local title_sql date_sql url_sql d_sql
  local -a votes groupes_rows
  local q="'"
  local step=${update_progress:-1}
  # A zero or non-numeric step would make the modulo below fail.
  [[ $step =~ ^[1-9][0-9]*$ ]] || step=1

  # Unpredictable names in RAM-backed storage, removed on every exit path.
  tempfile=$(mktemp /dev/shm/scrutin.XXXXXX) \
    && url_database=$(mktemp /dev/shm/url_database.XXXXXX) || {
    echo "impossible de créer les fichiers temporaires dans /dev/shm" >&2
    rm -f -- "$tempfile"
    exit 1
  }

  echo "récupération des scrutins n°$((from + 1)) à n°$last dans $database (à conserver autant que possible)" >&2

  # Walk the paginated listing (pages of 100) and record "scrutin|dossier url"
  # pairs; offset 0 is added explicitly when $last is not a multiple of 100.
  test $((last % 100)) -ne 0 && last_offset=0
  for offset in $(seq $((last - 100)) -100 "$from") $last_offset; do
    wget -qO- "http://www2.assemblee-nationale.fr/scrutins/liste/(offset)/$offset/(legislature)/15/(type)/TOUS/(idDossier)/TOUS" \
      | awk '
/<td class="denom">/ {
scrutin = gensub(/^.+denom.>([[:digit:]]+).*<.td./,"\\1","1",$0)
}
/<td class="desc">.+dossier<.a/ {
a[scrutin] = gensub(/^.+.<a href="(.+)">dossier<.a>.*$/,"\\1","1",$0)
}
END {
for (i in a)
print gensub("*","","1",i) "|" a[i]
}' >> "$url_database"
  done
  sort -u "$url_database" > "${url_database}.sorted"
  mv -f "${url_database}.sorted" "$url_database"

  # The reference tables do not change during the update: read them once.
  # Each entry has the form "id|nom".
  mapfile -t votes < <(sqlite3 "$in_ram_database" <<< "select * from votes")
  mapfile -t groupes_rows < <(sqlite3 "$in_ram_database" <<< "select id,nom from groupes")

  begin=$(date +%s)
  for (( scrutin = from + 1; scrutin <= last; scrutin++ )); do
    # Keep only the part of the page between the title banner and the scripts.
    wget -qO- "http://www2.assemblee-nationale.fr/scrutins/detail/(legislature)/15/(num)/$scrutin" \
      | sed -r '0,/< *div class="titre-bandeau-bleu +to-print" *>/d; /< *script +type="text\/javascript" *>/,$d' > "$tempfile"

    title=$(sed -rn '/<h1 class="">Analyse du scrutin n° '"$scrutin"'/n; s,^.*<h3 class="president-title">(.+).</h3>,\1,p' "$tempfile" \
      | sed "s/;//g; s/[ \t][ \t]+/ /g; s/^Scrutin public sur *//; s/^l[ae']s* *//")
    date=$(sed -rn 's,^.*<h1 class="">Analyse du scrutin n° '"$scrutin"'<br/>(.+) </h1>,\1,p' "$tempfile")
    adoption=$(sed -rn 's,^.*<p class="annonce"><span class="annoncevote">(.+).</span></p>.*$,\1,p' "$tempfile")
    if [[ -z $title || -z $date || -z $adoption ]]; then
      echo "erreur dans la récupération du scrutin $scrutin" >&2
      rm -f -- "$url_database" "$tempfile"
      exit 1
    fi
    # The announcement contains "...e a adopté" only when the text passed.
    if grep -q 'e a a' <<< "$adoption"; then
      adoption=1
    else
      adoption=0
    fi

    # Map the scrutin to its dossier url, creating the url row on first use.
    url=$(awk -F'|' -v n="$scrutin" '$1 == n {print $2}' "$url_database")
    url_sql=${url//$q/$q$q}
    id_url=$(sqlite3 "$in_ram_database" <<< "select id from url where url is '$url_sql'")
    if test -z "$id_url"; then
      sqlite3 "$in_ram_database" <<< "insert into url (url) values ('$url_sql')"
      id_url=$(sqlite3 "$in_ram_database" <<< "select id from url where url is '$url_sql'")
    fi

    # Single-quoted SQL literals with doubled apostrophes: double-quoted
    # strings are identifiers in SQL and recent sqlite3 shells reject them.
    # Double quotes are still removed from the title, as before.
    title_sql=${title//\"/}
    title_sql=${title_sql//$q/$q$q}
    date_sql=${date//$q/$q$q}
    sqlite3 "$in_ram_database" <<< "insert into scrutins values ($scrutin, '$date_sql', '$title_sql', $adoption, ${id_url:-0})"

    for v in "${votes[@]}"; do
      for g in "${groupes_rows[@]}"; do
        # Narrow the page to this group, then to this vote type, then emit one
        # deputy name per line (only entries carrying a <b>…</b> name).
        # NOTE(review): the non-breaking-space handling assumes the original
        # "s, , ,g" was an HTML-decoded "&nbsp;" replacement — confirm.
        while IFS= read -r d; do
          [[ -n $d ]] || continue
          d_sql=${d//$q/$q$q}
          d_id=$(sqlite3 "$in_ram_database" <<< "select id from députés where nom is '$d_sql' and groupe is ${g%|*}")
          if test -z "$d_id"; then
            sqlite3 "$in_ram_database" <<< "insert into députés (nom, groupe) values ('$d_sql', ${g%|*})"
            d_id=$(sqlite3 "$in_ram_database" <<< "select id from députés where nom is '$d_sql' and groupe is ${g%|*}")
          fi
          sqlite3 "$in_ram_database" <<< "insert into dépouillement values ($d_id, $scrutin, ${g%|*}, ${v%|*})"
        done < <(sed -rn '/<p class="nomgroupe">'"${g#*|}"' <span class="block topmargin">/,/<div class="TTgroupe topmargin-lg">/p' "$tempfile" \
          | sed -rn '/<p class="typevote">'"${v#*|}"':/,/<.div>/p' \
          | sed 's,</li>,\n,g' \
          | sed -rn '/<p class="typevote">/d; s,^\s*<li>\s*,,; s,&nbsp;, ,g; s,\xc2\xa0, ,g; s/^\s*//; s/M(me|\.) //; s/ \(.*$//; s,<b>,,; s,</b>,,p')
      done
    done

    # Progress is the position relative to $last; the ETA extrapolates from
    # the scrutins actually processed in this run.
    pct=$(( scrutin * 100 / last ))
    if (( pct != progress )); then
      progress=$pct
      if (( progress % step == 0 )); then
        now=$(date +%s)
        delta=$(( now - begin ))
        echo "$progress%, ETA: $(date +%H:%M:%S -d "$(( delta * (last - scrutin) / (scrutin - from) )) seconds")"
      fi
    fi
  done
  rm -f -- "$url_database" "$tempfile"
}
# Print $1 with the XML special characters & < > " replaced by entities, so
# the value can be used as element text or inside a double-quoted attribute.
function _xml_escape () {
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g' <<< "$1"
}

# Build an OpenDocument spreadsheet comparing, scrutin by scrutin, the number
# of votes of each type cast by the target (a group, or one deputy of it) and
# by the reference group. The file is written to the current directory as
# "comparaisons <cible> avec <groupe_ref>[ - <dossier>].ods" and its name is
# printed on stdout.
# Globals read:
#   in_ram_database            working SQLite file
#   cible, groupe_ref          labels used in the file name and column headers
#   groupe_id, groupe_ref_id   ids of the two groups in table groupes
#   nom                        optional "id|name" of a single deputy
#   dossier                    optional dossier label appended to the name
#   seq                        optional command string listing the scrutins;
#                              defaults to `seq $first $last`
#   first, last                scrutin range (last also scales the progress)
#   generation_progress        print progress every N percent (default 5)
# Returns 1 when the archive cannot be created.
function write_comparaison () {
  # Word splitting below must not depend on an IFS changed by the caller.
  local IFS=$' \t\n'
  local q="'"
  local -a id_cols=(Scrutin Date Titre Adoption)
  local -a typevotes
  local result workdir content result_xml result_addr nb_cols last_col
  local i colonne typevote g
  local progress pct begin now delta line scrutin data
  local date title adoption url cible_votes ref_votes
  local step=${generation_progress:-5}
  [[ $step =~ ^[1-9][0-9]*$ ]] || step=5

  result="comparaisons $cible avec $groupe_ref${dossier:+ - ${dossier}}"
  # The name is used as a file and directory name: neutralise path separators.
  result=${result//\//_}
  workdir="/dev/shm/$result"
  content="$workdir/content.xml"
  result_xml=$(_xml_escape "$result")
  # Inside a quoted sheet name of a range address, apostrophes are doubled.
  result_addr=${result_xml//$q/$q$q}

  mapfile -t typevotes < <(sqlite3 "$in_ram_database" <<< "select nom from votes")
  # Identification columns plus one target and one reference column per vote type.
  nb_cols=$(( ${#id_cols[@]} + 2 * ${#typevotes[@]} ))
  # Letter of the last column (valid up to 26 columns).
  last_col=$(printf "\\$(printf '%03o' $((64 + nb_cols)))")

  echo "génération du fichier $result"
  rm -rf -- "$workdir"
  mkdir -p "$workdir/META-INF"
  cat > "$workdir/META-INF/manifest.xml" << EOmetainf
<?xml version="1.0" encoding="UTF-8"?>
<manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" manifest:version="1.2">
<manifest:file-entry manifest:full-path="/" manifest:version="1.2" manifest:media-type="application/vnd.oasis.opendocument.spreadsheet"/>
<manifest:file-entry manifest:full-path="content.xml" manifest:media-type="text/xml"/>
</manifest:manifest>
EOmetainf
  printf 'application/vnd.oasis.opendocument.spreadsheet' > "$workdir/mimetype"
  echo '<?xml version="1.0" encoding="UTF-8"?>' > "$content"
  cat >> "$content" << EOcontent
<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rpt="http://openoffice.org/2005/report" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" 
xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:css3t="http://www.w3.org/TR/css3-text/" office:version="1.2">
<office:scripts/>
<office:font-face-decls>
<style:font-face style:name="Liberation Sans" svg:font-family="'Liberation Sans'" style:font-family-generic="swiss" style:font-pitch="variable"/>
<style:font-face style:name="DejaVu Sans" svg:font-family="'DejaVu Sans'" style:font-family-generic="system" style:font-pitch="variable"/>
<style:font-face style:name="FreeSans" svg:font-family="FreeSans" style:font-family-generic="system" style:font-pitch="variable"/>
</office:font-face-decls>
<office:automatic-styles>
EOcontent
  # One column style per column.
  for (( i = 1; i <= nb_cols; i++ )); do
    cat >> "$content" << EOcontent
<style:style style:name="co$i" style:family="table-column">
<style:table-column-properties fo:break-before="auto" style:column-width="30.00mm"/>
</style:style>
EOcontent
  done
  cat >> "$content" << EOcontent
<style:style style:name="ro1" style:family="table-row">
<style:table-row-properties style:row-height="4.52mm" fo:break-before="auto" style:use-optimal-row-height="true"/>
</style:style>
<style:style style:name="ta1" style:family="table" style:master-page-name="Default">
<style:table-properties table:display="true" style:writing-mode="lr-tb"/>
</style:style>
<style:style style:name="ce1" style:family="table-cell" style:parent-style-name="Default">
<style:table-cell-properties fo:background-color="#cccccc"/>
</style:style>
</office:automatic-styles>
<office:body>
<office:spreadsheet>
<table:calculation-settings table:automatic-find-labels="false"/>
<table:table table:name="$result_xml" table:style-name="ta1">
<office:forms form:automatic-focus="false" form:apply-design-mode="false"/>
<table:table-column table:style-name="co1" table:number-columns-repeated="${#id_cols[@]}" table:default-cell-style-name="Default"/>
EOcontent
  # Target columns are shaded (ce1), reference columns are not.
  for typevote in "${typevotes[@]}"; do
    cat >> "$content" << EOcontent
<table:table-column table:style-name="co1" table:default-cell-style-name="ce1"/>
<table:table-column table:style-name="co1" table:default-cell-style-name="Default"/>
EOcontent
  done

  # Header row.
  echo '<table:table-row table:style-name="ro1">' >> "$content"
  for colonne in "${id_cols[@]}"; do
    cat >> "$content" << EOcontent
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>$colonne</text:p>
</table:table-cell>
EOcontent
  done
  for typevote in "${typevotes[@]}"; do
    for g in "$cible" "$groupe_ref"; do
      cat >> "$content" << EOcontent
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>$(_xml_escape "$typevote - $g")</text:p>
</table:table-cell>
EOcontent
    done
  done
  echo '</table:table-row>' >> "$content"

  progress=0
  begin=$(date +%s)
  line=1
  for scrutin in $(eval ${seq:-seq $first $last}); do
    data=$(sqlite3 "$in_ram_database" <<< "select date,intitulé,adoption,url.url from scrutins inner join url on scrutins.url = url.id where num is $scrutin")
    if [[ -z $data ]]; then
      # Without data the row would claim the text was rejected; skip it.
      echo "scrutin $scrutin absent de la base, ignoré" >&2
      continue
    fi
    IFS='|' read -r date title adoption url <<< "$data"
    if [[ $adoption == 1 ]]; then
      adoption='oui'
    else
      adoption='non'
    fi
    cat >> "$content" << EOcontent
<table:table-row table:style-name="ro1">
EOcontent
    # The scrutin number links to its dossier when one is known.
    if test -n "$url"; then
      cat >> "$content" << EOcontent
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p><text:a xlink:href="$(_xml_escape "$url")" xlink:type="simple">$scrutin</text:a></text:p>
</table:table-cell>
EOcontent
    else
      cat >> "$content" << EOcontent
<table:table-cell office:value-type="float" office:value="$scrutin" calcext:value-type="float">
<text:p>$scrutin</text:p>
</table:table-cell>
EOcontent
    fi
    cat >> "$content" << EOcontent
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>$(_xml_escape "$date")</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>$(_xml_escape "$title")</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>${adoption}</text:p>
</table:table-cell>
EOcontent
    # Vote ids 0..3 match the rows seeded in table votes.
    for typevote in 0 1 2 3; do
      cible_votes=$(sqlite3 "$in_ram_database" <<< "select count(député) from dépouillement where scrutin is $scrutin and vote is $typevote and groupe is $groupe_id ${nom:+ and député is ${nom%|*}}")
      ref_votes=$(sqlite3 "$in_ram_database" <<< "select count(député) from dépouillement where scrutin is $scrutin and vote is $typevote and groupe is $groupe_ref_id")
      cat >> "$content" << EOcontent
<table:table-cell office:value-type="float" office:value="$cible_votes" calcext:value-type="float">
<text:p>$cible_votes</text:p>
</table:table-cell>
<table:table-cell office:value-type="float" office:value="$ref_votes" calcext:value-type="float">
<text:p>$ref_votes</text:p>
</table:table-cell>
EOcontent
    done
    echo '</table:table-row>' >> "$content"
    line=$((line + 1))
    pct=$(( scrutin * 100 / last ))
    if (( pct != progress )); then
      progress=$pct
      if (( progress % step == 0 )); then
        now=$(date +%s)
        delta=$(( now - begin ))
        echo "$progress%, ETA: $(date +%H:%M:%S -d "$(( delta * (last - scrutin) / scrutin )) seconds")"
      fi
    fi
  done
  echo
  # $line is now the number of the last written row (header included).
  cat >> "$content" << EOcontent
</table:table>
<table:named-expressions/>
<table:database-ranges>
<table:database-range table:name="__Anonymous_Sheet_DB__0" table:target-range-address="'$result_addr'.D1:'$result_addr'.$last_col$line" table:display-filter-buttons="true"/>
</table:database-ranges>
</office:spreadsheet>
</office:body>
</office:document-content>
EOcontent

  # An OpenDocument package needs "mimetype" as its first entry, stored
  # without compression; the other members follow.
  if ! (
    cd "$workdir" || exit 1
    rm -f -- "../$result.zip"
    zip -X -0 "../$result.zip" mimetype > /dev/null 2>&1 \
      && zip -X -r "../$result.zip" META-INF content.xml > /dev/null 2>&1
  ); then
    echo "échec de la création de l'archive $result.ods" >&2
    rm -rf -- "$workdir"
    return 1
  fi
  rm -rf -- "$workdir"
  mv -f -- "/dev/shm/$result.zip" "$result.ods"
  echo "$result.ods"
}
# EXIT handler: persist the working copy "$in_ram_database" to "$database".
#   - identical content           -> the working copy is simply removed
#   - "$database" is writable      -> it is replaced by the working copy
#   - "$database" does not exist   -> the working copy is moved there
#   - otherwise (exists, read-only) -> the working copy is discarded
# Does nothing when the working copy was never created, which happens when
# the script exits before the database is opened.
function save_database () {
  [[ -n $in_ram_database && -e $in_ram_database && -n $database ]] || return 0

  local ram_sum db_sum
  if [[ -r $database ]]; then
    # Reading through stdin keeps the file names out of the checksum lines,
    # so the two outputs can be compared directly.
    ram_sum=$(md5sum < "$in_ram_database")
    db_sum=$(md5sum < "$database")
    if [[ -n $ram_sum && $ram_sum == "$db_sum" ]]; then
      rm -f -- "$in_ram_database"
      return
    fi
  fi

  if [[ -w $database ]]; then
    mv -f -- "$in_ram_database" "$database"
  elif [[ ! -e $database ]]; then
    mv -- "$in_ram_database" "$database"
  else
    rm -f -- "$in_ram_database"
  fi
}
# Random token stored in a variable to mark a boolean option as set.
# NOTE(review): presumably random so that a value coming from the environment
# or the sourced configuration cannot be mistaken for a set flag — confirm.
true_flag=$(mktemp --dry-run XXXXX)

# Long options understood by the script; a trailing ':' means the option
# takes a value.
long_options=(
  no-db-update
  db-update-only
  cible:
  ref:
  député:
  premier-scrutin:
  dernier-scrutin:
  période:
  liste-dossiers
  liste-députés
  liste-députés-du-groupe:
  dossiers
  dossier:
  conf:
  database:
  progrès-génération:
  progrès-update:
)

# -o '' is mandatory: without any -o, getopt takes the first user argument as
# the short-option string. The list is joined with commas in a subshell so
# the script's own IFS is not modified.
OPTS=$(IFS=,; getopt -n "$0" -o '' -l "${long_options[*]}" -- "$@") || exit 1
eval set -- "$OPTS"

# Installed only once the command line is known to be valid, so a usage
# error does not trigger a database save.
trap save_database EXIT
# Translate the normalized option list produced by getopt into the global
# variables used by the rest of the script. Boolean options are recorded by
# storing $true_flag. Group names are upper-cased. Selecting a scrutin range
# (--premier-scrutin, --dernier-scrutin, --période) also disables the
# database update. Unknown words, including the trailing "--", are ignored.
# Arguments: the words to interpret (normally "$@" after `eval set`).
# Sets options_error to $true_flag when --conf or --database is unusable.
function parse_cli_options () {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --no-db-update)
        no_db_update=$true_flag ;;
      --db-update-only)
        db_update_only=$true_flag ;;
      --cible)
        groupe="${2^^}"
        shift ;;
      --ref)
        # The script reads groupe_ref (singular) everywhere else.
        groupe_ref="${2^^}"
        shift ;;
      --député)
        depute=$true_flag
        nom="$2"
        shift ;;
      --premier-scrutin)
        no_db_update=$true_flag
        first="$2"
        shift ;;
      --dernier-scrutin)
        no_db_update=$true_flag
        last="$2"
        shift ;;
      --période)
        periode=$true_flag
        no_db_update=$true_flag
        periode_value="$2"
        shift ;;
      --liste-députés-du-groupe)
        liste_deputes=$true_flag
        liste_deputes_value="${2^^}"
        shift ;;
      --liste-députés)
        liste_deputes=$true_flag ;;
      --liste-dossiers)
        liste_dossiers=$true_flag ;;
      --dossier)
        dossier=$true_flag
        dossier_value="$2"
        shift ;;
      --dossiers)
        dossier=$true_flag ;;
      --conf)
        if ! test -r "$2"; then
          echo "config introuvable $2" >&2
          options_error=$true_flag
        fi
        config_file="$2"
        shift ;;
      --database)
        # The file must exist and be recognised by file(1) as SQLite 3.
        if ! { test -r "$2" && file -b "$2" | grep -q '^SQLite 3.x database'; }; then
          echo "erreur sur option database: fichier '$2' introuvable ou pas une base SQLite 3" >&2
          options_error=$true_flag
        fi
        database="$2"
        shift ;;
      --progrès-génération)
        generation_progress="$2"
        shift ;;
      --progrès-update)
        update_progress="$2"
        shift ;;
    esac
    shift
  done
}
parse_cli_options "$@"
# Stop here when option parsing reported an unusable --conf or --database.
[[ -n $true_flag && $options_error == "$true_flag" ]] && exit 1

# Default database and configuration files live next to the script.
test -z "$database" && database="${0}.db"
if test -n "$config_file"; then
  source "$config_file"
else
  config_file="${0}.conf"
  if test -r "$config_file"; then
    source "$config_file"
  fi
fi

groupe=${groupe:-LREM}
groupe_ref=${groupe_ref:-GDR}
cible=$groupe

# Work on a copy held in /dev/shm; save_database writes it back on exit.
# The file is really created (no --dry-run) so the name cannot be taken by
# another process in the meantime.
in_ram_database=$(mktemp /dev/shm/XXXXXXXXXXXX) || {
  echo "impossible de créer la base de travail dans /dev/shm" >&2
  exit 1
}
if test -r "$database"; then
  cp -- "$database" "$in_ram_database" || {
    echo "impossible de copier $database" >&2
    exit 1
  }
else
  create_database
fi

# Resolve the short names of the target and reference groups into the ids
# stored in groupe_id and groupe_ref_id.
sql_quote="'"
for g in groupe groupe_ref; do
  nom_court=${!g}
  id=$(sqlite3 "$in_ram_database" <<< "select id from groupes where nom_court is '${nom_court//$sql_quote/$sql_quote$sql_quote}'")
  if test -z "$id"; then
    echo "groupe ${!g} inconnu" >&2
    exit 1
  fi
  # Indirect assignment without eval: the query output is never executed.
  printf -v "${g}_id" '%s' "$id"
done
# Determine the range of scrutins to work on.
# With --période "début:fin", first is the lowest scrutin whose date ends
# with " du <début>" and last the highest one whose date ends with
# " du <fin>". Otherwise last defaults to the most recent scrutin listed on
# the web site and first to the number of scrutins already stored.
if [[ -n $true_flag && $periode == "$true_flag" ]]; then
  sql_quote="'"
  periode_debut=${periode_value%:*}
  periode_fin=${periode_value#*:}
  first=$(sqlite3 "$in_ram_database" <<< "select num from scrutins where date like '% du ${periode_debut//$sql_quote/$sql_quote$sql_quote}' order by num asc limit 1")
  last=$(sqlite3 "$in_ram_database" <<< "select num from scrutins where date like '% du ${periode_fin//$sql_quote/$sql_quote$sql_quote}' order by num desc limit 1")
  if test -z "$first"; then
    echo "date de début inconnue: $periode_debut" >&2
    rm -f -- "$in_ram_database"
    exit 1
  fi
  if test -z "$last"; then
    echo "date de fin inconnue: $periode_fin" >&2
    rm -f -- "$in_ram_database"
    exit 1
  fi
else
  if test -z "$last"; then
    # The first "denom" cell of the listing holds the latest scrutin number.
    last=$(wget -qO- 'http://www2.assemblee-nationale.fr/scrutins/liste/(legislature)/15/(type)/TOUS/(idDossier)/TOUS' \
      | sed -rn 's,^.*<td class="denom">(.+)</td>.*$,\1,p' \
      | head -1)
  fi
  if test -z "$first"; then
    first=$(sqlite3 "$in_ram_database" <<< "select count(num) from scrutins")
  fi
fi
# --liste-dossiers: print "id - name" for every known dossier and stop.
if [[ -n $true_flag && $liste_dossiers == "$true_flag" ]]; then
  sqlite3 "$in_ram_database" <<< "select printf('%s - %s', id, url) from url" | sed 's,https*://.*/dossiers/,,; s/_/ /g; s/.asp$//'
  exit
fi

# --db-update-only: refresh the database and stop.
# first and last are kept as computed above: update_database needs them to
# know what to fetch, and it skips the update by itself when the user
# restricted the range.
if [[ -n $true_flag && $db_update_only == "$true_flag" ]]; then
  update_database
  exit
fi

# --liste-députés / --liste-députés-du-groupe: print "name - group" and stop.
if [[ -n $true_flag && $liste_deputes == "$true_flag" ]]; then
  if test -n "$liste_deputes_value"; then
    sql_quote="'"
    sqlite3 "$in_ram_database" <<< "select printf('%s - %s', nom, groupes.nom_court) from députés inner join groupes on groupes.id = députés.groupe where groupes.nom_court is '${liste_deputes_value//$sql_quote/$sql_quote$sql_quote}'"
  else
    sqlite3 "$in_ram_database" <<< "select printf('%s - %s', nom, groupes.nom_court) from députés inner join groupes on groupes.id = députés.groupe order by groupes.nom_court asc"
  fi
  exit
fi
# --député: resolve the (partial) name given on the command line to exactly
# one deputy of the target group. On success nom becomes "id|full name" and
# cible "full name (GROUP)"; with no match or several matches the candidates
# are reported and the script exits with status 1.
if [[ -n $true_flag && $depute == "$true_flag" && -n $nom ]]; then
  # The name comes from the command line: double its apostrophes so it
  # stays inside the SQL string literal.
  sql_quote="'"
  depute_filter="from députés inner join groupes on groupes.id = députés.groupe where députés.nom like '%${nom//$sql_quote/$sql_quote$sql_quote}%' and groupes.nom_court is '${groupe//$sql_quote/$sql_quote$sql_quote}' collate nocase"
  match=$(sqlite3 "$in_ram_database" <<< "select count(députés.id) $depute_filter")
  if [[ $match != 1 ]]; then
    if [[ -z $match || $match == 0 ]]; then
      echo "pas de député correspondant dans le groupe $groupe"
    else
      echo "plusieurs députés correspondent:"
      sqlite3 "$in_ram_database" <<< "select députés.nom $depute_filter"
    fi
    exit 1
  fi
  nom=$(sqlite3 "$in_ram_database" <<< "select députés.id,députés.nom $depute_filter")
  cible="${nom#*|} ($groupe)"
fi
# --dossier N / --dossiers: restrict the comparison to the scrutins of one
# dossier. `seq` receives the command string that write_comparaison
# evaluates to list the scrutins, and `dossier` the readable dossier name
# used in the output file name.
if [[ -n $true_flag && $dossier == "$true_flag" ]]; then
  if test -z "$dossier_value"; then
    # Interactive choice; one menu entry per line of the query output.
    dossier_choisi=
    IFS_=$IFS
    IFS=$'\n'
    # NOTE(review): the selected name is inserted verbatim into the SQL and
    # into the evaluated command string; it comes from urls stored in the
    # database, not from the command line.
    select dossier in $(sqlite3 "$in_ram_database" <<< "select url from url" | sed 's,^.*/dossiers/,,; s/_/ /g; s/.asp$//'); do
      if test -n "$dossier"; then
        seq="sqlite3 \"$in_ram_database\" <<< \"select num from scrutins inner join url on url.id = scrutins.url where url.url like '%/dossiers/${dossier// /_}%' order by num asc\""
        dossier_choisi=1
        break
      fi
    done
    IFS=$IFS_
    if test -z "$dossier_choisi"; then
      # End of input without a choice: dossier still holds the flag token.
      echo "aucun dossier sélectionné" >&2
      exit 1
    fi
  else
    # The id ends up in SQL and in an evaluated string: accept digits only.
    if [[ ! $dossier_value =~ ^[0-9]+$ ]]; then
      echo "identifiant de dossier invalide: $dossier_value" >&2
      exit 1
    fi
    seq="sqlite3 \"$in_ram_database\" <<< \"select num from scrutins inner join url on url.id = scrutins.url where url.id is $dossier_value order by num asc\""
    dossier=$(sqlite3 "$in_ram_database" <<< "select url from url where id is $dossier_value" | sed 's,^.*/dossiers/,,; s/_/ /g; s/.asp$//')
  fi
fi

# Main action: bring the database up to date, then produce the spreadsheet.
update_database
write_comparaison