Discussion:Orlando di Lasso (ChoralWiki)/List of choral works
De Wicri Musique
Scripts utilisés
- Récupération de la page en html
# Download the CPDL composer page and save its HTML as CpdlListOfWorks,
# the input file read by the pipelines that follow.
#   --fail                 an HTTP error aborts instead of saving the error page
#   --location             follow redirects instead of saving the redirect stub
#   --silent --show-error  no progress meter, but errors are still printed
curl --fail --silent --show-error --location \
  'www2.cpdl.org/wiki/index.php/Orlando_di_Lasso' > CpdlListOfWorks
- Génération du premier paragraphe:
# Build the wiki list for the first paragraph from the saved HTML page.
# NOTE(review): the roles of the SxmlSelect options (-g / -s / -p) are inferred
# from the paths and patterns used here; confirm against the tool's docs.
#   1. select the tables under div#mw-content-text;
#   2. select the <a> elements found in the table's list items;
#   3. rewrite the first "/wiki/index.php/" on each line to the "cpdl:" prefix;
#   4. re-emit each link as <cpdl><n>text</n><t>title</t><h>href</h></cpdl>;
#   5. rename "(Orlando di Lasso)" to "(Roland de Lassus)" at the end of <t>;
#   6. print one wiki list line per record.
# The sed expressions use "|" as delimiter so that "/", "<" and ">" stay
# literal: in GNU sed "\<" and "\>" are word-boundary anchors, which made the
# former pattern "\<\/t\>" unable to match the literal "</t>".
SxmlUnIndent < CpdlListOfWorks \
  | SxmlSelect -g html/body/div@id=content/div@id=bodyContent/div@id=mw-content-text/div/table -p @g1 \
  | SxmlSelect -s table/tbody/tr/td/ul/li/a -p @s1 \
  | sed -e 's|/wiki/index\.php/|cpdl:|' \
  | SxmlSelect -g a/1 -g a/attribute::title -g a/attribute::href -p "<cpdl><n>@g1</n><t>@g2</t><h>@g3</h></cpdl>" \
  | sed -e 's|(Orlando di Lasso)</t>|(Roland de Lassus)</t>|' \
  | SxmlSelect -g cpdl/n/1 -g cpdl/t/1 -g cpdl/h/1 -p '*[[@g2|@g1]] ([[@g3|CPDL]])'
- Génération du deuxième paragraphe:
# Build the wiki list for the second paragraph.
# The selected tables are wrapped between "<li>" and "</li>" so that a single
# root element encloses them; SgmlSelect then picks one of them with "li/2".
# NOTE(review): presumably "li/2" addresses the second child of <li>; confirm
# against the SgmlSelect documentation.
# The sed expressions use "|" as delimiter so that "/", "<" and ">" stay
# literal: in GNU sed "\<" and "\>" are word-boundary anchors, which made the
# former pattern "\<\/t\>" unable to match the literal "</t>".
(
  echo "<li>"
  SxmlUnIndent < CpdlListOfWorks \
    | SxmlSelect -s html/body/div@id=content/div@id=bodyContent/div@id=mw-content-text/div/table -p @s1
  echo "</li>"
) | SxmlUnIndent \
  | SgmlSelect -g li/2 -p @g1 \
  | SxmlSelect -s table/tbody/tr/td/ul/li/a -p @s1 \
  | sed -e 's|/wiki/index\.php/|cpdl:|' \
  | SxmlSelect -g a/1 -g a/attribute::title -g a/attribute::href -p "<cpdl><n>@g1</n><t>@g2</t><h>@g3</h></cpdl>" \
  | sed -e 's|(Orlando di Lasso)</t>|(Roland de Lassus)</t>|' \
  | SxmlSelect -g cpdl/n/1 -g cpdl/t/1 -g cpdl/h/1 -p '*[[@g2|@g1]] ([[@g3|CPDL]])'
- Création d'un fichier intermédiaire
# Write the intermediate file CpdlListOfWorks2: the tables selected from the
# saved page, enclosed between "<li>" and "</li>", then passed through
# SxmlUnIndent once more.
{
  echo "<li>"
  SxmlUnIndent < CpdlListOfWorks \
    | SxmlSelect -s html/body/div@id=content/div@id=bodyContent/div@id=mw-content-text/div/table -p @s1
  echo "</li>"
} \
  | SxmlUnIndent > CpdlListOfWorks2
- Récupération des paragraphes (à partir du troisième)
Il suffit de remplacer « li/3 » par le bon indice.
# Build the wiki list for one paragraph, read from the intermediate file
# CpdlListOfWorks2; "li/3" selects which paragraph is extracted.
# NOTE(review): presumably "li/3" addresses the third child of <li>; confirm
# against the SgmlSelect documentation.
# The sed expressions use "|" as delimiter so that "/", "<" and ">" stay
# literal: in GNU sed "\<" and "\>" are word-boundary anchors, which made the
# former pattern "\<\/t\>" unable to match the literal "</t>".
SgmlSelect -g li/3 -p @g1 < CpdlListOfWorks2 \
  | SxmlSelect -s table/tbody/tr/td/ul/li/a -p @s1 \
  | sed -e 's|/wiki/index\.php/|cpdl:|' \
  | SxmlSelect -g a/1 -g a/attribute::title -g a/attribute::href -p "<cpdl><n>@g1</n><t>@g2</t><h>@g3</h></cpdl>" \
  | sed -e 's|(Orlando di Lasso)</t>|(Roland de Lassus)</t>|' \
  | SxmlSelect -g cpdl/n/1 -g cpdl/t/1 -g cpdl/h/1 -p '*[[@g2|@g1]] ([[@g3|CPDL]])'
- Détection des doublons
# Duplicate detection: extract the link texts from CpdlListOfWorks2, sort them,
# feed them to IndexBuildRec and keep only the lines that do not contain ">1<".
# NOTE(review): presumably IndexBuildRec emits an occurrence count per key, so
# ">1<" marks titles seen exactly once; confirm against the tool's docs.
SxmlSelect -s li/table/tbody/tr/td/ul/li/a/1 -p @s1 -p @s1 < CpdlListOfWorks2 \
  | sort \
  | IndexBuildRec \
  | grep -v ">1<"