Index de l'article

Foreach sur des id définis

<?php
include_once('simple_html_dom.php');
 
// MySQL connection.
// The error mode is set explicitly so that failing queries raise a
// PDOException on every PHP version (it is only the default since PHP 8);
// otherwise a failed prepare()/execute() would go unnoticed.
$bdd = new PDO(
    'mysql:host=localhost; dbname=webscraping; charset=utf8',
    'root',
    'geo_local',
    array(PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION)
);
 
// -------------
/**
 * Container for the profile ids to scrape.
 *
 * Each id is stored as the default value of a public property. Iterating
 * an instance with foreach yields the properties as name => id pairs in
 * declaration order. Note that there is no $var3.
 */
class MyClass
{
    public $var1 = '51051799';
    public $var2 = '54830967';
    public $var4 = '54851231';
    public $var5 = '54233927';
    public $var6 = '51387124';
    public $var7 = '54238347';
    public $var8 = '53957864';
    public $var9 = '50887120';
}

// Instance whose public properties are iterated by the scraping loop.
$class = new MyClass();
// -------------
 
// Scrapes one profile page per id held by $class and stores the cleaned
// contact fields in the `facs` table.

// URL prefix of a profile page; the profile id is appended to it.
$profileBaseUrl = 'https://www.xxx.org/profile/';

// Prefix shared by the element ids of the profile fields.
$fieldIdPrefix = '#content_element_0_main_column_0_ctl09_';

// Stream context with ignore_errors enabled, so that the body of an HTTP
// error response is still returned instead of making the fetch fail.
// It does not depend on the profile, so it is created once.
$context = stream_context_create(array(
    'http' => array('ignore_errors' => true),
));

// The statement is prepared once and executed for every profile.
$req = $bdd->prepare("INSERT IGNORE INTO facs (name, email, website, speciality, phone, email_assistant, assistant)
VALUES (:name, :email, :website, :speciality, :phone, :email_assistant, :assistant)");

// Returns the plain text of a DOM node with line breaks and tabs turned into
// spaces and surrounding whitespace trimmed. A missing node (null) yields ''.
// With $stripPunctuation, double quotes and semicolons are also replaced by
// spaces and runs of spaces are collapsed.
// NOTE(review): the original code replaced ' ' with ' ' at that step, which
// is a no-op as written; collapsing repeated spaces is the presumed intent.
$cleanText = function ($node, $stripPunctuation = false) {
    if (!is_object($node)) {
        return '';
    }

    $text = str_replace(array("\n", "\r", "\t"), ' ', (string) $node->plaintext);

    if ($stripPunctuation) {
        $text = str_replace(array('"', ';'), ' ', $text);
        $text = preg_replace('/ {2,}/', ' ', $text);
    }

    return trim($text);
};

// Loop over the profile pages
foreach ($class as $value) {
    // Full URL of the profile. It must be passed to file_get_html() as a
    // plain string: str_get_html() would parse the URL text itself as HTML.
    $url = $profileBaseUrl . $value;

    // Fetch and parse the page. Warnings are suppressed because a failure
    // is reported explicitly just below.
    $html = @file_get_html($url, false, $context);
    if (!$html) {
        error_log('Profile page could not be loaded: ' . $url);
        continue;
    }

    // Field extraction. find($selector, -1) returns the last matching
    // element, or null when nothing matches, so a field missing on this page
    // is stored as an empty string instead of reusing the previous page's value.
    $name_ = mb_convert_case($cleanText($html->find($fieldIdPrefix . 'Name', -1), true), MB_CASE_TITLE);
    $email_ = mb_strtolower($cleanText($html->find($fieldIdPrefix . 'Email', -1)));
    $website_ = mb_strtolower($cleanText($html->find($fieldIdPrefix . 'WebAddress', -1), true));
    $assistant_ = mb_convert_case($cleanText($html->find($fieldIdPrefix . 'AssistantWrap', -1), true), MB_CASE_TITLE);
    $email_assistant_ = mb_strtolower($cleanText($html->find($fieldIdPrefix . 'AssistantEmail', -1)));
    $speciality_ = mb_convert_case($cleanText($html->find($fieldIdPrefix . 'Specialty', -1)), MB_CASE_TITLE);
    $phone_ = mb_strtolower($cleanText($html->find('div.acsCol5.acsCol12Mobile', -1), true));

    // Release the parsed DOM before moving on to the next page; all needed
    // values have already been copied into plain strings.
    $html->clear();
    unset($html);

    // Insert the row for this profile
    $req->execute(array(
        "name" => $name_,
        "email" => $email_,
        "website" => $website_,
        "speciality" => $speciality_,
        "phone" => $phone_,
        "email_assistant" => $email_assistant_,
        "assistant" => $assistant_,
    ));
}
// End of the loop over the profile pages
?>