.dev/scripts/lang_by_country/get_latest_lang_by_country.php

Summary

Maintainability
A
2 hrs
Test Coverage
#!/usr/bin/env php
<?php

require_once dirname(__DIR__) . '/scripts_utils.php';

// TODO

$url = $url ?: 'https://www.cia.gov/library/publications/the-world-factbook/fields/2098.html';
$result_file = $result_file ?: __DIR__ . '/countries.php';
$suffix = $suffix ?: '';

if (!function_exists('data_get_latest_lang_by_country')) {
    function data_get_latest_lang_by_country()
    {
        global $url, $result_file, $suffix;

        $f2 = __DIR__ . '/' . basename($url) . '.table' . $suffix . '.html';
        if (!file_exists($f2)) {
            $html1 = file_get_contents($url);
            $regex1 = '~<article class="description-box">(.+?)</article>~ims';
            preg_match($regex1, $html1, $m1);
            $html1 = $m1[1];
            $html1 = preg_replace('~<script[^>]*?>.*?</script>~ims', '', $html1);
            $html1 = preg_replace('~<style[^>]*?>.*?</style>~ims', '', $html1);
            $html1 = preg_replace('~<select[^>]*?>.*?</select>~ims', '', $html1);
            preg_match_all('~<table[^>]*id="[a-z]{2}"[^>]*>.+?</table>~ims', $html1, $m1_2);
            // Strip empty lines inside matches
            $html1 = implode(PHP_EOL . PHP_EOL, preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $m1_2[0]));
            // Strip multiple whitespaces/tabs
            $html1 = preg_replace('~[ \t]{2,}~im', ' ', $html1);
            file_put_contents($f2, $html1);
        }
        $html2 = file_get_contents($f2);

        preg_match_all('~<table[^>]*id="(?P<code>[a-z]{2})"[^>]*>.*?<td[^>]*class="category_data"[^>]*>(?P<data>.+?)</td>.*?</table>~ims', $html2, $m2);
        $data_tmp = [];
        foreach ((array) $m2[0] as $k => $tmp) {
            $data_tmp[$m2['code'][$k]] = trim(strip_tags($m2['data'][$k]));
        }
        $data = [];
        foreach ((array) $data_tmp as $code => $text) {
            preg_match_all('~([a-z]+)\s*\(official[^\)]*\)~ims', $text, $m3);
            if ($m3[1]) {
                $data[$code] = $m3[1];
            }
        }
        print_r($data);
        //    $tmp_tbl = html_table_to_array($html2);
        //print_r($tmp_tbl);
        /*
    $data = array();
    foreach ($tmp_tbl as $v) {
        $id = $v[1];
        if (!$id) {
            continue;
        }
        $data[$id] = array(
            'code'    => $id,
            'code3' => $v[2],
            'num'    => $v[3],
            'name'    => $v[0],
            'cont'    => '',
            'active'=> 0,
        );
    }
    foreach (array('UA','RU','US','DE','FR','ES','GB') as $c) {
        $data[$c]['active'] = 1;
    }

    $f4 = $result_file;
    file_put_contents($f4, '<?'.'php'.PHP_EOL.'$data = '.var_export($data, 1).';');
    print_r($data);
*/
    }
}

data_get_latest_lang_by_country();