<?php

/*
https://bipadix.ub.edu/cuina/rcub/export.php

estructura rcub
<issue>
	<id type="internal" advice="ignore">2658</id>
	<articles>
		<article current_publication_id="34345" >
			<id type="internal" advice="ignore">37146</id>
			<publication>
				<id type="internal" advice="ignore">34345</id>
				<article_galley>
					<id type="internal" advice="ignore">35965</id>
				</article_galley>
			</publication>
		</article>
	</articles>
</issue>

https://revistes.ub.edu/index.php/<path>/article/view/<article-id|url_path>/<article_galley-id>
*
*
a partir de rcub <article> + <publication>
- elimino
+ conservo
<article
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	date_submitted="2021-10-23"
-	status="1"
-	submission_progress="0"
*	current_publication_id="34344"
+	stage="production"
>
<publication
-	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	locale="es_ES"
-	version="1"
-	status="1"
*	url_path=""
+	seq="0"
+	date_published="2021-11-02"
+	section_ref="Aper"
+	access_status="0"
-	xsi:schemaLocation="http://pkp.sfu.ca native.xsd"
>
--> raco <article>
a	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
p	locale="es_ES"
a	date_submitted="2021-10-23"
a	stage="production"
p	date_published="2021-11-02"
p	section_ref="Aper"
p	seq="2"
p	access_status="0"

* */

// Report and display every PHP error, including start-up errors.
ini_set('display_errors', '1');
ini_set('display_startup_errors', '1');
error_reporting(E_ALL);

// Database connection shared by sql() through the global $db.
$dbname = '****';
$username = '****';
$password = '****';
$host = '****';
$charset = 'utf8mb4';
$db = new PDO("mysql:host=$host;dbname=$dbname;charset=$charset", $username, $password);
// PDO throws on failure and uses native (non-emulated) prepared statements.
$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$db->setAttribute(PDO::ATTR_EMULATE_PREPARES, false);

// Which users are allowed in: a whitespace-separated list of e-mail addresses or identifiers.
// The returned attributes are kept in $user, which loga() writes at the top of each log file.
$user = auten('dni o mails autoritzats');

// Request handling, in order. download() goes first because it sends HTTP headers
// and must therefore run before cape() starts emitting HTML.
download();
cape();
form();
delete();
upload();

/*
Ad-hoc conversions kept for reference (disabled):
transform('native-20220422-104238-issues-101.xml');
transform('native-20220422-095047-issues-15.xml');
transform_all();
transform('abriu.native-20220215-130646-issues-23.xml');
transform('native-20220216-145948-issues-47.xml');
transform('native-20220216-143034-issues-49.xml');
transform('native-20220310-093649-issues-89.xml');
*/

// File listings: the uploaded originals (xml.rcub/) and the converted files (xml.raco/).
llista('originals RCUB','rcub');
llista('RACO','raco');

/**
 * Converts one RCUB "Native XML Plugin" export into the layout written to xml.raco/.
 *
 * The conversion is purely textual (regular expressions over the file contents):
 *  - drops embedded files, submission/supplementary files, citations and issue galleys;
 *  - drops every <article> whose status attribute does not start with 3;
 *  - keeps only the current <publication> of each article and folds its attributes
 *    into the <article> tag;
 *  - renames givenname/familyname to firstname/lastname;
 *  - adds a <remote src="..."> pointing at revistes.ub.edu to every article galley;
 *  - writes the result to xml.raco/raco-<journal path>-<volume-number-year>-<original name>.
 *
 * @param string $f Path of the export, or a bare file name looked up in the xml.rcub folder.
 * @return void     Returns silently when the file cannot be found or read.
 */
function transform($f='') {
	if (!$f) return;
	if (!file_exists($f)) $f = "/dades/www_bipadix/cuina/rcub/xml.rcub/$f";
	if (!file_exists($f)) return;

	$x = file_get_contents($f);

	loga("transform $f",$f);

	// unreadable file: stop instead of producing an empty converted file
	if ($x === false) return;

	loga(strlen($x));

	// protect the <embed> that belongs to a <cover> by renaming it temporarily
	$x = rep('~(<cover\s.+<embed)~mUs','$1x',$x);

	// remove every <embed> ... </embed> block
	$x = rep('~<embed\b.+</embed>~','',$x);

	// restore the cover <embed>
	$x = rep('~<embedx\b~','<embed',$x);

	// remove every <submission_file> ... </submission_file> block
	$x = rep('~<submission_file\b.*</submission_file>~Usm','',$x);

	// remove every <supplementary_file> ... </supplementary_file> block
	$x = rep('~<supplementary_file\b.*</supplementary_file>~Usm','',$x);

	// remove every <submission_file_ref .../> element
	$x = rep('~<submission_file_ref\b.*/>~','',$x);

	// remove every <article> ... </article> whose status attribute does not start with 3
	$x = rep('~<article\b[^>]* status="[^3].*</article>~Usm','',$x);

	// remove the url_path attribute from <issue>
	$x = rep('~(<issue\b.*) url_path=".*"~U','$1',$x);


	// <author>: remove seq and id, rename the name elements and drop their locale
	$x = rep('~(<author\b.*) seq=".*"~U','$1',$x);
	$x = rep('~(<author\b.*) id=".*"~U','$1',$x);
	$x = rep('~<givenname\b~U','<firstname',$x);
	$x = rep('~</givenname>~U','</firstname>',$x);
	$x = rep('~<familyname\b~U','<lastname',$x);
	$x = rep('~</familyname>~U','</lastname>',$x);
	$x = rep('~(<firstname\b.*) locale=".*"~U','$1',$x);
	$x = rep('~(<lastname\b.*) locale=".*"~U','$1',$x);


	// remove <citations> ... </citations>
	$x = rep('~<citations\b.*</citations>~Usm','',$x);


	// <article>: remove the status and submission_progress attributes
	$x = rep('~(<article\b.*) status=".*"~U','$1',$x);
	$x = rep('~(<article\b.*) submission_progress=".*"~U','$1',$x);

	// <article_galley>: remove the locale attribute
	$x = rep('~(<article_galley\b.*) locale=".*"~U','$1',$x);

	// <publication>: remove xmlns:xsi, version, status, xsi:schemaLocation and primary_contact_id
	$x = rep('~(<publication\b.*) primary_contact_id=".*"~U','$1',$x);
	$x = rep('~(<publication\b.*) xsi:schemaLocation=".*"~U','$1',$x);
	$x = rep('~(<publication\b.*) xmlns:xsi=".*"~U','$1',$x);
	$x = rep('~(<publication\b.*) version=".*"~U','$1',$x);
	$x = rep('~(<publication\b.*) status=".*"~U','$1',$x);

	// first issue id found in the file
	preg_match_all('~<issue\b.*<id\b.*>(.+)</id>~Usm',$x,$mm);
	$issue_id = $path = $num = '';
	if (!empty($mm[1][0])) {
		$issue_id = $mm[1][0];
		// journal path and "volume-number-year" of that issue, looked up in the database
		$kk = sql("select j.path,concat_ws('-',i.volume,i.number,i.year) num from issues i join journals j on i.journal_id=j.journal_id where i.issue_id=?",[$issue_id]);
		// the issue may not exist in this database: keep the empty defaults in that case
		if (!empty($kk[0])) {
			$path = $kk[0]['path'];
			$num = $kk[0]['num'];
		}
	}

	//******************************************
	// $cp = current_publication_id of every <article>;
	// used to drop every <publication> that is not the current one
	preg_match_all('~<article\b.* current_publication_id="(\d+)"~U',$x,$aa);
	$cp = $aa[1];
	loga($cp);

	// $pp = id of every <publication>
	preg_match_all('~<publication\b.*<id\b.*>(.+)</id>~Usm',$x,$aa);
	$pp = $aa[1];
	loga($pp);

	// $ncp = publications that are not current
	$ncp = array_diff($pp,$cp);
	loga($ncp);

	// remove every non-current <publication> ... </publication> block
	foreach ($ncp as $n) {
		// the id is captured text, so quote it before putting it inside a pattern
		$nq = preg_quote($n,'~');
		$x = rep("~<publication [^>]*>\s+<id\b[^>]*>$nq</id>.*</publication>~Usm",'',$x);
	}

	// from here on every <article> has exactly one <publication>
	//******************************************


	// url_path is used to build the article URL
	// $up = url_path of every remaining <publication>, in document order
	preg_match_all('~<publication\b.* url_path="(.*)"~U',$x,$aa);
	$up = $aa[1];
	loga($up);

	// remove the url_path attribute from <publication>
	$x = rep('~(<publication\b.*) url_path=".*"~U','$1',$x);


	//*********************************************
	// merge the attributes of <publication> into <article>
	// $ap = attribute string of every <publication>
	preg_match_all('~<publication\b(.+)>~Usm',$x,$aa);
	$ap = $aa[1];
	loga($ap);

	// replace the article attribute current_publication_id="..." with the
	// attributes of its publication (nothing is inserted when none was found)
	foreach ($cp as $i=>$c) {
		$attrs = isset($ap[$i]) ? $ap[$i] : '';
		$x = str_replace(" current_publication_id=\"$c\"",$attrs,$x);
	}

	// remove the <publication> and </publication> tags themselves
	$x = rep('~<publication\b.*>~','',$x);
	$x = rep('~</publication>~','',$x);

	// every <article> must carry date_published;
	// when it is missing, date_submitted is duplicated as date_published
	preg_match_all('~<article\b.+>~U',$x,$aa);
	$aa = $aa[0];
	foreach ($aa as $a) {
		if (strpos($a,'date_published') !== false) continue;
		$a9 = preg_replace('~date_submitted="(.+)"~U','date_submitted="$1" date_published="$1"',$a);
		$x = str_replace($a,$a9,$x);
	}

	//******************************************


	//************************************************
	// issue galleys
	// issue galleys are not converted (they do not work like article galleys),
	// so every <issue_galley> is removed
	$x = rep('~<issue_galley\b.*</issue_galley>~Usm','',$x);

	/*
	// Disabled attempt at converting issue galleys, kept for reference.
	// remove every <issue_file> block
	$x = rep('~<issue_file\b.*</issue_file>~Usm','',$x);

	// which issue each issue galley belongs to
	// $ig[issue_galley-id] = issue-id
	preg_match_all('~<(issue|issue_galley)\b.*<id\stype="internal".*>(.+)</id>~Usm',$x,$aa);
	$nn = $aa[1];
	$ii = $aa[2];
	$ig = [];
	foreach ($nn as $i=>$node) {
		if ($node == 'issue') {
			$ida = $ii[$i];
		} else {
			$ig[$ii[$i]] = $ida;
		}
	}
	loga($ig);

	// add to every issue galley
	// <remote src="https://revistes.ub.edu/index.php/<path>/issue/view/<issue-id>/<issue_galley-id>"/>
	foreach ($ig as $g=>$i) {
		$remote = "  <remote src=\"https://revistes.ub.edu/index.php/$path/issue/view/$i/$g\"/>\n    ";
		$x = rep("~(<issue_galley\b.+<id\s+type=\"internal\"\s+advice=\"ignore\">$g</id>.+)</issue_galley>~mUs","$1$remote</issue_galley>",$x);
	}
	* */
	//**************************************************


	//**********************************
	// which article each galley belongs to
	// $gg[article_galley-id] = article id, or the article url_path when it has one
	preg_match_all('~<(article|article_galley)\b.*<id\stype="internal".*>(.+)</id>~Usm',$x,$aa);
	$nn = $aa[1];
	$ii = $aa[2];
	$gg = [];
	$ida = '';
	// $up holds one entry per article, while $i counts articles AND galleys,
	// so the articles need their own counter to pick the matching url_path
	$k = 0;
	foreach ($nn as $i=>$node) {
		if ($node == 'article') {
			// URL: https://revistes.ub.edu/index.php/<path>/article/view/<article-id|url_path>/<article_galley-id>
			$ida = empty($up[$k]) ? $ii[$i] : $up[$k];
			$k++;
		} else {
			$gg[$ii[$i]] = $ida;
		}
	}
	loga($gg);

	// add <remote> to every galley: look for
	//	<article_galley xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" approved="false" xsi:schemaLocation="http://pkp.sfu.ca native.xsd">
	//	<id type="internal" advice="ignore">37569</id>
	// and insert, right before the closing tag,
	//	<remote src="https://revistes.ub.edu/index.php/<path>/article/view/<article-id|url_path>"/>
	$linia0 = "<article_galley xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" approved=\"false\" xsi:schemaLocation=\"http://pkp.sfu.ca native.xsd\">";
	foreach ($gg as $gi=>$ai) {
	//	$remote = "  <remote src=\"https://revistes.ub.edu/index.php/$path/article/view/$ai/$gi\"/>\n        ";
		$remote = "  <remote src=\"https://revistes.ub.edu/index.php/$path/article/view/$ai\"/>\n        ";
		$gq = preg_quote($gi,'~');
		$x = rep("~($linia0\s+<id\s+type=\"internal\"\s+advice=\"ignore\">$gq</id>.+)</article_galley>~mUs","$1$remote</article_galley>",$x);
	}
	//***********************************************


	// <name locale="es_ES">PDF</name> --> <name locale="es_ES">Accés</name>
	$x = rep('~<name locale="(\w+)">\w+</name>~U','<name locale="$1">Accés</name>',$x);


	// remove blank lines, and collapse consecutive duplicated
	// <firstname>...</firstname> / <lastname>...</lastname> lines
	$xx = explode(PHP_EOL,$x);
	$xx = array_map('rtrim',$xx);
	// only truly empty lines are dropped (a line holding just "0" is content)
	$xx = array_filter($xx,function ($l) { return $l !== ''; });
	$xx = array_values($xx);
	foreach ($xx as $i=>$l) {
		if (!$i) continue;
		if ($xx[$i] != $xx[$i-1]) continue;
		if (preg_match('~^\s*<(first|last)name>.*</(first|last)name>$~uU',$l)) unset($xx[$i-1]);
	}
	$x = implode(PHP_EOL,$xx);


	// name of the new file in RACO import format
//	$f9 = array_merge(['raco',$path],$issue_id,[basename($f)]);
	$f9 = ['raco',$path,$num,basename($f)];
	$f9 = implode('-',$f9);
	$f9 = dirname(__FILE__) . "/xml.raco/$f9";
	loga($f9);

	// write the converted file
	file_put_contents($f9,$x);

//	loga($x);
}

/**
 * Logged wrapper around preg_replace().
 *
 * Writes the pattern, the number of replacements and the resulting length to the log.
 * When PCRE fails (preg_replace() returns null, e.g. backtrack limit reached on a
 * large file or invalid UTF-8 with the u modifier) the input is returned unchanged,
 * so one failing pattern cannot wipe the whole document.
 *
 * @param string $find    PCRE pattern.
 * @param string $replace Replacement string.
 * @param string $x       Subject text.
 * @return string         The text after replacement, or $x itself on a PCRE error.
 */
function rep($find,$replace,$x) {
	$out = preg_replace($find,$replace,$x,-1,$q);
	if ($out === null) {
		loga([$find,'preg error ' . preg_last_error(),strlen($x)]);
		return $x;
	}
	loga([$find,$q,strlen($out)]);
	return $out;
}

/**
 * Runs transform() on every entry found in the xml.rcub folder next to this script.
 *
 * @return void
 */
function transform_all() {
	$pending = glob(dirname(__FILE__) . '/xml.rcub/*');
	if ($pending) {
		foreach ($pending as $path) {
			transform($path);
		}
	}
}

/**
 * Prints an HTML table with the files of one xml.<dir> folder.
 *
 * Each row offers delete and download links plus size, modification time and name.
 * Rows carry name="<file name from 'native-' onwards>" so that marca()/desmarca()
 * can highlight the matching rows of both listings.
 * File names originate from uploads, so they are escaped for every context in which
 * they are written (HTML text/attribute, URL query, inline JavaScript string).
 *
 * @param string $tit Title shown above the table.
 * @param string $dir Folder suffix: files are read from xml.$dir/.
 * @return void       Prints nothing when the folder is empty or missing.
 */
function llista($tit,$dir) {
	$ff = glob(dirname(__FILE__) . "/xml.$dir/*");
	if (!$ff) return;
	date_default_timezone_set('CET');
//	rsort($ff);

	// HTML-escapes text for element content or a quoted attribute
	$html = function ($s) {
		return htmlspecialchars($s,ENT_QUOTES | ENT_SUBSTITUTE,'UTF-8');
	};
	// JavaScript string literal that is safe inside a double-quoted HTML attribute
	$js = function ($s) use ($html) {
		$j = json_encode($s,JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
		return $html($j === false ? '""' : $j);
	};

	echo "<h3>XML $tit</h3>\n";

	echo "<table>\n";
	foreach ($ff as $f) {
		$size = number_format(filesize($f),0,',','.');
		$name = basename($f);
		$time = date('Y-m-d H:i',filemtime($f));

		$nameUrl = rawurlencode($name);
		$nameHtml = $html($name);
		$ask = $js("esborro $dir - $name ?");

		$download = "<a target=_blank href='?d=$dir/$nameUrl'>download</a>";
		$delete = "<a href='?x=$dir/$nameUrl' onclick=\"return confirm($ask)\">esborra</a>";

		// part of the name shared by the original and its converted file
		$pos = strpos($name,'native-');
		$id = $pos === false ? $name : substr($name,$pos);
		$idHtml = $html($id);
		$idJs = $js($id);

		echo "<tr name='$idHtml' onmouseover=\"marca($idJs);\" onmouseout=\"desmarca($idJs);\">
			<td>$delete</td><td>$download</td><td align=right>$size </td><td>$time</td><td><b>$nameHtml</b></td>
			</tr>\n";
	}
	echo "</table>\n";
}

/**
 * Prints the start of the HTML page: doctype, stylesheet, the marca()/desmarca()
 * row-highlighting script, the opening <body> and the page title.
 *
 * @return void
 */
function cape() {
	$page = <<<'CAPE_HTML'
<!DOCTYPE html>
	<head>
	<meta charset='utf-8'>
	<style>
	* {font-family: monospace,monospace;}
	.avis {background-color: yellow; padding:22px; margin:22px;}
	.gris {background-color: #eee; padding:11px 22px;}
	.grix {background-color: #ddd;}
	table {border-collapse: collapse;}
	td {padding: 2px 11px;}
/*	tr:hover td {background:#ddd;} */
	h3 {margin-bottom: 2px;}
	input[type=file] {background-color: white;width: 100%;}
	</style>
	<script>
	function marca(k) {
		var ii = document.getElementsByName(k);
		for(var i = 0; i < ii.length; i++) ii[i].classList.add('grix');
	}
	function desmarca(k) {
		var ii = document.getElementsByName(k);
		for(var i = 0; i < ii.length; i++) ii[i].classList.remove('grix');
	}
	</script>
	</head>
	<body>
	<h1>Conversió de fitxers "Native XML Plugin" de RCUB per importar a RACO</h1>

CAPE_HTML;
	echo $page;
}

/**
 * Handles the upload of one RCUB export (form field "f").
 *
 * Stores the file in xml.rcub/ under its original name and converts it with
 * transform(). Refuses files that already exist, files not sent as text/xml,
 * and uploads that PHP reports as failed. Does nothing when no file was posted.
 *
 * @return void
 */
function upload() {
	if (empty($_FILES['f']) || !is_array($_FILES['f'])) return;
	$up = $_FILES['f']; // name type tmp_name error size

	// read the fields explicitly instead of extract()-ing client-controlled keys into scope
	if (!isset($up['name'],$up['type'],$up['tmp_name'],$up['error'])) return;
	if (!is_string($up['name']) || !is_string($up['tmp_name'])) return;

	$name = basename($up['name']);
	$type = $up['type'];
	$tmp_name = $up['tmp_name'];
	$error = $up['error'];

	// the name comes from the client: escape it before echoing it back
	$shown = htmlspecialchars($name,ENT_QUOTES | ENT_SUBSTITUTE,'UTF-8');
	$f9 = dirname(__FILE__) . "/xml.rcub/$name";

	loga('Upload',$f9);
	loga($up);

	if ($error !== UPLOAD_ERR_OK || $name === '') {
		// failed upload or form sent without a file
		avis("Error pujant el fitxer: <b>$shown</b>");
	} elseif (file_exists($f9)) {
		avis("Error: <b>$shown</b> ja existeix");
	} elseif ($type != 'text/xml') {
		avis("Error: <b>$shown</b> no és un XML");
	} elseif (move_uploaded_file($tmp_name, $f9)) {
		avis("Fitxer pujat correctament: <b>$shown</b>");
		transform($f9);
	} else {
		avis("Error pujant el fitxer: <b>$shown</b>");
	}
}

/**
 * Prints the upload form: a single file input named "f" posted to export.php.
 *
 * @return void
 */
function form() {
	$rows = [
		"<form class=gris action=export.php id=upload method=post enctype='multipart/form-data'>",
		'Puja un fitxer "Native XML Plugin" de RCUB',
		"<input type=file name=f accept='text/xml' />",
		'<input type=submit>',
		'</form>',
	];

	// every line after the first one is indented with one tab
	echo implode("\n\t",$rows) . "\n";
}

/**
 * Deletes the file named by the query parameter x ("<dir>/<name>", resolved as
 * xml.<dir>/<name> next to this script) and reports it on the page.
 *
 * The request is ignored when x is missing or not a string, when the target is not
 * a regular file, or when the path is not already in canonical form (which rejects
 * ".." segments and symlinks).
 *
 * @return void
 */
function delete() {
	if (empty($_GET['x']) || !is_string($_GET['x'])) return;
	$f = dirname(__FILE__) . '/xml.' . $_GET['x'];
	// regular files only: unlink() on a directory would just raise a warning
	if (!is_file($f)) return;
	if (realpath($f) !== $f) return;

	$dir = pathinfo(dirname($f),PATHINFO_EXTENSION);
	$name = basename($f);

	if (unlink($f)) {
		// file names originate from uploads: escape before echoing
		$shown = htmlspecialchars("$dir - $name",ENT_QUOTES | ENT_SUBSTITUTE,'UTF-8');
		avis("Fitxer esborrat: <b>$shown</b>");
		loga("esborra $f",$f);
	}
}

/**
 * Sends the file named by the query parameter d ("<dir>/<name>", resolved as
 * xml.<dir>/<name> next to this script) as an attachment and ends the request.
 *
 * The resolved path must stay inside an xml.* folder of this script's directory;
 * this blocks requests such as ?d=rcub/../../some/other/file.
 * Returns without output when d is missing, not a string, or does not name such a file.
 *
 * @return void
 */
function download() {
	if (empty($_GET['d']) || !is_string($_GET['d'])) return;
	$base = dirname(__FILE__);
	$f = $base . '/xml.' . $_GET['d'];
	if (!is_file($f)) return;

	// resolve ".." and symlinks, then require the result to live under <base>/xml.*
	$real = realpath($f);
	$root = realpath($base);
	if ($real === false || $root === false) return;
	if (strpos($real,$root . DIRECTORY_SEPARATOR . 'xml.') !== 0) return;

	header('Content-Description: File Transfer');
	header('Content-Type: application/octet-stream');
	header('Content-Disposition: attachment; filename="'.basename($f).'"');
	header('Expires: 0');
	header('Cache-Control: must-revalidate');
	header('Pragma: public');
	header('Content-Length: ' . filesize($f));
	readfile($f);

	loga("download $f",$f);
	die;
}

/**
 * Prints a notice box (CSS class "avis") with the given HTML fragment.
 *
 * @param string $x HTML to show; it is written as is, without escaping.
 * @return void
 */
function avis($x) {
	echo '<div class=avis>' . $x . "</div>\n";
}

/*
[cn] => Gestio Documental
[colect2] => FPE
[uid] => ****
[mail] => ****@ub.edu
* */
/**
 * Forces CAS authentication and, optionally, restricts access to a whitelist.
 *
 * @param string $x Whitespace-separated list of allowed user names and/or e-mail
 *                  addresses. When empty, any authenticated user is accepted.
 * @return array|null The CAS attributes of the user when $x is given and the user
 *                    is listed; null when $x is empty. A user who is not listed
 *                    gets a dump of their own attributes and the script ends.
 */
function auten($x='') {
	require_once '/usr/local/UB/CAS/ub_idp/config.php';
	require_once $phpcas_path . '/CAS.php';
	phpCAS::client(CAS_VERSION_2_0, $cas_host, $cas_port, $cas_context);
	phpCAS::setCasServerCACert($cas_server_ca_cert_path);
	phpCAS::forceAuthentication();

	if (!$x) return;

	// no empty tokens: with loose comparison a missing attribute (null) would equal ''
	$xx = preg_split('~\s+~',$x,-1,PREG_SPLIT_NO_EMPTY);

	$uid = phpCAS::getUser();
	if (is_string($uid) && $uid !== '' && in_array($uid,$xx,true)) return phpCAS::getAttributes();

	$mail = phpCAS::getAttribute('mail');
	if (is_string($mail) && $mail !== '' && in_array($mail,$xx,true)) return phpCAS::getAttributes();

	// not authorised: show the attributes received and stop
	pre(phpCAS::getAttributes());
	die;
}

/**
 * Debug helper: prints a value inside a highlighted <pre>, preceded by the file,
 * line number and source line of the place where pre() was called.
 *
 * @param mixed $a Value to dump (rendered with print_r and HTML-escaped).
 * @return void
 */
function pre($a) {
	// frame 0 describes the call to pre() itself, i.e. the caller's file and line
	$frames = debug_backtrace();
	$site = $frames[0];

	$source = file($site['file']);
	$code = trim($source[$site['line'] - 1]);

	$dump = htmlentities(print_r($a,true));

	echo "<pre style='background-color:#ff9'>" . $site['file'] . "\t" . $site['line'] . "\t" . $code . "\n" . $dump . "</pre>\n";
}

/**
 * Appends one entry to the log file of the current request.
 *
 * The log file is chosen on the first call only: log/<basename of $l without .xml>
 * followed by the current timestamp and ".txt". It starts with the date and a dump
 * of the global $user. Every entry records the time, and the file, line number and
 * source line of the call to loga(), followed by print_r($x).
 *
 * @param mixed  $x Value to log.
 * @param string $l Base name for the log file; only used by the first call.
 * @return void
 */
function loga($x,$l='log') {
	static $f;
	$c = '# '; // prefix of every metadata line

	if (!$f) {
		$stem = basename($l,'.xml');
		$f = __DIR__ . '/log/' . $stem . date('__Y-m-d_H-i-s') . '.txt';
		if (file_exists($f)) {
			// keep an existing log of the same second under a different name
			rename($f,$f . date('__Y-m-d_H-i-s',filemtime($f)) . '.txt');
		}
		$head = $c . date('Y-m-d H:i:s') . "\n" . print_r($GLOBALS['user'],true) . "\n";
		file_put_contents($f,$head);
	}

	// frame 0 describes the call to loga() itself, i.e. the caller's file and line
	$frames = debug_backtrace();
	$site = $frames[0];
	$source = file($site['file']);
	$code = trim($source[$site['line'] - 1]);

	$entry = [
		$c . date('H:i.s'),
		$c . $site['file'],
		$c . $site['line'],
		$c . $code,
		print_r($x,true),
	];

	file_put_contents($f,"\n" . implode("\n",$entry) . "\n",FILE_APPEND);
}

/**
 * Runs a prepared statement on the global PDO connection $db.
 *
 * The kind of result depends on the first keyword of the statement, which may be
 * preceded by whitespace and followed by a space, tab or line break.
 *
 * @param string $sql    SQL text with positional or named placeholders.
 * @param array  $params Values bound to the placeholders.
 * @return array|string|int|false Rows as associative arrays for select/show, the last
 *                       insert id for insert, the affected row count otherwise;
 *                       false when the statement cannot be prepared or executed
 *                       (only reachable when PDO is not in exception mode).
 */
function sql($sql,$params=array()) {
	global $db;

//	pre($sql);
//	pre($params);

	$st = $db->prepare($sql);
	// check before using the statement, not after
	if (!$st) return false;
	if (!$st->execute($params)) return false;

	// first keyword; strtok skips leading delimiters, so "\n select" and "select\n..." both work
	$verb = strtolower((string) strtok($sql," \t\r\n"));

	switch ($verb) {
		case 'select':
		case 'show': return $st->fetchAll(PDO::FETCH_ASSOC);
		case 'insert': return $db->lastInsertId();
		default: return $st->rowCount();
	}
}


/*
 *
journal - issue - publications - submissions - section - galley

select
j.path,
i.issue_id,i.published,i.access_status,
p.publication_id,p.access_status,p.status,p.url_path,p.version,
s.submission_id,s.status,s.stage_id,
ss.setting_value section,
g.galley_id,g.locale,g.label,g.seq
from journals j
join issues i on j.journal_id=i.journal_id
join publication_settings ps on i.issue_id=ps.setting_value and ps.setting_name='issueId'
join publications p on ps.publication_id=p.publication_id
join submissions s on p.submission_id=s.submission_id
join section_settings ss on p.section_id=ss.section_id and ss.setting_name='abbrev' and ss.setting_value>'' and p.locale=ss.locale
join publication_galleys g on p.publication_id=g.publication_id
where p.publication_id=1714
order by j.path,i.issue_id,p.seq;

select s.*, po.seq from submissions s
left join publications po on s.current_publication_id = po.publication_id
left join publications issue_p on issue_p.submission_id = s.submission_id
left join publication_settings issue_ps on issue_p.publication_id = issue_ps.publication_id
where s.context_id = 12 and s.status in (3) and (issue_ps.setting_name = 'issueId' and issue_ps.setting_value in ('2658'))
group by s.submission_id, po.seq order by po.seq asc;

select `s`.* from `submissions` as `s`
left join `publications` as `issue_p` on `issue_p`.`submission_id` = `s`.`submission_id`
left join `publication_settings` as `issue_ps` on `issue_p`.`publication_id` = `issue_ps`.`publication_id`
where `s`.`context_id` = '12' and `s`.`status` in (3) and `issue_ps`.`setting_name` = 'issueId' and `issue_ps`.`setting_value` in ('2658')
group by `s`.`submission_id` order by `s`.`date_submitted` desc;

 * */
